In [1]:
import pandas as pd
import urllib.parse
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the cleaned listings dataset. low_memory=False disables pandas' chunked
# parsing, which the pandas docs recommend to avoid mixed-type column inference.
df = pd.read_csv(
    filepath_or_buffer="clean_data.csv",
    low_memory=False,
)

In [3]:
# Report the dataset dimensions: df.shape[0] is the row count, df.shape[1] the column count.
print(f"The DataFrame has {df.shape[0]} rows and {df.shape[1]} columns.")

The DataFrame has 18030 rows and 26 columns.


In [9]:
# Display the DataFrame; the notebook renders a truncated preview
# (leading and trailing rows, with elided rows/columns shown as "...").
df

Unnamed: 0,id,Locality,Zip,Province,Type of property,Subtype of property,Type of sale,Price,Price of square meter,Building condition,...,Kitchen values,Surface of the land,Primary energy consumption,Energy_classes,Terrace,Terrace surface,Garden,Garden surface,Open fire,Swimming pool
0,10578809,liedekerke,1770,Flemish Brabant,house,house,for sale,349000,1938.89,As new,...,3,256.00,241,5,1,25.00,1,150.00,0.00,0.00
1,10578941,sint-andries,8200,West Flanders,house,house,for sale,625000,3378.38,As new,...,3,539.00,292,5,0,0.00,1,400.00,0.00,0.00
2,10577515,seraing,4100,Liège,house,house,for sale,149000,1419.05,Good,...,2,105.00,469,3,1,,1,25.00,0.00,0.00
3,10577846,andenne,5300,Namur,house,house,for sale,335000,1602.87,Good,...,1,2185.00,196,6,1,,1,,0.00,0.00
4,10523130,forest,1190,Brussels Capital Region,house,house,for sale,545000,2145.67,To be done up,...,1,260.00,517,2,1,45.00,0,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18025,9577413,sint-truiden,3800,Limburg,apartment,apartment,for sale,284900,2907.14,,...,-1,,-1,-1,1,18.00,1,,0.00,0.00
18026,10563952,nieuwpoort,8620,West Flanders,apartment,apartment,for sale,299000,6953.49,,...,3,,318,4,1,,0,0.00,0.00,0.00
18027,9577415,sint-truiden,3800,Limburg,apartment,apartment,for sale,299900,2911.65,,...,-1,,-1,-1,1,17.00,1,,0.00,0.00
18028,9577414,sint-truiden,3800,Limburg,apartment,apartment,for sale,299900,3190.43,,...,-1,,-1,-1,1,16.00,1,135.00,0.00,0.00


In [10]:
def missing_percentage(frame):
    """Return, per column, the percentage of null values in ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table whose columns are inspected.

    Returns
    -------
    pandas.Series
        One value per column, in the range 0-100.
    """
    return frame.isnull().mean() * 100


# Split the listings by property type so missingness can be compared per type.
df_houses = df[df['Type of property'] == 'house']
df_apartments = df[df['Type of property'] == 'apartment']

# NOTE(review): only real nulls are counted here. The data preview shows -1 in
# columns such as 'Kitchen values', 'Primary energy consumption' and
# 'Energy_classes' -- presumably a missing-value sentinel; if so, those columns
# are under-reported as 0% missing. Confirm against the cleaning step.
all_missing_value_df = missing_percentage(df)
houses_missing_value_df = missing_percentage(df_houses)
apartments_missing_value_df = missing_percentage(df_apartments)

# Print percentages with two decimals. This is a global pandas display option
# and stays in effect for later cells.
pd.set_option('display.float_format', '{:.2f}'.format)

# One labelled section per subset, separated by a blank line. The apartments
# label now follows the same "<subset>_missing_value" pattern as the others.
missing_value_reports = {
    "all_missing_value": all_missing_value_df,
    "houses_missing_value": houses_missing_value_df,
    "apartments_missing_value": apartments_missing_value_df,
}
for position, (label, percentages) in enumerate(missing_value_reports.items()):
    if position:
        print("")
    print(label)
    print(percentages)

all_missing_value
id                            0.00
Locality                      0.00
Zip                           0.00
Province                      0.00
Type of property              0.00
Subtype of property           0.00
Type of sale                  0.00
Price                         0.00
Price of square meter         0.00
Building condition           15.77
Building Cond. values         0.00
Number of facades            23.31
Number of rooms               1.84
Living area                   0.00
Furnished                     0.00
Fully equipped kitchen       21.81
Kitchen values                0.00
Surface of the land          49.83
Primary energy consumption    0.00
Energy_classes                0.00
Terrace                       0.00
Terrace surface              23.89
Garden                        0.00
Garden surface               12.25
Open fire                     0.00
Swimming pool                 0.00
dtype: float64

houses_missing_value
id                            0.00


In [5]:
# Provinces belonging to each Belgian region, as spelled in the 'Province' column.
flanders_provinces = [
    'Flemish Brabant',
    'Antwerp',
    'Limburg',
    'West Flanders',
    'East Flanders',
]
wallonia_provinces = [
    'Walloon Brabant',
    'Liège',
    'Namur',
    'Hainaut',
    'Luxembourg',
]
brussels_provinces = ['Brussels Capital Region']

# One sub-frame per region, selected by province membership.
province_column = df['Province']
df_flanders = df.loc[province_column.isin(flanders_provinces)]
df_wallonia = df.loc[province_column.isin(wallonia_provinces)]
df_brussels = df.loc[province_column.isin(brussels_provinces)]


In [6]:
def calculate_metrics(region_df, group_col='Zip', min_listings=1):
    """Aggregate price statistics per group of listings.

    Parameters
    ----------
    region_df : pandas.DataFrame
        Listings to aggregate. Must contain ``group_col``, 'Price' and
        'Price of square meter'.
    group_col : str, default 'Zip'
        Column whose distinct values define the groups.
    min_listings : int, default 1
        Minimum number of rows a group must have to be kept. The default keeps
        every group. Raise it to drop groups whose statistics rest on very few
        listings (with a single listing, mean and median are the same value).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``group_col`` with columns 'Avg_Price' (mean of 'Price'),
        'Median_Price' (median of 'Price') and 'Price_Per_Sq_Meter'
        (mean of 'Price of square meter').
    """
    grouped = region_df.groupby(group_col)
    metrics = grouped.agg(
        Avg_Price=('Price', 'mean'),
        Median_Price=('Price', 'median'),
        Price_Per_Sq_Meter=('Price of square meter', 'mean'),
    )
    if min_listings > 1:
        # Align the group sizes on the metrics index before filtering so the
        # boolean mask always matches row for row.
        listing_counts = grouped.size().reindex(metrics.index, fill_value=0)
        metrics = metrics.loc[listing_counts >= min_listings]
    return metrics

In [7]:
# Price metrics for the whole country and for each region, in that order.
belgium_metrics, flanders_metrics, wallonia_metrics, brussels_metrics = map(
    calculate_metrics, (df, df_flanders, df_wallonia, df_brussels)
)


def _avg_price_extremes(metrics):
    """Return the index labels holding the highest and lowest 'Avg_Price'."""
    avg_price = metrics['Avg_Price']
    return avg_price.idxmax(), avg_price.idxmin()


# Index label (group key of the metrics frame) of the priciest and cheapest
# group in each scope.
most_expensive_belgium, least_expensive_belgium = _avg_price_extremes(belgium_metrics)
most_expensive_flanders, least_expensive_flanders = _avg_price_extremes(flanders_metrics)
most_expensive_wallonia, least_expensive_wallonia = _avg_price_extremes(wallonia_metrics)
most_expensive_brussels, least_expensive_brussels = _avg_price_extremes(brussels_metrics)

In [8]:
# Print floats with two decimals. This is a global pandas display option.
pd.set_option('display.float_format', '{:.2f}'.format)

# NOTE(review): the identifiers printed below are the index labels of the
# metrics frames. In the captured output these are Zip codes (e.g. 8902), not
# municipality names, although the message text says "municipality".
region_price_extremes = [
    ('Belgium', belgium_metrics, most_expensive_belgium, least_expensive_belgium),
    ('Flanders', flanders_metrics, most_expensive_flanders, least_expensive_flanders),
    ('Wallonia', wallonia_metrics, most_expensive_wallonia, least_expensive_wallonia),
    ('Brussels', brussels_metrics, most_expensive_brussels, least_expensive_brussels),
]

# For each scope, print the label and metrics row of the most expensive group,
# then the same for the least expensive group.
for region_name, region_metrics, most_expensive_key, least_expensive_key in region_price_extremes:
    print(f'Most expensive municipality in {region_name}: {most_expensive_key}')
    print(region_metrics.loc[most_expensive_key])
    print(f'Least expensive municipality in {region_name}: {least_expensive_key}')
    print(region_metrics.loc[least_expensive_key])


Most expensive municipality in Belgium: 8902
Avg_Price            2500000.00
Median_Price         2500000.00
Price_Per_Sq_Meter      3333.33
Name: 8902, dtype: float64
Least expensive municipality in Belgium: 5680
Avg_Price            49950.00
Median_Price         49950.00
Price_Per_Sq_Meter     445.40
Name: 5680, dtype: float64
Most expensive municipality in Flanders: 8902
Avg_Price            2500000.00
Median_Price         2500000.00
Price_Per_Sq_Meter      3333.33
Name: 8902, dtype: float64
Least expensive municipality in Flanders: 9970
Avg_Price            168000.00
Median_Price         168000.00
Price_Per_Sq_Meter     1768.42
Name: 9970, dtype: float64
Most expensive municipality in Wallonia: 1380
Avg_Price            1396184.35
Median_Price         1195000.00
Price_Per_Sq_Meter      3812.90
Name: 1380, dtype: float64
Least expensive municipality in Wallonia: 5680
Avg_Price            49950.00
Median_Price         49950.00
Price_Per_Sq_Meter     445.40
Name: 5680, dtype: float64
