In [1]:
import pandas as pd
import urllib.parse
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the listings table. low_memory=False makes pandas parse the file in one
# pass instead of in chunks, which avoids mixed-type column inference.
df = pd.read_csv(
    filepath_or_buffer="clean_data.csv",
    low_memory=False,
)

In [3]:
# Report the dimensions of the loaded table: df.shape is (row count, column count).
print(f"The DataFrame has {df.shape[0]} rows and {df.shape[1]} columns.")

The DataFrame has 18030 rows and 26 columns.


In [4]:
def _missing_share_pct(frame):
    """Return, for each column of `frame`, the percentage of null entries."""
    return frame.isna().mean().mul(100)


# Split the listings on the exact 'Type of property' labels 'house' / 'apartment'.
property_type = df['Type of property']
df_houses = df[property_type.eq('house')]
df_apartments = df[property_type.eq('apartment')]

# Per-column share of missing values (0-100) for all listings and for each subset.
# NOTE: despite the `_df` suffix these are Series indexed by column name.
all_missing_value_df = _missing_share_pct(df)
houses_missing_value_df = _missing_share_pct(df_houses)
apartments_missing_value_df = _missing_share_pct(df_apartments)

# Print each report under its label, with one blank line between reports.
missing_value_reports = [
    ("all_missing_value", all_missing_value_df),
    ("houses_missing_value", houses_missing_value_df),
    ("apartments_missing", apartments_missing_value_df),
]
for position, (label, percentages) in enumerate(missing_value_reports):
    if position > 0:
        print("")
    print(label)
    print(percentages)

all_missing_value
id                             0.000000
Locality                       0.000000
Zip                            0.000000
Province                       0.000000
Type of property               0.000000
Subtype of property            0.000000
Type of sale                   0.000000
Price                          0.000000
Price of square meter          0.000000
Building condition            15.768164
Building Cond. values          0.000000
Number of facades             23.305602
Number of rooms                1.835829
Living area                    0.000000
Furnished                      0.000000
Fully equipped kitchen        21.813644
Kitchen values                 0.000000
Surface of the land           49.833611
Primary energy consumption     0.000000
Energy_classes                 0.000000
Terrace                        0.000000
Terrace surface               23.893511
Garden                         0.000000
Garden surface                12.246256
Open fire             

In [5]:
# Provinces assigned to each region for this analysis.
# NOTE(review): the filter below is an exact string match on 'Province', so
# these spellings (e.g. 'Liège') must match the values in the data — confirm.
flanders_provinces = [
    'Flemish Brabant',
    'Antwerp',
    'Limburg',
    'West Flanders',
    'East Flanders',
]
wallonia_provinces = [
    'Walloon Brabant',
    'Liège',
    'Namur',
    'Hainaut',
    'Luxembourg',
]
brussels_provinces = ['Brussels Capital Region']


def _listings_in(provinces):
    """Return the rows of `df` whose 'Province' value is one of `provinces`."""
    in_region = df['Province'].isin(provinces)
    return df[in_region]


# One listings subset per region.
df_flanders = _listings_in(flanders_provinces)
df_wallonia = _listings_in(wallonia_provinces)
df_brussels = _listings_in(brussels_provinces)


In [6]:
def calculate_metrics(region_df):
    """Summarise listing prices for every 'Zip' value in `region_df`.

    Parameters
    ----------
    region_df : DataFrame
        Listings with at least the columns 'Zip', 'Price' and
        'Price of square meter'.

    Returns
    -------
    DataFrame
        Indexed by 'Zip', with columns:
        - Avg_Price: mean of 'Price'
        - Median_Price: median of 'Price'
        - Price_Per_Sq_Meter: mean of 'Price of square meter'
    """
    # Output column name -> (source column, aggregation function).
    named_aggregations = {
        'Avg_Price': ('Price', 'mean'),
        'Median_Price': ('Price', 'median'),
        'Price_Per_Sq_Meter': ('Price of square meter', 'mean'),
    }
    listings_by_zip = region_df.groupby('Zip')
    return listings_by_zip.agg(**named_aggregations)

In [7]:
def _avg_price_extremes(metrics):
    """Return (index label with highest Avg_Price, index label with lowest Avg_Price).

    NOTE(review): idxmax/idxmin raise ValueError on an empty Series — confirm
    every region subset contains at least one listing.
    """
    avg_price = metrics['Avg_Price']
    return avg_price.idxmax(), avg_price.idxmin()


# Per-Zip price metrics for the whole dataset and for each region subset.
belgium_metrics = calculate_metrics(df)
flanders_metrics = calculate_metrics(df_flanders)
wallonia_metrics = calculate_metrics(df_wallonia)
brussels_metrics = calculate_metrics(df_brussels)

# The metrics are indexed by 'Zip', so each value below is a Zip code.
most_expensive_belgium, least_expensive_belgium = _avg_price_extremes(belgium_metrics)
most_expensive_flanders, least_expensive_flanders = _avg_price_extremes(flanders_metrics)
most_expensive_wallonia, least_expensive_wallonia = _avg_price_extremes(wallonia_metrics)
most_expensive_brussels, least_expensive_brussels = _avg_price_extremes(brussels_metrics)

In [8]:
# Render floats with two decimals. This is a global pandas display option, so
# it also affects every output produced after this cell.
pd.set_option('display.float_format', '{:.2f}'.format)

# One entry per region:
# (label, metrics frame, index label with highest Avg_Price, index label with lowest Avg_Price)
# NOTE(review): the printed identifiers are index labels of the metrics frames
# (Zip codes, since calculate_metrics groups by 'Zip'), not municipality names.
region_extremes = [
    ('Belgium', belgium_metrics, most_expensive_belgium, least_expensive_belgium),
    ('Flanders', flanders_metrics, most_expensive_flanders, least_expensive_flanders),
    ('Wallonia', wallonia_metrics, most_expensive_wallonia, least_expensive_wallonia),
    ('Brussels', brussels_metrics, most_expensive_brussels, least_expensive_brussels),
]

# Same report for every region: the extreme's identifier followed by its metrics row.
for region_name, region_metrics, priciest, cheapest in region_extremes:
    print(f'Most expensive municipality in {region_name}: {priciest}')
    print(region_metrics.loc[priciest])
    print(f'Least expensive municipality in {region_name}: {cheapest}')
    print(region_metrics.loc[cheapest])


Most expensive municipality in Belgium: 8902
Avg_Price            2500000.00
Median_Price         2500000.00
Price_Per_Sq_Meter      3333.33
Name: 8902, dtype: float64
Least expensive municipality in Belgium: 5680
Avg_Price            49950.00
Median_Price         49950.00
Price_Per_Sq_Meter     445.40
Name: 5680, dtype: float64
Most expensive municipality in Flanders: 8902
Avg_Price            2500000.00
Median_Price         2500000.00
Price_Per_Sq_Meter      3333.33
Name: 8902, dtype: float64
Least expensive municipality in Flanders: 9970
Avg_Price            168000.00
Median_Price         168000.00
Price_Per_Sq_Meter     1768.42
Name: 9970, dtype: float64
Most expensive municipality in Wallonia: 1380
Avg_Price            1396184.35
Median_Price         1195000.00
Price_Per_Sq_Meter      3812.90
Name: 1380, dtype: float64
Least expensive municipality in Wallonia: 5680
Avg_Price            49950.00
Median_Price         49950.00
Price_Per_Sq_Meter     445.40
Name: 5680, dtype: float64
