In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (such as the project's `visualize` module) are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import seaborn as sns
from itertools import cycle
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from visualize import plot_radarchart

## Social fabric (sociodemographics & housing)

In [4]:
# Read the prepared feature table; the first CSV column is used as the row index
X = pd.read_csv(filepath_or_buffer='../results/fabrics.csv', index_col=0)

In [5]:
# Build the feature matrix for the social-fabric radar chart.
#
# Reads `X` (the table loaded from fabrics.csv) and leaves behind:
#   demographic_features / building_features -- names of the selected columns
#   cluster_labels                           -- one-column frame holding 'label'
#   X                                        -- only the selected feature columns

# Convert the origin percentages to absolute numbers, then drop the percentages
origin_groups = ['Dutch', 'Western', 'Non-western']
for group in origin_groups:
    X[group] = X[f'Pct {group}'].div(100).multiply(X['Residents'])
X = X.drop(columns=[f'Pct {group}' for group in origin_groups])

# Absolute counts: these are rescaled by the total number of residents below
count_features = [
    '0-15 y.o.',
    '15-25 y.o.',
    '25-45 y.o.',
    '45-65 y.o.',
    '65+ y.o.',
    'Dutch',
    'Western',
    'Non-western',
    'One-person hh',
    'Multi-person hh w/o kids',
    'Single-parent hh',
    'Multi-person hh w kids',
    'Ppl w benefits',
]

# Already percentages: these are NOT divided by the number of residents.
# Listing them explicitly avoids relying on their position in the list.
percentage_features = [
    'Pct low-income hh',
    'Pct high-income hh',
]

demographic_features = count_features + percentage_features

building_features = [
    'Pct rented houses'
]

# Scale the count features by the total number of residents.
# A row with zero residents yields NaN (0/0) or +/-inf (count/0); fillna() only
# handles NaN and the MinMaxScaler step further down does not accept inf, so
# infinities are mapped to NaN first and end up as 0 like every other gap.
resident_shares = X[count_features].div(X['Residents'], axis=0)
X[count_features] = resident_shares.replace([np.inf, -np.inf], np.nan)

# NOTE(review): this fills gaps in *every* column, including 'label' -- a row
# without a cluster label would be counted as cluster 0. Confirm that is intended.
X = X.fillna(0)

# Save cluster labels
cluster_labels = X[['label']].copy()

# Select which features to use
X = X[demographic_features + building_features].copy()

In [6]:
# Min-max scale every feature column, keeping the original index and column names
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(
    scaler.fit_transform(X),
    index=X.index,
    columns=X.columns,
)

# Attach the cluster label of each row by matching on the index
X_scaled = X_scaled.merge(cluster_labels, left_index=True, right_index=True)

In [7]:
# Shorter display names for the household, income and housing columns
label_abbreviations = {
    'One-person hh': '1-Person HH',
    'Multi-person hh w/o kids': 'Multi-P HH (No Kids)',
    'Single-parent hh': 'Single-Parent HH',
    'Multi-person hh w kids': 'Multi-P HH w/ Kids',
    'Ppl w benefits': 'Recipients of Soc. Benefits',
    'Pct low-income hh': 'Low-Income HH',
    'Pct rented houses': 'Rented Houses',
}

# Column order used for display; columns that are not listed here
# (e.g. 'Pct high-income hh') are dropped by the selection below.
sorted_columns = [
    # benefits and income
    'Recipients of Soc. Benefits', 'Low-Income HH',
    # Dutch / Western / Non-western shares
    'Non-western', 'Western', 'Dutch',
    # household types
    'Single-Parent HH', 'Multi-P HH w/ Kids', 'Multi-P HH (No Kids)', '1-Person HH',
    # age bands, oldest first
    '65+ y.o.', '45-65 y.o.', '25-45 y.o.', '15-25 y.o.', '0-15 y.o.',
    # housing
    'Rented Houses',
    # cluster assignment goes last
    'label',
]

# Apply the new names, then keep the listed columns in the listed order
X_scaled = X_scaled.rename(columns=label_abbreviations)[sorted_columns]

In [8]:
# Colour palette, kept identical to the one used for the time series clustering
my_colors = [
    '#FC2E20', '#FD7F20',
    '#1F77B4', '#AEC7E8',
    '#A89F91', '#7E735F',
]

In [9]:
# Plot from an independent (deep) copy so X_scaled itself stays untouched
df = X_scaled.copy(deep=True)

In [None]:
# Draw the social-fabric radar chart with the project's `visualize.plot_radarchart`
# helper, passing the scaled features, the colour palette and agg_type='median'.
# The trailing semicolon suppresses the cell's return-value output.
plot_radarchart(df, my_colors, agg_type='median');

## Urban fabric (distance to amenities)

In [11]:
# Re-read the prepared feature table (first CSV column becomes the index)
X = pd.read_csv(filepath_or_buffer='../results/fabrics.csv', index_col=0)

# Columns used for the urban-fabric chart
distance_features = [
    # retail, leisure and culture
    'Shopping', 'Cafe & restaurants', 'Entertainment, arts & culture',
    # childcare and education
    'Childcare', 'Primary education', 'Secondary & higher education',
    # transport
    'National or provincial road', 'Train station',
    # health care
    'GP', 'Hospital 9-17', 'Hospital 24h', 'Pharmacy', 'GP station',
    # city centre
    'Distance to centre',
]

# Keep the cluster labels aside before narrowing X down to the features
cluster_labels = X.loc[:, ['label']].copy()

# Select the feature columns and give the centre distance a shorter name
X = X.loc[:, distance_features].rename(columns={'Distance to centre': 'City centre'})

In [12]:
# Min-max scale the distance features, keeping the original index and column names
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(
    scaler.fit_transform(X),
    index=X.index,
    columns=X.columns,
)

# Attach the cluster label of each row by matching on the index
X_scaled = X_scaled.merge(cluster_labels, left_index=True, right_index=True)

In [13]:
# Colour palette passed to plot_radarchart for the urban-fabric chart
my_colors = [
    '#FC2E20', '#FD7F20',
    '#1F77B4', '#AEC7E8',
    '#A89F91', '#7E735F',
]

In [None]:
# Draw the urban-fabric radar chart with the project's `visualize.plot_radarchart`
# helper, passing the scaled distance features, the colour palette and
# agg_type='median'. The trailing semicolon suppresses the cell's return-value output.
plot_radarchart(X_scaled, my_colors, agg_type='median');