In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
from scipy import sparse
import os
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from fastprogress.fastprogress import master_bar, progress_bar
from scipy.cluster.hierarchy import dendrogram
from statsmodels.api import WLS,OLS, add_constant, tools



In [2]:
# Use one typeface for every text element in the figures.
matplotlib.rcParams['font.family'] = 'Liberation Sans'
# Setting up matplotlib parameters.
plt.rc('font',size = 12)
plt.rc('axes',titlesize = 12)
plt.rc('axes',labelsize = 12)
plt.rc('xtick',labelsize = 12)
plt.rc('ytick',labelsize = 12)
plt.rc('legend',fontsize = 12)
plt.rc('figure',titlesize = 16)
# Grid in the background.
plt.rcParams['axes.axisbelow'] = True

# cm to inch: 1 inch = 2.54 cm, so <length in cm> / ctm is the length in inches
# expected by matplotlib's figsize. (The previous value 1.54 made every figure
# about 65 % larger than the declared width.)
ctm = 2.54
# Column width of the document reporting the results [cm]; it is always divided by ctm
# before being handed to figsize.
text_width = 16

# Conceptual example
## Color code for the spatial units in the conceptual examples

In [3]:
# Norm of the color bar for the conceptual examples.
norm_color = matplotlib.colors.Normalize(vmin = -10,vmax = 40)

# color_spatial_unit provides the color code for the spatial units in the 8 conceptual examples.
color_spatial_unit = pd.DataFrame({'exposure':[2.5,4.2857,5,6,7.125,8.75,9.5,11,18,20,20,25,30,33,55,33],'R':[0]*16})
# Difference with the city exposure.
color_spatial_unit['exposure'] = color_spatial_unit['exposure'] - 10
# Difference with the city exposure in scenario B2: rows 14 and 15 get a further -30
# on top of the -10 applied to every row.
color_spatial_unit.loc[14:15,'exposure'] = color_spatial_unit.loc[14:15,'exposure'] - 30

# matplotlib.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# the colormap registry returns the same colormap.
cmap_gnuplot = matplotlib.colormaps['gnuplot_r']
# The colormap returns RGBA components in [0, 1], one row per exposure value.
color_spatial_unit[['R','G','B','A']] = cmap_gnuplot(norm_color(color_spatial_unit['exposure']))
# Rescale the colour channels to the 0-255 range; alpha stays in [0, 1].
for channel in ['R','G','B']:
    color_spatial_unit[channel] = np.round(color_spatial_unit[channel] * 255)

In [4]:
# Display the colour table: exposure offset per spatial unit with its R, G, B (0-255) and A values.
color_spatial_unit

Unnamed: 0,exposure,R,G,B,A
0,-7.5,249.0,221.0,0.0,1.0
1,-5.7143,244.0,197.0,0.0,1.0
2,-5.0,242.0,187.0,0.0,1.0
3,-4.0,240.0,175.0,0.0,1.0
4,-2.875,236.0,162.0,0.0,1.0
5,-1.25,232.0,144.0,0.0,1.0
6,-0.5,230.0,136.0,0.0,1.0
7,1.0,225.0,121.0,0.0,1.0
8,8.0,204.0,67.0,0.0,1.0
9,10.0,198.0,55.0,0.0,1.0


## Value of the indicators in the conceptual example

In [5]:
# Function computing the entropy index for the segregation.
def entropy(p1,p2,N,n_units = 9):
    """Entropy-based segregation index of a toy city made of equally weighted spatial units.

    N units have a share p1 of the group of interest and the remaining
    (n_units - N) units have a share p2.

    Parameters
    ----------
    p1 : float
        Share of the group of interest in each of the N units.
    p2 : float
        Share of the group of interest in each of the other units.
    N : int
        Number of units with share p1.
    n_units : int, default 9
        Total number of spatial units in the city.

    Returns
    -------
    float
        1 - sum_i(p_i * log(p_i)) / (n_units * p_city * log(p_city)), where p_city
        is the mean share over all units. It is 0 when p1 == p2.

    Notes
    -----
    Shares equal to 0 are not handled: np.log(0) is -inf and the result is nan.
    """
    # City-wide share: average of the unit shares.
    res_mix_city = (p1 * N + p2 * (n_units - N)) / n_units
    # Sum of p * log(p) over all units.
    unit_terms = N * p1 * np.log(p1) + (n_units - N) * p2 * np.log(p2)
    # Normalise by the same quantity computed with the city-wide share in every unit.
    unit_terms = unit_terms / (n_units * res_mix_city * np.log(res_mix_city))
    return 1 - unit_terms

# Entropy index of each conceptual scenario; the list is positional and follows the
# order of the 'scenario' column below (D1, D2, C1, C2, A1, A2, B1, B2).
entropy_seg = [entropy(0.18,0.06,3),entropy(0.3,0.042857,2),entropy(0.2,0.05,3),entropy(0.25,0.025,3),
               entropy(0.2,0.0875,1),entropy(0.33,0.07125,1),entropy(0.11,0.095,3),entropy(0.55,0.325,3)]


# Indicator values of the eight scenarios, stored as fractions (they are multiplied
# by 100 when plotted). 'seg' holds the entropy index computed above.
toy_ex = pd.DataFrame({'scenario':['D1','D2','C1','C2','A1','A2','B1','B2'],
                       'intensity':[0.08,0.2,0.1,0.15,0.1,0.23,0.01,0.15],
                       'separation':[0.6,0.6667,0.6667,0.8333,0.2222,0.3667,0.3667,0.4583],
                       'scale':[0.1111,0.1111,0.3333,0.3333,0.1111,0.1111,0.3333,0.3333],
                       'seg' :entropy_seg})

# Conceptual scenarios in the separation / scale plane, coloured by intensity.
fig, ax = plt.subplots(figsize = (text_width/ctm, text_width/ctm*0.4), layout = 'constrained')
norm_color = matplotlib.colors.Normalize(vmin = 0, vmax = 30)

scatter_plot = ax.scatter(toy_ex['separation'] * 100,
                          toy_ex['scale'] * 100,
                          c = toy_ex['intensity'] * 100,
                          cmap = 'gnuplot_r',
                          norm = norm_color,
                          edgecolors = 'Black',
                          s = 70)

# Leftward shift of the entropy label for each scenario; scenarios that are not
# listed use a shift of 4.
entropy_label_shift = {'A1': 0.5, 'A2': 0.5, 'B2': 0.5, 'C1': 0.5, 'D2': 0.5, 'C2': 6}

for scenario_name, sep_share, scale_share, seg_index in zip(toy_ex['scenario'],
                                                            toy_ex['separation'],
                                                            toy_ex['scale'],
                                                            toy_ex['seg']):
    x = sep_share * 100
    y = scale_share * 100

    # Scenario name just above the marker.
    ax.text(x - 3, y + 1, scenario_name)

    # Entropy index below the marker.
    ax.text(x - entropy_label_shift.get(scenario_name, 4),
            y - 3.5,
            'H = ' + '{:.1f}'.format(seg_index * 100))


ax.set_ylabel('Scale: share of population living \n in the median segregated region [%]')
ax.set_xlabel('Separation: share of individuals from the group of interest \n living in segregated regions [%]')

ax.set_xlim([15,85])
ax.set_ylim([5,40])

intensity_mappable = matplotlib.cm.ScalarMappable(norm = norm_color, cmap = 'gnuplot_r')
cb = fig.colorbar(intensity_mappable,
                  location = 'right',
                  aspect = 20, pad = 0.01)
cb.set_label('Intensity: difference between \n the exposure level in segregated regions \n and the city average [% points]')
#fig.tight_layout()
#plt.savefig('../report/figures/three_indicators_toy_example.png', facecolor = 'white',dpi = 175,bbox_inches = 'tight',pad_inches =0)
#plt.savefig('../report/figures/conceptual_examples/three_indicators_toy_example.svg', facecolor = 'white',pad_inches =0)
plt.close()

In [6]:
# One row per region, read from the regions GeoPackage.
size_seg = gpd.read_file('../data/results/regions/regions.gpkg')
# City-level table: the first (city, share_NW_c) pair found for every city.
city_data = size_seg[['city','share_NW_c']].drop_duplicates(subset = 'city')

In [7]:
# Computing the scale: size of the median segregated region.
# Regions are ordered by population inside every (city, seg) group so that the
# cumulative sum below runs from the smallest to the largest region.
size_seg = size_seg.sort_values(by = ['city','seg','pop_res'])
# cum_sum is the running total of pop_res within each (city, seg) group, in the order set above.
size_seg['cum_sum'] = size_seg[['city','seg','pop_res']].groupby(by = ['city','seg']).cumsum()
# tot_pop_seg is the total pop_res of each (city, seg) group; it is attached to every region of the group.
tot_pop_seg = size_seg[['city','seg','pop_res']].groupby(by = ['city','seg']).sum().rename(columns = {'pop_res':'tot_pop_seg'})
size_seg = size_seg.merge(tot_pop_seg, on = ['city','seg'])
# The scale is the population of the median region: the smallest region at which the
# cumulative population exceeds 50% of the group total. Rows are sorted by pop_res so that
# drop_duplicates keeps, for every (city, seg), the smallest region passing that threshold.
size_seg = size_seg.sort_values(by = 'pop_res')
scale = size_seg.loc[size_seg['cum_sum'] > size_seg['tot_pop_seg'] * 0.5].drop_duplicates(subset = ['city','seg'])
scale = scale.loc[:,['city','seg','pop_res','NW_rel','share_NW_c','pop_city']]

scale = scale.rename(columns = {'pop_res':'scale'})
# When the median region holds the whole city population, its seg label is reset to 0.
scale['seg'] = scale['seg'].mask(scale['scale'] == scale['pop_city'],0)
# NOTE: reset_index() without drop=True keeps the previous index as an extra 'index' column.
scale = scale.reset_index()

In [8]:
# Computing the intensity: difference between the average exposure in segregated region and the city average.
# The per-region exposure is kept under the name 'expos' ...
# NOTE(review): this rename makes the cell non-idempotent; running it twice would
# rename the weighted column created below as well.
size_seg = size_seg.rename(columns = {'expos_NW_reg':'expos'})
# ... and 'expos_NW_reg' becomes the region's population-weighted contribution, so that
# summing it over a (city, seg) group gives the population-weighted mean exposure.
size_seg['expos_NW_reg'] = size_seg['expos'] * size_seg['pop_res'] / size_seg['tot_pop_seg']
# share_NW_c is part of the grouping keys only to carry it into the result
# (presumably constant within a city - confirm).
intensity = size_seg[['city','seg','expos_NW_reg','share_NW_c']].groupby(by = ['city','seg','share_NW_c']).sum()
intensity = intensity.reset_index()
intensity['intensity'] = intensity['expos_NW_reg'] - intensity['share_NW_c']

In [9]:
# Computing the separation: share of individuals with a NW migration background living in a segregated region.
city_has_nw = size_seg['NW_city'] > 0
# Region's share of the city's NW residents (used for regions with seg >= 0).
nw_share = size_seg['N_NW_res'] / size_seg['NW_city']
# Region's share of the city's other residents (used for regions with seg == -1).
other_share = (size_seg['pop_res'] - size_seg['N_NW_res']) / (size_seg['pop_city'] - size_seg['NW_city'])

# Regions matching neither condition contribute 0.
region_share = nw_share.where(city_has_nw & (size_seg['seg'] >= 0), 0)
region_share = other_share.where(city_has_nw & (size_seg['seg'] == -1), region_share)
size_seg['separation'] = region_share

# Sum the region contributions per (city, seg).
separation = size_seg[['city','seg','separation']].groupby(by = ['city','seg']).sum().reset_index()

In [10]:
# Collecting all indicators in one table.
indicator_keys = ['city','seg']

# Outer joins keep every (city, seg) pair present in any of the three tables.
seg_ind = separation.merge(intensity.drop(columns = 'expos_NW_reg'),
                           on = indicator_keys,
                           how = 'outer')
seg_ind = seg_ind.merge(scale[['city','seg','scale','pop_city']],
                        on = indicator_keys,
                        how = 'outer')

# Pairs without a scale get a scale of 0.
seg_ind['scale'] = seg_ind['scale'].fillna(0)

# Scale as a fraction of the city population.
seg_ind['scale_rel'] = seg_ind['scale'] / seg_ind['pop_city']

# A region as large as the whole city does not count as separation.
region_is_whole_city = seg_ind['scale'] == seg_ind['pop_city']
seg_ind['separation'] = seg_ind['separation'].mask(region_is_whole_city, 0)

In [11]:
# Indicators of the groups labelled seg == 1 whose median region has at least 30 inhabitants.
# NOTE(review): 30 is an unexplained threshold - consider naming it in a configuration cell.
seg_NW = seg_ind.loc[(seg_ind['seg'] == 1) & (seg_ind['scale'] >= 30)].copy()
# share_NW_c comes back from city_data below; 'share_NW_c' used to be listed twice here.
seg_NW = seg_NW.drop(columns = ['share_NW_c','pop_city','seg'])
# Right join: every city of city_data is kept, including those filtered out above.
seg_NW = seg_NW.merge(city_data, on = 'city', how = 'right')

# Cities without a qualifying row get 0 for every indicator.
seg_NW = seg_NW.fillna(0)

In [12]:
# Adding extra information on the municipalities.
gemeente = gpd.read_file('../data/raw_data/sociodemographics/Gemeente/WijkBuurtkaart_2017_v3.gpkg',
                         layer = 'gemeente_2017_v3')

gemeente = gemeente.loc[:,['GM_CODE','GM_NAAM','geometry','AANT_INW']]

# One geometry per municipality name.
gemeente = gemeente.dissolve(by = 'GM_NAAM').reset_index()

# Normalise the municipality names so that they can be joined on seg_NW['city'].
# The first three replacements are literal.
gemeente['GM_NAAM'] = gemeente['GM_NAAM'].str.replace("'",'', regex=False)
gemeente['GM_NAAM'] = gemeente['GM_NAAM'].str.replace(' ','_', regex=False)
gemeente['GM_NAAM'] = gemeente['GM_NAAM'].str.replace('-','_', regex=False)
# Raw strings: '\(' and '\)' are invalid escape sequences in a normal string literal.
gemeente['GM_NAAM'] = gemeente['GM_NAAM'].str.replace(r'\(','', regex=True)
# The unescaped '.' is kept on purpose: it also removes the character right before ')'
# (presumably the dot of an abbreviation such as '(X.)' - confirm against the city names).
gemeente['GM_NAAM'] = gemeente['GM_NAAM'].str.replace(r'.\)','', regex=True)

gemeente  = gemeente.merge(seg_NW, left_on = 'GM_NAAM',right_on = 'city', how = 'left')
# Municipalities without a match in seg_NW get 0 for every indicator.
for indicator in ['scale','separation','intensity','share_NW_c','scale_rel']:
    gemeente[indicator] = gemeente[indicator].fillna(0)
# No separation is reported when the relative scale is 0.
gemeente['separation'] = gemeente['separation'].mask(gemeente['scale_rel'] == 0,0)

# Largest municipalities first.
gemeente = gemeente.sort_values(by = 'AANT_INW', ascending = False)
gemeente = gemeente.reset_index(drop = True)

gemeente = gemeente.loc[~gemeente['GM_NAAM'].isin(['Buitenland','Baarle_Nassau'])].copy()

In [13]:
# Gini coefficient per municipality.
gini = pd.read_csv('../data/raw_data/income_inequality/Gini_coef.csv')

# Same name normalisation as for gemeente['GM_NAAM'], so that both tables share a join key.
gini['naam'] = gini['naam'].str.replace("'",'', regex=False)
gini['naam'] = gini['naam'].str.replace(' ','_', regex=False)
gini['naam'] = gini['naam'].str.replace('-','_', regex=False)
# Raw strings: '\(' and '\)' are invalid escape sequences in a normal string literal.
gini['naam'] = gini['naam'].str.replace(r'\(','', regex=True)
# The unescaped '.' also removes the character right before ')', as in the municipality table.
gini['naam'] = gini['naam'].str.replace(r'.\)','', regex=True)
gini = gini.rename(columns = {'naam':'GM_NAAM','Gini-coëfficiënt':'Gini'})
gini = gini.drop(columns = 'Column1')
# Left join: municipalities without a Gini value are kept, with NaN.
gemeente_gini = gemeente.merge(gini, on = 'GM_NAAM', how  = 'left')
gemeente_gini = gemeente_gini.sort_values(by = 'share_NW_c')
# The Gini values are text with a decimal comma; convert them to floats.
gemeente_gini['Gini'] = gemeente_gini['Gini'].str.replace(',','.', regex=False).astype(float)

In [14]:
# Save the indicator table before the outlier municipalities are removed.
gemeente_gini.to_file('../data/results/descriptive_statistics/indicators_with_outliers.gpkg')

  pd.Int64Index,


In [15]:
# Municipalities treated as outliers and left out of the remaining analysis.
outlier_municipalities = ['Gulpen_Wittem', 'Eijsden_Margraten',
                          'Laarbeek', 'Schinnen', 'Tynaarlo',
                          'Binnenmaas', 'Noordenveld',
                          'Vlagtwedde', 'het_Bildt','Leudal']
is_outlier = gemeente_gini['GM_NAAM'].isin(outlier_municipalities)
gemeente_gini = gemeente_gini.loc[~is_outlier]

In [16]:
# Save the indicator table once the outlier municipalities have been removed.
gemeente_gini.to_file('../data/results/descriptive_statistics/indicators_without_outliers.gpkg')

  pd.Int64Index,


In [17]:
# Histograms of intensity, separation and scale.
def _plot_indicator_hist(axis, data, column, xlabel, show_yaxis = False):
    """Draw the histogram of one indicator (in %) with bars coloured by cumulative share.

    Parameters
    ----------
    axis : matplotlib Axes
        Panel to draw on.
    data : DataFrame
        Table holding the indicator as a fraction in `column`.
    column : str
        Name of the indicator column.
    xlabel : str
        Label of the x axis.
    show_yaxis : bool, default False
        If True the y axis is labelled; otherwise its ticks and tick labels are hidden.
    """
    _, bin_edges, bars = axis.hist(data[column] * 100, bins = range(0,101,5))
    # Each bar is coloured by the share of rows lying strictly below its left edge.
    for left_edge, bar in zip(bin_edges[:-1], bars):
        share_below = (data[column] < (left_edge / 100)).sum() / len(data)
        bar.set_facecolor(cmap_gnuplot(share_below))
    if show_yaxis:
        axis.set_ylabel('Number of municipalities')
    else:
        axis.tick_params(left = False, labelleft = False)
    axis.set_xlabel(xlabel, labelpad = 10, fontsize = 18)
    axis.grid(visible = True, axis = 'y')


fig,ax = plt.subplots(1,3,figsize = (text_width/ctm,text_width/3.5/ctm), sharex = True,sharey = True,layout='constrained')

_plot_indicator_hist(ax[0], gemeente_gini, 'intensity', 'Intensity [% points]', show_yaxis = True)
_plot_indicator_hist(ax[1], gemeente_gini, 'separation', 'Separation [%]')
_plot_indicator_hist(ax[2], gemeente_gini, 'scale_rel', 'Relative scale [%]')


#plt.savefig('../report/figures/distribution_1D/1_D_dist_ind.svg', facecolor = 'white',pad_inches =0)
plt.close()

In [18]:
# Quartiles.
quartile_reports = [
    ('Intensity q1: {:.2f}, q2: {:.2f}, q3: {:.2f} [% points]', 'intensity'),
    ('Separation q1: {:.2f}, q2: {:.2f}, q3: {:.2f} [%]', 'separation'),
    ('Scale (rel) q1: {:.2f}, q2: {:.2f}, q3: {:.2f} [%]', 'scale_rel'),
]
for report_template, indicator in quartile_reports:
    # The indicators are stored as fractions; report them in percent.
    quartiles = [gemeente_gini[indicator].quantile(level) * 100 for level in (0.25, 0.5, 0.75)]
    print(report_template.format(*quartiles))

Intensity q1: 3.11, q2: 5.18, q3: 8.96 [% points]
Separation q1: 51.62, q2: 68.59, q3: 77.88 [%]
Scale (rel) q1: 7.93, q2: 19.25, q3: 29.46 [%]


In [19]:
# Skewness and excess kurtosis of the three indicators.
shape_reports = [
    ('Intensity skewness: {:.2f}, excess kurtosis: {:.2f}', 'intensity'),
    ('Separation skewness: {:.2f}, excess kurtosis: {:.2f}', 'separation'),
    ('Scale (rel) skewness: {:.2f}, excess kurtosis: {:.2f}', 'scale_rel'),
]
for report_template, indicator in shape_reports:
    values = gemeente_gini[indicator]
    print(report_template.format(values.skew(), values.kurtosis()))

Intensity skewness: 1.36, excess kurtosis: 2.17
Separation skewness: -1.24, excess kurtosis: 0.72
Scale (rel) skewness: 0.58, excess kurtosis: -0.11


In [20]:
# Relations between intensity, separation and scale: one scatter plot per pair of indicators.
fig,ax = plt.subplots(1,3,figsize = (text_width/ctm,4.5/ctm), layout = 'constrained')

ax[0].scatter(gemeente_gini['separation']*100,
                gemeente_gini['intensity']*100,
                color = 'black',
                alpha = 0.5)

ax[0].yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax[0].xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
# The first panel was the only one without axis labels.
ax[0].set_xlabel('Separation [%]')
ax[0].set_ylabel('Intensity [% points]')

ax[1].scatter(gemeente_gini['separation']*100,
                gemeente_gini['scale_rel']*100,
                color = 'black',
                alpha = 0.5)
ax[1].yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax[1].xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax[1].set_xlabel('Separation [%]')
ax[1].set_ylabel('Scale [%]')

ax[2].scatter(gemeente_gini['scale_rel']*100,
                gemeente_gini['intensity']*100,
                color = 'black',
                alpha = 0.5)
ax[2].yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax[2].xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
ax[2].set_xlabel('Scale [%]')
ax[2].set_ylabel('Intensity [% points]')

#plt.savefig('../report/figures/2D_relations.svg', facecolor = 'white',pad_inches =0)
plt.close()

In [21]:
# Pairwise correlations between the three indicators; rows whose intensity is exactly 0
# are excluded (0 is also the fill value used for municipalities without indicator values).
gemeente_gini[['intensity','separation','scale_rel']].loc[gemeente_gini['intensity'] != 0].corr()

Unnamed: 0,intensity,separation,scale_rel
intensity,1.0,-0.08591,-0.217458
separation,-0.08591,1.0,0.70711
scale_rel,-0.217458,0.70711,1.0


In [22]:
# Largest municipalities first; in the scatter plots that follow, rows are drawn in this order.
gemeente_gini = gemeente_gini.sort_values(by ='AANT_INW', ascending = False)

In [23]:
# Intensity, separation, scale in one plot.
fig, ax = plt.subplots(figsize = (text_width/ctm,text_width/ctm*0.6),layout='constrained')
# Marker area per inhabitant; also reused by later figures.
pop_city_scale = 1.5/2e3


norm_color = matplotlib.colors.Normalize(vmin = 0,vmax = 30)

# Edge width of the markers: 4 for the two highlighted municipalities, 1 for all others.
gemeente_gini['case_study'] = 1
gemeente_gini['case_study'] = gemeente_gini['case_study'].mask(gemeente_gini['GM_NAAM'].isin(['s_Gravenhage','Alkmaar']),4)

# x: separation, y: relative scale, colour: intensity, marker size: population.
scatter_plot = ax.scatter(gemeente_gini['separation'] * 100,
                          gemeente_gini['scale_rel'] * 100,
                          c = gemeente_gini['intensity'] * 100,
                          cmap = 'gnuplot_r',
                          linewidths= gemeente_gini['case_study'],
                          norm = norm_color,
                          edgecolors='Black',
                          s = gemeente_gini['AANT_INW']*pop_city_scale)


#plt.rc('axes',labelsize = 10)

# Produce a legend for the intensity (colors).
# legend1 = ax.legend(*scatter_plot.legend_elements(num=[0,0.1,0.3,0.5]),
#                     loc='center left', title='Intensity')
# ax.add_artist(legend1)

# Produce a legend for the city population (sizes). we use the *func* argument to map the number of inhabitants to the size of the dot.
# func is the inverse of the size mapping used in the scatter call (s = AANT_INW * pop_city_scale),
# so the legend entries listed in num are numbers of inhabitants.
kw = dict(prop='sizes',
          fmt = '{x:,.0f}',
          func=lambda x:x/pop_city_scale,
          color='White',
          markeredgecolor = 'Black',
          num = [10000,50000,100000,200000,500000])

legend2 = ax.legend(*scatter_plot.legend_elements(**kw),
                    loc='center left',
                    title='City size [inhabitants]',
                    handletextpad = 1.5,
                    labelspacing = 1.5,borderpad = 1)

ax.set_ylabel('Relative scale [%]')
ax.set_xlabel('Separation [%]')

# The colour bar is built from the same norm and colormap as the scatter plot.
cb = fig.colorbar(matplotlib.cm.ScalarMappable(norm = norm_color,cmap = 'gnuplot_r'),
                  location = 'right',
                  aspect = 40, pad = 0.01)
cb.set_label('Intensity [% points]')

#plt.savefig('../report/figures/segregation_patterns/three_indicators.svg', facecolor = 'white',pad_inches =0)
plt.close()

In [24]:
norm_color = matplotlib.colors.Normalize(vmin = 0,vmax = 40)
# Intensity, separation and scale vs city size, residential mix, and Gini coefficient.
fig, axes = plt.subplots(3,3,figsize = (text_width/ctm,13/ctm), constrained_layout=True)

# One row per city characteristic: (x values, x label shown under the middle panel,
# fixed x ticks or None for automatic ticks).
row_specs = [
    (gemeente_gini['AANT_INW']/1000, 'City population [thousands inhabitants]', range(200,1000,200)),
    (gemeente_gini['share_NW_c']*100, 'Share of residents with a NW migration background [%]', None),
    (gemeente_gini['Gini']*100, 'Gini coefficient', None),
]
# One column per indicator: (y values, title shown above the top panel). The y axes carry
# no label; the column title names the plotted quantity.
column_specs = [
    (gemeente_gini['intensity']*100, 'Intensity [% points]'),
    (gemeente_gini['separation']*100, 'Separation [%]'),
    (gemeente_gini['scale_rel']*100, 'Scale [%]'),
]

for row_idx, (x_values, x_label, x_ticks) in enumerate(row_specs):
    for col_idx, (y_values, column_title) in enumerate(column_specs):
        panel = axes[row_idx][col_idx]
        # Colour: share of NW residents; marker size: population.
        panel.scatter(x_values,
                      y_values,
                      c = gemeente_gini['share_NW_c']*100,
                      s = gemeente_gini['AANT_INW']*pop_city_scale/1.5,
                      norm = norm_color,
                      alpha = 0.7,
                      cmap = 'gnuplot_r')
        if x_ticks is not None:
            panel.set_xticks(x_ticks)
        panel.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
        panel.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
        if row_idx == 0:
            panel.set_title(column_title, loc = 'left')
        if col_idx == 1:
            panel.set_xlabel(x_label)

cb = fig.colorbar(matplotlib.cm.ScalarMappable(norm = norm_color, cmap =  'gnuplot_r'),
                  ax = axes[:,:],
                  pad = 0.01, aspect = 40)
cb.set_label('Share of residents with a NW migration background [%]')
fig.set_constrained_layout_pads(h_pad = 0.08)
#plt.savefig('../report/figures/ind_vs_city_char.png',facecolor = 'white',pad_inches =0)
plt.close()

In [25]:
gemeente_gini[['intensity','separation','scale_rel',
               'AANT_INW','share_NW_c','Gini']].corr().loc[['AANT_INW','share_NW_c','Gini'],
                                                           ['intensity','separation','scale_rel']]

Unnamed: 0,intensity,separation,scale_rel
AANT_INW,0.42289,0.154949,0.150458
share_NW_c,0.636104,0.296064,0.276571
Gini,0.157778,0.007696,0.030166


In [26]:
norm_color = matplotlib.colors.Normalize(vmin = 0,vmax = 40)

# Absolute scale vs. city population.
fig, axes = plt.subplots(1,2,figsize = (text_width/ctm,text_width/ctm*0.3), layout = 'constrained')

# Left panel: log-log axes, values in inhabitants.
# NOTE: a scale of 0 (the fill value for municipalities without a scale) cannot be drawn
# on a logarithmic axis, so those municipalities only appear in the right panel.
axes[0].scatter(gemeente['AANT_INW'],
            gemeente['scale'],
            c = gemeente['share_NW_c'] * 100,
            cmap = 'gnuplot_r',
            norm = norm_color,
            edgecolors='Black')


# The left panel was missing its y label.
axes[0].set_ylabel('Scale [inhabitants]')
axes[0].set_xlabel('City population [inhabitants]')
axes[0].set_xscale('log')
axes[0].set_yscale('log')

# Right panel: linear axes, values in thousands of inhabitants.
axes[1].scatter(gemeente['AANT_INW']/1000,
            gemeente['scale']/1000,
            c = gemeente['share_NW_c'] * 100,
            cmap = 'gnuplot_r',
            norm = norm_color,
            edgecolors='Black')

axes[1].set_ylabel('Scale [thousands inhabitants]')
axes[1].set_xlabel('City population [thousands inhabitants]')
axes[1].yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
axes[1].xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))

cb = fig.colorbar(matplotlib.cm.ScalarMappable(norm = norm_color,cmap = 'gnuplot_r'),
                  ax = axes[:],
                  location = 'right',
                  aspect = 20, pad = 0.01)
cb.set_label('Share of individuals with \n a non-Western migration background [%]',
             size = 12)

#plt.savefig('../report/figures/abs_scale_vs_pop.svg', facecolor = 'white',pad_inches =0)
plt.close()