In [None]:
import geopandas as gpd
import pandas as pd
from scipy.stats import norm 
import numpy as np
from shapely.geometry import Polygon
import string
from sklearn.cluster import AgglomerativeClustering

In [None]:
# Load the Leiden zone delineation layer from the processed-data GeoPackage.
zones = gpd.read_file("../data/processed_data/zones_delineation/PC_Leiden.gpkg")

In [None]:
# Median area of the zone polygons.
# NOTE(review): the value is in squared CRS units - confirm the layer uses a projected CRS.
zones.area.median()

In [None]:
# Read the toy household table. Passing `names` labels the 27 columns 0..26
# and makes pandas treat every line of the file as data.
households_data = pd.read_csv('../data/toy_example/toy_example.csv', names=list(range(27)))

In [None]:
# Perturb every cell with Gaussian noise (mean 0, sd 0.5), then binarise at 0.5.
# The noise is drawn in one vectorised call with a fixed seed so that
# "Restart Kernel -> Run All" always reproduces the same toy city.
# NOTE: re-running this cell on its own perturbs the already-binarised data again;
# re-run the loading cell first.
NOISE_SEED = 0
noise = norm.rvs(loc=0, scale=0.5, size=households_data.shape, random_state=NOISE_SEED)
households_data = (households_data + noise).gt(0.5).astype(int)
#toy_example = toy_example.applymap(lambda x: 0 if x<0 else x)

In [None]:
# Display the household table for a visual check.
households_data

In [None]:
# Alphabetical labels for the city cells: 'A'..'Z' followed by 'AA'..'ZZ'.
list_char = [*string.ascii_uppercase]
alphabet = list_char + [first + second for first in list_char for second in list_char]
# Only the first 13 labels are used to name the rows and columns of the city.
name_columns_1 = alphabet[:13]
# name_columns_2 = [sub + '_2' for sub in alphabet[0:13]]

In [None]:
def _block_means(data, offset, labels):
    """Average `data` over 2x2 windows whose first label is shifted by `offset`.

    Entry (row j, column i) of the result is the mean of the window covering
    row labels 2*i+offset .. 2*i+offset+1 and column labels
    2*j+offset .. 2*j+offset+1 of `data` (`.loc` slices include both ends).
    """
    n_cells = len(labels)
    rows = []
    for j in range(n_cells):
        col_start = 2 * j + offset
        row = []
        for i in range(n_cells):
            row_start = 2 * i + offset
            window = data.loc[row_start:row_start + 1, col_start:col_start + 1]
            # Mean of the per-column means of the window.
            row.append(window.mean().mean())
        rows.append(row)
    return pd.DataFrame(data=rows, columns=labels, index=labels)


# city_1 aggregates windows starting at label 0; city_2 uses the same windows shifted by one.
city_1 = _block_means(households_data, 0, name_columns_1)
city_2 = _block_means(households_data, 1, name_columns_1)
                      #columns = name_columns_2,
                      #index = name_columns_2)

In [None]:
def compute_res_mix(city_mix):
    """Reshape a labelled city matrix into one row per cell.

    Parameters
    ----------
    city_mix : pandas.DataFrame
        Matrix of residential-mix values whose column and index labels are
        strings.

    Returns
    -------
    pandas.DataFrame
        Columns ``res_mix`` (the cell value) and ``polygon_id`` (column label
        followed by index label), ordered column by column.
    """
    # Transposing before stacking makes the column label the outer level.
    stacked = city_mix.T.stack()
    # Name both levels explicitly: relying on the default 'level_0'/'level_1'
    # names breaks as soon as the index or the columns of `city_mix` are named.
    stacked = stacked.rename_axis(['column_label', 'index_label']).rename('res_mix')
    res_mix = stacked.reset_index()
    res_mix['polygon_id'] = res_mix['column_label'] + res_mix['index_label']
    return res_mix[['res_mix', 'polygon_id']]

In [None]:
# Generation of a grid.
def create_grid_geometry(city_mix, size_cell):
    """Build a regular grid of square cells laid out like `city_mix`.

    Parameters
    ----------
    city_mix : pandas.DataFrame
        City matrix; its column labels are placed along x and its index
        labels along y. Labels must be strings.
    size_cell : int or float
        Side length of one cell.

    Returns
    -------
    grid : geopandas.GeoDataFrame
        One square polygon per cell, with columns ``polygon_id`` and
        ``geometry``.
    centroids : geopandas.GeoDataFrame
        Copy of `grid` whose geometry is the centroid of each cell.
    """
    # Take the labels from `city_mix` itself instead of a notebook-level list,
    # so the ids always match the ones built by `compute_res_mix`
    # (column label followed by index label).
    column_labels = list(city_mix.columns)
    row_labels = list(city_mix.index)

    polygons = []
    polygon_id = []
    for x, column_label in enumerate(column_labels):
        x_min, x_max = size_cell * x, size_cell * (x + 1)
        for y, row_label in enumerate(row_labels):
            y_min, y_max = size_cell * y, size_cell * (y + 1)
            polygons.append(Polygon([(x_min, y_min),
                                     (x_max, y_min),
                                     (x_max, y_max),
                                     (x_min, y_max)]))
            polygon_id.append(column_label + row_label)

    grid = gpd.GeoDataFrame({'polygon_id': polygon_id, 'geometry': polygons})

    centroids = grid.copy()
    centroids['geometry'] = grid.centroid

    return grid, centroids

In [None]:
def compute_exposure(res_mix, grid):
    """Compute the distance-weighted exposure of every polygon.

    For each destination polygon the exposure is the weighted mean of the
    ``res_mix`` values of all polygons, with weight 1 at zero distance and
    ``1 / distance**2`` otherwise.

    Parameters
    ----------
    res_mix : pandas.DataFrame
        Must contain the columns ``polygon_id`` and ``res_mix``.
    grid : geopandas.GeoDataFrame
        Must contain the columns ``polygon_id`` and ``geometry`` and provide
        ``distance(geometry)``. Rows do not need to be in the same order as
        `res_mix`; polygons are matched on ``polygon_id``.

    Returns
    -------
    pandas.DataFrame
        Columns ``polygon_id``, ``weight`` (sum of the weights received),
        ``exposure`` and the columns of `res_mix`, sorted by ``polygon_id``.
    """
    # Pairwise distances, labelled with the grid's own ids so that the result
    # does not depend on `res_mix` and `grid` sharing the same row order.
    polygon_ids = grid['polygon_id'].to_numpy()
    pair_frames = [
        pd.DataFrame({'from_polygon': from_id,
                      'to_polygon': polygon_ids,
                      'distance': grid.distance(from_geometry).to_numpy()})
        for from_id, from_geometry in zip(polygon_ids, grid['geometry'])
    ]
    if pair_frames:
        # A single concat instead of growing a frame inside the loop.
        shortest_paths = pd.concat(pair_frames, ignore_index=True)
    else:
        shortest_paths = pd.DataFrame({'from_polygon': pd.Series(dtype=object),
                                       'to_polygon': pd.Series(dtype=object),
                                       'distance': pd.Series(dtype=float)})

    # Inverse squared distance; pairs at zero distance (a cell with itself) get weight 1.
    distance = shortest_paths['distance']
    shortest_paths['weight'] = (1 / distance**2).where(distance > 0, 1.0)

    # Attach each source polygon's value to all of its outgoing pairs.
    contributions = res_mix.merge(shortest_paths[['from_polygon', 'to_polygon', 'weight']],
                                  left_on='polygon_id',
                                  right_on='from_polygon')
    contributions['exposure'] = contributions['res_mix'] * contributions['weight']

    # Weighted mean per destination polygon.
    exposure = (contributions
                .groupby('to_polygon')[['weight', 'exposure']]
                .sum()
                .rename_axis('polygon_id')
                .reset_index())
    exposure['exposure'] = exposure['exposure'] / exposure['weight']

    return exposure.merge(res_mix, on='polygon_id')

In [None]:
# Build the geometry once (cells of side 3); both cities use the same labels,
# so they share the grid and its centroids.
grid, centroids = create_grid_geometry(city_1, 3)

# Long-format residential mix of each city.
res_mix_1 = compute_res_mix(city_1)
res_mix_2 = compute_res_mix(city_2)

# Exposure is measured between cell centroids.
exposure_1 = compute_exposure(res_mix_1, centroids)
exposure_2 = compute_exposure(res_mix_2, centroids)

In [None]:
# Tag every column with the number of its city, leaving the join key 'polygon_id' untouched.
exposure_1 = exposure_1.rename(columns=lambda name: name if name == 'polygon_id' else f'{name}_1')
exposure_2 = exposure_2.rename(columns=lambda name: name if name == 'polygon_id' else f'{name}_2')

In [None]:
# Attach the exposure and residential mix of both cities to the grid polygons.
columns_city_1 = ['polygon_id', 'exposure_1', 'res_mix_1']
columns_city_2 = ['polygon_id', 'exposure_2', 'res_mix_2']
grid = (
    grid
    .merge(exposure_1[columns_city_1], on='polygon_id')
    .merge(exposure_2[columns_city_2], on='polygon_id')
)

In [None]:
# Residential mix of city 2, on the same fixed 0-1 colour scale as the other maps
# so that the four figures can be compared directly.
grid.plot(column='res_mix_2', vmin=0, vmax=1)

In [None]:
# Residential mix of city 1, colour scale fixed to [0, 1].
grid.plot(column='res_mix_1', vmin=0, vmax=1)

In [None]:
# Exposure of city 1, colour scale fixed to [0, 1].
grid.plot(column='exposure_1', vmin=0, vmax=1)

In [None]:
# Exposure of city 2, colour scale fixed to [0, 1].
grid.plot(column='exposure_2', vmin=0, vmax=1)

In [None]:
# Draft of the regionalisation step: Ward-linkage agglomerative clustering
# constrained by a connectivity (adjacency) matrix.
# FIXME: `adjacency_matrix` and `X` are not defined in the cells above, so this
# cell raises NameError on a fresh kernel.
# FIXME: with `n_clusters=None`, scikit-learn also requires `distance_threshold`
# to be set; choose one of the two before running.
AgglomerativeClustering(n_clusters=None,
                        connectivity=adjacency_matrix,
                        linkage = 'ward').fit(X)

What remains to be done:
- Compute regions in the benchmark
- Compute regions in the exposure
- Compare the two sets of regions
- Compute the exposure with the fine-scale data and compare