In [None]:
import warnings
warnings.filterwarnings('ignore') # hide warnings
import os
import numpy as np
import sys
sys.path.append("..")
from pickle import dump
from dyntapy.supply_data import get_toy_network, relabel_graph
from dyntapy.demand_data import add_centroids, od_graph_from_matrix
from dyntapy.visualization import show_network, show_demand
from dyntapy.assignments import StaticAssignment

In [None]:
import geopandas as gpd
import numpy as np
from sklearn.cluster import KMeans

# --- Settings ---------------------------------------------------------------
# Study area; interpolated into the shapefile paths used in this notebook.
city = "BRUSSEL"

# One (radius band label, ideal number of zones per cluster) pair per band that is aggregated.
# The band label is the suffix of that band's shapefile name.
radius1, groupSize1 = "10-20", 4
radius2, groupSize2 = "20-40", 16

# TODO make code more waterproof (naming of input and outputfile)
def aggregate_zones(city = city, radius = radius1, idealNbZonesPerCluster = groupSize1):
    '''
    Cluster the zones of one radius band with k-means and dissolve every cluster into a single zone.

    Files (folder is fixed to "STA_prep/shapefile_data/{city}_40_10/"):
      - reads   {city}_40_10_{radius}.shp
      - REWRITES that same file with a 'cluster' column holding the cluster label of each zone
      - writes  {city}_40_10_{radius}_knn.shp with one dissolved geometry per cluster
        (the "_knn" suffix is only a file-naming convention; the algorithm used is k-means)

    Parameters:
      city                    -- city name used in the folder and file names
      radius                  -- radius band label used in the file names, e.g. "10-20"
      idealNbZonesPerCluster  -- target number of zones per cluster; the number of clusters is
                                 ceil(number of zones / idealNbZonesPerCluster). Actual cluster
                                 sizes vary, see the statistics that are printed.

    Returns:
      GeoDataFrame of the dissolved zones, with 'cluster' as a regular column.

    Raises:
      ValueError if idealNbZonesPerCluster is smaller than 1 or the input shapefile has no zones.

    Note:
    code not waterproof: always looks in folder "STA_prep/shapefile_data/{city}_40_10/"
    '''
    # Fail early with a clear message instead of a ZeroDivisionError / obscure KMeans error.
    if idealNbZonesPerCluster < 1:
        raise ValueError(
            f"idealNbZonesPerCluster must be at least 1, got {idealNbZonesPerCluster!r}"
        )

    inputFile = f"STA_prep/shapefile_data/{city}_40_10/{city}_40_10_{radius}.shp"
    outputFile = f"STA_prep/shapefile_data/{city}_40_10/{city}_40_10_{radius}_knn.shp"
    shapefile = gpd.read_file(inputFile)

    nbZones = len(shapefile)
    if nbZones == 0:
        raise ValueError(f"No zones found in {inputFile}; nothing to aggregate.")

    # Zones are clustered on their centroid coordinates: one (x, y) row per zone.
    centroids = shapefile.centroid
    points = np.column_stack((centroids.x, centroids.y))

    k = int(np.ceil(nbZones / idealNbZonesPerCluster))  # number of clusters to create
    print(f'Aggregation of {nbZones} zones into {k} clusters.')
    # random_state pins the seed. NOTE(review): n_init is left at the library default,
    # which is not the same in every scikit-learn release -- pin it if results must be
    # identical across environments.
    kmeans = KMeans(n_clusters=k, random_state=0).fit(points)

    shapefile["cluster"] = kmeans.labels_
    # dissolve merges the geometries per cluster; with its default aggfunc the remaining
    # attribute columns take the value of the first zone in each cluster.
    aggregated = shapefile.dissolve(by="cluster").reset_index()

    # count / mean / std / min / quartiles / max of the number of zones per cluster
    basicStatisticsZoneSizes = shapefile.groupby('cluster').size().describe()
    print(f"Basic statistics of zone aggregation:\n{basicStatisticsZoneSizes}\n")

    # Save both results: the labelled original zones (overwrites the input file)
    # and the aggregated zones.
    shapefile.to_file(inputFile)
    aggregated.to_file(outputFile)

    # Return aggregated shapefile
    return aggregated

In [None]:
# Band 1: every argument comes from the function defaults (city, radius1, groupSize1).
agg_shapefile1 = aggregate_zones()

# Band 2: same city, but the wider band and the larger target group size.
agg_shapefile2 = aggregate_zones(
    radius=radius2,
    idealNbZonesPerCluster=groupSize2,
)

In [None]:
# After-market fix: the 0-10 band is not clustered, but it still needs a 'cluster' column
# so that later every zone in the 0-40 radius has a cluster value.
# Each zone simply becomes its own cluster, numbered 0 .. n-1.
ring0_in = f'STA_prep/shapefile_data/{city}_40_10/{city}_40_10_0-10.shp'
# No clustering is run for this band; the '_knn' suffix only follows the naming convention
# of the clustered bands.
ring0_out = f'STA_prep/shapefile_data/{city}_40_10/{city}_40_10_0-10_knn.shp'

shapefile0 = gpd.read_file(ring0_in)
shapefile0['cluster'] = range(len(shapefile0))
shapefile0.to_file(ring0_out)


In [None]:
# Concatenate the three radius bands into one shapefile with cluster ids that do not
# repeat across bands.
import pandas as pd

shapefile_dir = f'STA_prep/shapefile_data/{city}_40_10'
shapefile1 = gpd.read_file(f'{shapefile_dir}/{city}_40_10_0-10_knn.shp')
shapefile2 = gpd.read_file(f'{shapefile_dir}/{city}_40_10_{radius1}_knn.shp')
shapefile3 = gpd.read_file(f'{shapefile_dir}/{city}_40_10_{radius2}_knn.shp')

# Every band numbers its clusters from 0, so shift each band past the highest id of the
# band before it. shapefile2 is shifted first, hence its max already includes band 1.
shapefile2['cluster'] = shapefile2['cluster'] + (shapefile1.cluster.max() + 1)
shapefile3['cluster'] = shapefile3['cluster'] + (shapefile2.cluster.max() + 1)

# Stack the bands. ignore_index gives the combined frame a fresh 0..n-1 index instead of
# three overlapping per-file indexes, so label-based and positional lookups agree.
combined_shapefile = gpd.GeoDataFrame(
    pd.concat([shapefile1, shapefile2, shapefile3], ignore_index=True)
)

# Write next to the inputs; the path follows the `city` setting instead of a fixed name.
combined_shapefile.to_file(f'{shapefile_dir}/{city}_40_10_aggr_comb.shp')

#### Aggregating the OD matrix (work in progress)

In [None]:
# OD matrix
# The file name follows the `city` setting, like the shapefile paths.
# TODO the "_40_9_" part of the file name is still fixed; define dynamically
original_od_matrix = pd.read_excel(f"STA_prep/od_matrix_data/{city}_40_9_.xlsx")

# mean geometry and summed demand in each cluster
# clustered_zones = combined_shapefile.groupby("cluster").agg({"geometry": "mean", "demand": "sum"})

# Create a new origin-destination matrix with aggregated zones:
# one row and one column per distinct cluster id, initialised to zero demand.
nbAggZones = combined_shapefile["cluster"].nunique()
aggregated_od_matrix = np.zeros((nbAggZones, nbAggZones))

# ---------------------------------------------------------------------------
# Unfinished draft of the aggregation, kept for reference. NOTE(review) before enabling:
#   * indices_i / indices_j are only assigned in lines that are themselves commented out.
#   * original_od_matrix is a DataFrame, so original_od_matrix[a, b] is a column lookup,
#     not a cell lookup; use .iloc / .loc (after mapping zones to rows/columns).
#   * aggregated_od_matrix is a NumPy array and has no .to_excel; wrap it in a DataFrame.
#   * The cluster ids in combined_shapefile are shifted per band in the concatenation
#     cell, so the "numbers occur multiple times" problem noted below no longer applies
#     to that frame.
#   * combined_shapefile is built from the *_knn files; the zone -> cluster labels of the
#     original zones are written to the per-band input shapefiles by aggregate_zones.
# ---------------------------------------------------------------------------
# for i in range(nbAggZones):
#     for j in range(nbAggZones):
#         # # TODO: find indices of original zones belonging to current aggregated zones
#         # # (problem: clusters are number 0 .. nbClusters - 1 for all radiuses, so numbers will occur multiple times, even though they belong to different regions)
#         # zones_i = combined_shapefile[combined_shapefile["cluster"] == i]
#         # zones_j = combined_shapefile[combined_shapefile["cluster"] == j]
#         # indices_i = np.where(combined_shapefile["cluster"] == i) # TODO use these indices to retrieve ZONENUMMER from the correct rows
#         # indices_j = np.where(combined_shapefile["cluster"] == j)

#         # sum the demand between all pairs of original zones that belong to the current aggregated zones
#         demand_sum = 0
#         for origZone_i in indices_i:
#             for origZone_j in indices_j:
#                 demand_sum += original_od_matrix[origZone_i, origZone_j]

#         # assign the demand sum to the current cell in the aggregated OD matrix
#         aggregated_od_matrix[i, j] = demand_sum


# # Save the new origin-destination matrix to an excel file
# aggregated_od_matrix.to_excel("STA_prep/od_matrix_data/BRUSSEL_40_9_aggregated.xlsx")

In [None]:
# Preview the first rows of the OD matrix that was read from Excel, to inspect its layout.
original_od_matrix.head()


In [None]:
# Spot check: pull the rows of two example clusters out of the combined shapefile.
i, j = 1, 2

# Boolean row masks, computed once and reused for both the row selection and the positions.
in_cluster_i = combined_shapefile["cluster"] == i
in_cluster_j = combined_shapefile["cluster"] == j

zones_i = combined_shapefile[in_cluster_i]
zones_j = combined_shapefile[in_cluster_j]

# np.where with a single argument returns a tuple holding the array of matching row positions.
i_indices = np.where(in_cluster_i)
j_indices = np.where(in_cluster_j)

print(zones_i)
print(zones_j)