In [2]:
import json
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import pickle
import math
import geopandas as gpd

In [8]:
od_data = np.load('od_matrix.npy')
duration_data = np.load('duration_matrix.npy')
distance_data = np.load('distance_matrix.npy')
# remove transit zones
od_data = od_data[:-3, :-3, :]
duration_data = duration_data[:-3, :-3, :]
distance_data = distance_data[:-3, :-3, :]
# set remaing nans to avg
avg_duration = int(np.nanmean(duration_data))
duration_data[np.isnan(duration_data)] = avg_duration
avg_distance = int(np.nanmean(distance_data))
distance_data[np.isnan(distance_data)] = avg_distance

# 5 clusters
parts = np.zeros(190, dtype=int) - 1
for cluster in range(1,6):
    shapefile_path = f"./5_zones/{cluster}.shp"
    df = gpd.read_file(shapefile_path)
    for df_row in range(df.shape[0]):
        parts[df["id"][df_row]-1] = cluster-1
# print(parts)
parts_5 = np.delete(parts, 41)
print(parts_5)
print(parts_5.shape)

# 10 clusters
parts = np.zeros(190, dtype=int) - 1
for cluster in range(1,11):
    shapefile_path = f"./10_zones/{cluster}.shp"
    df = gpd.read_file(shapefile_path)
    for df_row in range(df.shape[0]):
        parts[df["id"][df_row]-1] = cluster-1
# print(parts)
parts_10 = np.delete(parts, 41)
print(parts_10)
print(parts_10.shape)

# 20 clusters
parts = np.zeros(190, dtype=int) - 1
for cluster in range(1,21):
    shapefile_path = f"./20_zones/{cluster}.shp"
    df = gpd.read_file(shapefile_path)
    for df_row in range(df.shape[0]):
        parts[df["id"][df_row]-1] = cluster-1
# print(parts)
parts_20 = np.delete(parts, 41)
print(parts_20)
print(parts_20.shape)

    cluster   id
0         1  131
1         1  132
2         1  133
3         1  134
4         1  162
5         1  163
6         1  164
7         1  165
8         1  166
9         1  167
10        1  168
11        1  169
12        1  170
13        1  171
14        1  172
15        1  173
16        1  182
17        1  183
18        1  184
19        1  185
20        1  186
21        1  187
22        1  188
23        1  189
24        1  190


In [11]:
def get_clustered_matrix(parts, number_clusters, number_of_transit_zones, matrix_type, data_path: str):
    data = np.load(data_path)
    data = data[:-3, :-3, :]
    time = data.shape[2]
    clustered_matrix = np.zeros((number_clusters+number_of_transit_zones, number_clusters+number_of_transit_zones, time), dtype=int)
    normalization_matrix = np.zeros((number_clusters+number_of_transit_zones, number_clusters+number_of_transit_zones, time))
    
    for origin_area in range(data.shape[0]):
        if origin_area == 41:
            continue
        if origin_area > 41:
            parts_origin = origin_area-1
        else:
            parts_origin = origin_area
        for destination_area in range(data.shape[1]):
            if destination_area == 41:
                continue
            if destination_area > 41:
                parts_dest = destination_area-1
            else:
                parts_dest = destination_area
            for hour in range(time):
                if  not np.isnan(data[origin_area, destination_area, hour]):
                    clustered_matrix[parts[parts_origin], parts[parts_dest], hour] += data[origin_area, destination_area, hour]
                    normalization_matrix[parts[parts_origin], parts[parts_dest], hour] += 1
    
    if matrix_type == 'duration' or matrix_type == 'distance':
        clustered_matrix = (clustered_matrix/normalization_matrix).round(1)
    # print(clustered_matrix)
    np.save(str(number_clusters)+'_clusters/' + data_path, clustered_matrix)
    return clustered_matrix

# create corresponding od matrix
number_clusters = 5
number_of_transit_zones = 0
clustered_od_matrix = get_clustered_matrix(parts_5, number_clusters, number_of_transit_zones,'od', 'od_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_5, number_clusters, number_of_transit_zones,'duration', 'duration_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_5, number_clusters, number_of_transit_zones,'distance', 'distance_matrix.npy')

# create corresponding od matrix
number_clusters = 10
number_of_transit_zones = 0
clustered_od_matrix = get_clustered_matrix(parts_10, number_clusters, number_of_transit_zones,'od', 'od_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_10, number_clusters, number_of_transit_zones,'duration', 'duration_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_10, number_clusters, number_of_transit_zones,'distance', 'distance_matrix.npy')

# create corresponding od matrix
number_clusters = 20
number_of_transit_zones = 0
clustered_od_matrix = get_clustered_matrix(parts_20, number_clusters, number_of_transit_zones,'od', 'od_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_20, number_clusters, number_of_transit_zones,'duration', 'duration_matrix.npy')
clustered_od_matrix = get_clustered_matrix(parts_20, number_clusters, number_of_transit_zones,'distance', 'distance_matrix.npy')

In [13]:
od_data = np.load('5_clusters/od_matrix.npy')
print(od_data.shape)
print(od_data[:,:,8:-4].sum())

(5, 5, 24)
1743156


In [15]:
# find charge station distribution over TAZs using numpy output
# only interested on 50kW -> third output, using this for RL approach

existing_charger_distr_multiple_different_kW = np.load("UMax_charge_Justins_optim.npy")
existing_charger_distr = existing_charger_distr_multiple_different_kW[:,2] # only 50 kW
print(existing_charger_distr.sum())

map_existing_charger_clusters_to_taz =  {
    1: [56, 57],
    2: [52, 62, 65, 66],
    3: [43, 48, 49, 50, 51, 70, 71, 72],
    4: [6, 7, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 39, 40, 41, 44, 45, 46, 47, 73, 74, 75],
    5: [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 22, 23, 24, 25, 26, 37, 38, 42],
    6: [54, 55, 58, 59, 60],
    7: [53, 61, 63, 64, 67, 90],
    8: [68, 69, 83, 84, 85, 86, 87, 88, 89],
    9: [8, 9, 10, 11, 20, 76, 77, 78, 79, 80, 81, 82, 104, 105, 106, 107],
    10: [18, 19, 21, 108, 109, 110],
    11: [178, 179, 180, 181, 184],
    12: [172, 173, 174, 175, 176, 177, 185],
    13: [91, 92, 93, 94, 95, 96, 129, 171],
    14: [97, 98, 99, 100, 101, 102, 103, 116, 117, 118, 119, 122, 123, 128],
    15: [111, 112, 113, 114, 115, 120, 121, 142],
    16: [182, 183, 186, 187],
    17: [169, 170, 188],
    18: [130, 131, 132, 133, 134],
    19: [124, 125, 126, 127, 135, 136, 137, 138, 152],
    20: [139, 140, 141, 143, 144, 145, 146, 147, 150],
    21: [190],
    22: [168, 189],
    23: [161, 162, 163, 164, 165, 166, 167],
    24: [155, 156, 157, 158, 159, 160],
    25: [148, 149, 151, 153, 154]
    }

number_baseline_clusters = 25
# build global charger dictionary
taz_chargers = []
number_taz = 190
for area in range(number_baseline_clusters):
    taz_in_area = map_existing_charger_clusters_to_taz[area+1]
    number_taz_in_area = len(taz_in_area)
    number_chargers = int(existing_charger_distr[area]/number_taz_in_area)
    for taz in taz_in_area:
        taz_chargers.append(number_chargers)
print(sum(taz_chargers))

# create chargefiles for tasks
# top-level
taz_to_clusters = parts_20 # remove values corresponding to boundary zones
number_clusters = 20
top_level_chargers = np.zeros(number_clusters)
for taz in range(number_taz):
    if taz == 41:
        continue
    if taz > 41:
        taz -= 1
    cluster = taz_to_clusters[taz] 
    top_level_chargers[cluster] += taz_chargers[taz]
print(top_level_chargers.shape)
print(top_level_chargers.sum())
print(top_level_chargers)
np.save(str(number_clusters)+'_clusters/charging_stations.npy', top_level_chargers)

     

6008.162569728143
5946
(20,)
5906.0
[372. 692. 314.  44. 252. 288. 344. 400. 267. 454.  52. 160.  52. 254.
 282.  68. 225. 219. 691. 476.]


In [122]:
# plotting clusters:
from geopandas.plotting import plot_polygon_collection

number_clusters = 5
shapefile_path = f"./{number_clusters}_zones/1.shp"
city = gpd.read_file(shapefile_path)
city["cluster"] = 1
for cluster in range(2,number_clusters+1):
    shapefile_path = f"./{number_clusters}_zones/{cluster}.shp"
    df = gpd.read_file(shapefile_path)
    df["cluster"] = cluster
    city = city.append(df)
# print(city)
fig, ax = plt.subplots(figsize=(13, 6.5))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
y_ticks = [i for i in range(number_clusters+1)]
city.plot(ax=ax, column='cluster', vmax=number_clusters, legend=True, legend_kwds={'ticks': y_ticks})
plot_polygon_collection(ax, city['geometry'], vmax=number_clusters, values=city['cluster'])
# plt.xlim((-122.525, -122.35))
# plt.ylim((37.7, 37.850))
plt.xticks([])
plt.yticks([])
plt.savefig(str(number_clusters)+'_clusters/cluster_plot.pdf')
# plt.show()
plt.close()

# number_clusters = 10
# shapefile_path = f"./{number_clusters}_zones/1.shp"
# city = gpd.read_file(shapefile_path)
# city["cluster"] = 1
# for cluster in range(2,number_clusters+1):
#     shapefile_path = f"./{number_clusters}_zones/{cluster}.shp"
#     df = gpd.read_file(shapefile_path)
#     df["cluster"] = cluster
#     city = city.append(df)
# # print(city)
# fig, ax = plt.subplots(figsize=(13, 6.5))
# ax.spines['top'].set_visible(False)
# ax.spines['right'].set_visible(False)
# ax.spines['bottom'].set_visible(False)
# ax.spines['left'].set_visible(False)
# y_ticks = [i for i in range(number_clusters+1)]
# city.plot(ax=ax, column='cluster', vmax=number_clusters, legend=True, cmap=plt.get_cmap("tab10"), legend_kwds={'ticks': y_ticks})
# plot_polygon_collection(ax, city['geometry'], vmax=number_clusters, values=city['cluster'], cmap=plt.get_cmap("tab10"))
# plt.xticks([])
# plt.yticks([])
# plt.savefig(str(number_clusters)+'_clusters/cluster_plot.pdf')
# # plt.show()
# plt.close()

# number_clusters = 20
# shapefile_path = f"./{number_clusters}_zones/1.shp"
# city = gpd.read_file(shapefile_path)
# city["cluster"] = 1
# for cluster in range(2,number_clusters+1):
#     shapefile_path = f"./{number_clusters}_zones/{cluster}.shp"
#     df = gpd.read_file(shapefile_path)
#     df["cluster"] = cluster
#     city = city.append(df)
# # print(city)
# fig, ax = plt.subplots(figsize=(13, 6.5))
# ax.spines['top'].set_visible(False)
# ax.spines['right'].set_visible(False)
# ax.spines['bottom'].set_visible(False)
# ax.spines['left'].set_visible(False)
# y_ticks = [i for i in range(number_clusters+1)]
# city.plot(ax=ax, column='cluster', vmax=number_clusters, legend=True, cmap=plt.get_cmap("tab20"), legend_kwds={'ticks': y_ticks})
# plot_polygon_collection(ax, city['geometry'], vmax=number_clusters, values=city['cluster'], cmap=plt.get_cmap("tab20"))
# plt.xticks([])
# plt.yticks([])
# plt.savefig(str(number_clusters)+'_clusters/cluster_plot.pdf')
# # plt.show()
# plt.close()

  aout[:] = out
  for poly in geom:
  for t in polygon])
  for poly in geom:
  for t in polygon])
