In [1]:
import pandas as pd
import numpy as np
import os
import warnings
import multiprocessing
import json

from shapely.geometry import Point, Polygon, MultiPolygon
from datetime import datetime, date
from scipy.spatial import distance
from sklearn.preprocessing import MinMaxScaler
from spektral.utils import normalized_laplacian

warnings.filterwarnings('ignore')

2022-03-14 04:02:23.941280: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# Metropolitan cities (English names as they appear in the GeoJSON `CTP_ENG_NM` property).
cities = [
    'Seoul',
    'Busan',
    'Daegu',
    'Daejeon',
    'Gwangju',
]

# Data years; used as strings when building dataset file names.
years = ['2016', '2018']

# n_steps bounds the last usable window index in the train/test split below;
# length is the number of consecutive hourly graphs stacked into one window.
n_steps = 6
length = 12

In [3]:
# Load the administrative-boundary features at the three levels used below.
# The files carry Korean names (e.g. the *_KOR_NM properties), so the encoding is
# fixed to UTF-8 instead of relying on the platform's locale default.
with open('geojson/selected-city-gps.json', encoding='utf-8') as f:
    city_geo = json.load(f)

with open('geojson/selected-district-gps.json', encoding='utf-8') as f:
    district_geo = json.load(f)

with open('geojson/selected-subdistrict-gps.json', encoding='utf-8') as f:
    subdistrict_geo = json.load(f)

In [4]:
def get_polygons(g, level):
    """Convert a feature's GPS coordinate lists into a flat list of ``Polygon`` objects.

    Parameters
    ----------
    g : dict
        Feature whose ``g['geometry']['coordinates_gps']`` holds either a list of
        rings (``[ring][point][xy]``, three nesting levels) or a list of parts
        that each hold rings (``[part][ring][point][xy]``).
    level : str
        Administrative level label passed by callers; it is not used here and is
        kept only so existing calls stay valid.

    Returns
    -------
    list
        One ``Polygon`` per ring, in input order. Every ring becomes its own
        polygon; rings are never treated as holes.
    """
    coords = g['geometry']['coordinates_gps']

    # Measure the nesting depth by following first elements. The previous version
    # inferred it from np.array(coords).shape, which raises ValueError on ragged
    # input (parts/rings with different point counts) under NumPy >= 1.24.
    depth = 0
    probe = coords
    while isinstance(probe, (list, tuple)) and len(probe) > 0:
        depth += 1
        probe = probe[0]

    if depth == 3:
        # [ring][point][xy]: each top-level entry is already a ring.
        return [Polygon(ring) for ring in coords]

    # Deeper nesting: each top-level entry is a collection of rings.
    return [Polygon(ring) for part in coords for ring in part]

In [5]:
# Build and save the district-level spatial adjacency matrix of every city.
for selected_city in cities:
    district_names = []
    subdistrict_names = []
    city_polygons, district_polygons, subdistrict_polygons = [], [], []
    new_city_geo, new_district_geo, new_subdistrict_geo = [], [], []

    # Select the city's feature, then its districts and sub-districts by code prefix.
    for city in city_geo:
        if city['properties']['CTP_ENG_NM'] != selected_city:
            continue
        new_city_geo.append(city)
        for district in district_geo:
            if not district['properties']['SIG_CD'].startswith(city['properties']['CTPRVN_CD']):
                continue
            new_district_geo.append(district)
            for subdistrict in subdistrict_geo:
                if subdistrict['properties']['EMD_CD'].startswith(district['properties']['SIG_CD']):
                    new_subdistrict_geo.append(subdistrict)

    # Turn every selected feature into a MultiPolygon record with its code and names.
    for city in new_city_geo:
        polygons = get_polygons(city, 'city')
        city_polygons.append({
            'polygon': MultiPolygon(polygons),
            'code': city['properties']['CTPRVN_CD'],
            'name': city['properties']['CTP_KOR_NM'],
            'eng_name': city['properties']['CTP_ENG_NM'],
        })
    for district in new_district_geo:
        polygons = get_polygons(district, 'district')
        district_polygons.append({
            'polygon': MultiPolygon(polygons),
            'code': district['properties']['SIG_CD'],
            'name': district['properties']['SIG_KOR_NM'],
            'eng_name': district['properties']['SIG_ENG_NM'],
        })
    for subdistrict in new_subdistrict_geo:
        polygons = get_polygons(subdistrict, 'subdistrict')
        subdistrict_polygons.append({
            'polygon': MultiPolygon(polygons),
            'code': subdistrict['properties']['EMD_CD'],
            'name': subdistrict['properties']['EMD_KOR_NM'],
            'eng_name': subdistrict['properties']['EMD_ENG_NM'],
        })

    n_districts = len(district_polygons)
    A = np.zeros([n_districts, n_districts], dtype=int)

    for i, r in enumerate(district_polygons):
        district_names.append(r['name'])
        for j, c in enumerate(district_polygons):
            # Two districts are adjacent when their polygons intersect
            # (a district always intersects itself, so the diagonal is 1).
            intersecting = r['polygon'].intersects(c['polygon'])
            # Manual override for Busan: the listed district pairs are forced adjacent
            # in both directions regardless of the polygon test
            # (presumably a bridge-connected island district; confirm).
            forced = selected_city == 'Busan' and (
                (r['name'] == '영도구' and c['name'] in ['서구', '중구', '남구'])
                or (c['name'] == '영도구' and r['name'] in ['서구', '중구', '남구'])
            )
            if intersecting or forced:
                A[i, j] = 1

    pd.DataFrame(A, columns=district_names, index=district_names).to_csv(f'./datasets/graph_data/{selected_city}-adjacency-district.csv')

In [6]:
# Derive and save the diagonal degree matrix of every city's district adjacency graph.
for selected_city in cities:
    A = pd.read_csv(f'./datasets/graph_data/{selected_city}-adjacency-district.csv', engine='c', index_col=0)
    labels = A.columns.to_list()

    # Row sums of the adjacency matrix on the diagonal. Built in one step and written
    # once: the file used to be rewritten on every row iteration, and the full row-sum
    # vector was recomputed each time.
    D = np.diag(np.array(A).sum(axis=1)).astype(int)

    pd.DataFrame(D, columns=labels, index=labels).to_csv(f'./datasets/graph_data/{selected_city}-degree-district.csv')

In [7]:
# Symmetrically normalise each city's adjacency matrix: L = D^(-1/2) A D^(-1/2).
for selected_city in cities:
    adjacency_df = pd.read_csv(f'./datasets/graph_data/{selected_city}-adjacency-district.csv', engine='c', index_col=0)
    degree_df = pd.read_csv(f'./datasets/graph_data/{selected_city}-degree-district.csv', engine='c', index_col=0)

    columns = adjacency_df.columns.to_list()
    A = adjacency_df.to_numpy()
    D = degree_df.to_numpy()

    # np.linalg.inv raises LinAlgError if any degree is zero (singular matrix).
    inv_sqrt_D = np.linalg.inv(np.sqrt(D))
    L = inv_sqrt_D.dot(A).dot(inv_sqrt_D)

    pd.DataFrame(L, index=columns, columns=columns).to_csv(f'./datasets/graph_data/{selected_city}-normalized-district.csv')

## Multi-view Graph Generation

In [9]:
def normalized_matrix(city, year, data, rel_type, metric):
    """Write thresholded, symmetrically normalised similarity graphs for one data view.

    The pairwise similarity between districts is ``1 - metric(col_i, col_j)`` over the
    columns of ``data``. For each scale in ``[0.25, 0.5, 1, 2, 4]`` the similarities are
    binarised with threshold ``mean_similarity * scale``, normalised as
    ``D^(-1/2) A D^(-1/2)`` and saved to
    ``./datasets/graph_data/{city}-{year}-{rel_type}-normalized-district-{metric.__name__}-{idx}.csv``.

    Parameters
    ----------
    city, year : str
        Used to locate the city's adjacency CSV (for district labels) and to name outputs.
    data : 2-D array
        One column per district, in the column order of the adjacency CSV.
    rel_type : str
        View label used in the output file name.
    metric : callable
        ``metric(u, v) -> float`` dissimilarity; must expose ``__name__``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If a threshold leaves any district with zero degree (singular degree matrix).
    """
    A_s = pd.read_csv(f'./datasets/graph_data/{city}-adjacency-district.csv', engine='c', index_col=0)
    columns = A_s.columns.to_list()
    n_districts = len(columns)

    # Raw pairwise similarities; this matrix is never modified afterwards.
    similarity = np.zeros([n_districts, n_districts])
    for i in range(n_districts):
        for j in range(n_districts):
            similarity[i][j] = 1 - metric(data[:, i], data[:, j])

    m_avg = np.average(similarity)
    for idx, scale in enumerate([0.25, 0.5, 1., 2., 4.]):
        eps = m_avg * scale

        # Threshold a fresh copy for every scale. Binarising the similarity matrix in
        # place made every scale after the first operate on 0/1 values instead of
        # the similarities, so the scale had no real effect.
        A = (similarity >= eps).astype(float)

        D = np.diag(A.sum(axis=1))
        inv_sqrt_D = np.linalg.inv(np.sqrt(D))
        L = np.dot(np.dot(inv_sqrt_D, A), inv_sqrt_D)

        pd.DataFrame(L, index=columns, columns=columns).to_csv(f'./datasets/graph_data/{city}-{year}-{rel_type}-normalized-district-{metric.__name__}-{idx}.csv')

In [10]:
# Generate the static multi-view graphs (POI, demographic, road) per year and city.
for year in years:
    for city in ['Seoul']:  # switch to `cities` to process every city
        print('\n', year, '-', city)
        # other candidates: distance.cosine, distance.euclidean, distance.cityblock, distance.correlation
        for metric in [distance.jaccard]:
            # static spatial distance
            A = pd.read_csv(f'./datasets/graph_data/{city}-adjacency-district.csv', engine='c', index_col=0)

            # Each table is reordered to the adjacency matrix's district columns.
            road_data = (
                pd.read_csv(f'./datasets/roads/{city}-{year}-district-road-count.csv')
                .drop(columns=['attribute'])[A.columns]
                .to_numpy()
            )
            demo_data = (
                pd.read_csv(f'./datasets/demographic/{city}-{year}-district.csv')
                .drop(columns=['index'])[A.columns]
                .to_numpy()
            )
            poi_data = (
                pd.read_csv(f'./datasets/poi/{city}-{year}-district.csv')
                .drop(columns=['location'])[A.columns]
                .to_numpy()
            )

            # Same order as before: poi, demo, road. Each view is min-max scaled per column.
            for rel_type, view_data in (('poi', poi_data), ('demo', demo_data), ('road', road_data)):
                normalized_matrix(city, year, MinMaxScaler().fit_transform(view_data), rel_type, metric)


 2016 - Seoul

 2018 - Seoul


In [11]:
def _normalize_adjacency_windows(adj_windows, length, n_districts):
    """Return D^(-1/2) A D^(-1/2) for every (window, step) adjacency in a 4-D stack.

    ``adj_windows`` has shape (n_windows, length, n_districts, n_districts). For each
    adjacency the integer degree matrix gets a tiny random perturbation before the
    element-wise square root and inversion, so all-zero adjacencies do not make the
    inversion fail.

    NOTE(review): the perturbation uses the global, unseeded ``np.random`` state, so the
    saved arrays are not bit-for-bit reproducible between runs; consider seeding.
    """
    normalized = np.zeros(adj_windows.shape, dtype=float)
    for t in range(adj_windows.shape[0]):
        for l in range(length):
            adj = adj_windows[t][l]
            degree = np.diag(adj.sum(axis=1).astype(int))
            jitter = 0.0000001 * np.random.rand(degree.shape[0], n_districts)
            inv_sqrt_degree = np.linalg.inv(np.sqrt(degree + jitter))
            normalized[t][l] = np.dot(np.dot(inv_sqrt_degree, adj), inv_sqrt_degree)
    return normalized


# Generate the dynamic (hourly) traffic-similarity graphs and save train/test window stacks.
for year in years:
    for city in ['Seoul']:  # switch to `cities` to process every city
        print('\n', year, '-', city)

        n_hours = 720  # hourly snapshots processed (presumably 30 days x 24 h; confirm)

        # Loading and feature construction do not depend on metric, window length or
        # threshold scale, so they are done once per (year, city) instead of once per scale.
        A = pd.read_csv(f'./datasets/graph_data/{city}-adjacency-district.csv', engine='c', index_col=0)
        districts = list(A.columns)
        n_districts = len(districts)

        risk_data = pd.read_csv(f'./datasets/risk_scores/{city}-{year}-district-hour-risk.csv')[A.columns].to_numpy()
        volume_data = pd.read_csv(f'./datasets/traffic_volume/{city}-{year}.csv').drop(columns=['date', 'hour'])[A.columns].to_numpy()
        speed_data = pd.read_csv(f'./datasets/traffic_speed/{city}-{year}.csv').drop(columns=['date', 'hour'])[A.columns].to_numpy()
        dtg_data = pd.read_csv(f'./datasets/dangerous_cases/{city}-{year}-date-hour-district-new.csv')[['district', 'OS', 'RA', 'QS', 'RD', 'SS', 'SLC', 'SO', 'ST', 'SUT']]

        volume_data = MinMaxScaler().fit_transform(volume_data)
        speed_data = MinMaxScaler().fit_transform(speed_data)

        # Filter the dangerous-case table once per district (it used to be filtered
        # three times for every (hour, district) pair); keep only the 9 count columns.
        dtg_by_district = {
            di: dtg_data[dtg_data['district'] == di].to_numpy()[:, 1:]
            for di in districts
        }

        # Per (hour, district) feature vector: scaled volume, scaled speed and the
        # 9 dangerous-case counts expressed as shares of their row total.
        traffic_data = np.zeros([risk_data.shape[0], n_districts, 2 + 9])
        for t in range(n_hours):
            for i, di in enumerate(districts):
                counts = dtg_by_district[di][t]
                total = counts.sum()
                dtg = [0]*9 if total == 0 else counts / total
                traffic_data[t][i] = np.concatenate([[volume_data[t][i], speed_data[t][i]], dtg])

        # other candidates: distance.cosine, distance.euclidean, distance.cityblock, distance.correlation
        for metric in [distance.jaccard]:
            # Pairwise district similarity for every hour; independent of the scale.
            similarity = np.zeros([n_hours, n_districts, n_districts])
            for t in range(n_hours):
                for i in range(n_districts):
                    for j in range(n_districts):
                        similarity[t][i][j] = 1 - metric(traffic_data[t][i], traffic_data[t][j])
            hourly_mean = [np.average(similarity[t]) for t in range(n_hours)]

            for length in [12]:  # other candidates: 6, 18
                for idx, scale in enumerate([0.25, 0.5, 1., 2., 4.]):
                    # dynamic spatial distance: binarise each hour with its own threshold
                    A_traffic = np.zeros([n_hours, n_districts, n_districts])
                    for t in range(n_hours):
                        A_traffic[t] = similarity[t] >= hourly_mean[t] * scale

                    # train-test split
                    A_traffic_train, A_traffic_test = [], []
                    for i in range(length, n_hours + 1 - n_steps):
                        window = A_traffic[i-length:i, :n_districts]
                        if i <= (24*24): # before date 25th
                            A_traffic_train.append(window)
                        else:
                            A_traffic_test.append(window)

                    A_traffic_train, A_traffic_test = np.array(A_traffic_train), np.array(A_traffic_test)

                    # Train is normalised and saved before test, which keeps the
                    # order of the random draws unchanged.
                    L = _normalize_adjacency_windows(A_traffic_train, length, n_districts)
                    with open(f'datasets/graph_data/{city}-{year}-traffic-district-normalized-train-{metric.__name__}-{length}-{idx}.npy', 'wb') as f:
                        np.save(f, L)

                    L = _normalize_adjacency_windows(A_traffic_test, length, n_districts)
                    with open(f'datasets/graph_data/{city}-{year}-traffic-district-normalized-test-{metric.__name__}-{length}-{idx}.npy', 'wb') as f:
                        np.save(f, L)


 2016 - Seoul

 2018 - Seoul
