In [73]:
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely

The population ratio is defined as $\frac{\text{destination population}}{\text{source population}}$.

In [65]:
# Load the processed vertex table and wrap it in a GeoDataFrame.
vert_df = gpd.GeoDataFrame(pd.read_csv("../data/processed/ed_soa_data_frame.csv"))

# Both columns come out of the CSV as WKT text; parse each one back into
# shapely geometry objects.
for wkt_column in ('geometry', 'centroid'):
    vert_df[wkt_column] = vert_df[wkt_column].apply(shapely.wkt.loads)

In [66]:
def distance_haversine(lat1, lon1, lat2, lon2, radius=6373.0 * 1000):
    """Great-circle distance between two points using the haversine formula.

    All coordinates are given in decimal degrees. Each argument may be a
    scalar or a NumPy array; the arguments are combined element-wise under
    the usual NumPy broadcasting rules, so a single point can be compared
    against whole arrays of points in one call.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in degrees.
    radius : float, optional
        Radius of the sphere. The result is expressed in the same unit.
        The default, ``6373.0 * 1000``, is an approximate Earth radius in
        metres, so by default the function returns metres (not kilometres).

    Returns
    -------
    float or numpy.ndarray
        Distance along the sphere's surface, in the unit of ``radius``.
    """
    lat1, lon1, lat2, lon2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # Haversine of the central angle between the two points.
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Mathematically a lies in [0, 1]; rounding can push it marginally outside
    # for (near-)antipodal points, which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    # Central angle in radians.
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c

In [67]:
# Pairwise source -> destination matrices: row index is the source zone,
# column index is the destination zone.
pop_arr = vert_df['population'].to_numpy()
n_zones = len(pop_arr)

dist_mat = np.zeros((n_zones, n_zones))
pop_ratio_mat = np.zeros_like(dist_mat)

# Centroid coordinates: y is read as latitude, x as longitude.
centroids = list(vert_df['centroid'])
lat_arr = np.fromiter((pt.y for pt in centroids), dtype=float, count=len(centroids))
lon_arr = np.fromiter((pt.x for pt in centroids), dtype=float, count=len(centroids))

# Fill one source row at a time; each call compares a single source centroid
# against every centroid at once.
for src, (src_lat, src_lon, src_pop) in enumerate(zip(lat_arr, lon_arr, pop_arr)):
    # The haversine result is divided by 1000 before being stored.
    dist_mat[src, :] = distance_haversine(src_lat, src_lon, lat_arr, lon_arr) / 1000.0
    # Ratio of every destination population to this source's population.
    pop_ratio_mat[src, :] = pop_arr / src_pop

In [68]:
# Write both pairwise matrices as comma-separated text with fixed-point values.
for out_path, matrix in (
    ('../data/processed/ed_soa_pop_ratio_mat.csv', pop_ratio_mat),
    ('../data/processed/ed_soa_dist_mat.csv', dist_mat),
):
    np.savetxt(out_path, matrix, fmt='%f', delimiter=',')

In [82]:
# Raw link table between electoral divisions (one row per used link).
ed_travel_df = pd.read_csv(filepath_or_buffer="../data/raw/ED_Used_Link_Info.csv")

In [83]:
# Per-ED attributes, indexed by division name so rows can be looked up by
# label; the 'Electoral Division' column itself is kept as well.
ed_df = (
    pd.read_csv("../data/raw/ED_Basic_Info.csv")
    .set_index('Electoral Division', drop=False)
)

In [84]:
# Keep only links whose destination is an actual electoral division and whose
# 'Distance' value is greater than 1.
excluded_destinations = ['No fixed place of work', 'Work/school from home']
keep_link = (
    ~ed_travel_df['To Electoral Division'].isin(excluded_destinations)
    & (ed_travel_df['Distance'] > 1)
)
# reset_index returns a new frame with a clean 0..n-1 index, so the column
# assignments below do not write into a slice of the unfiltered table.
ed_travel_df = ed_travel_df.loc[keep_link].reset_index(drop=True)

# Look up source and destination populations by division name. ed_df is
# indexed by 'Electoral Division'; .loc raises KeyError for an unknown name.
# The values are attached positionally (to_numpy), so no temporary re-indexing
# of ed_travel_df is needed.
# NOTE(review): assumes division names are unique in ed_df - confirm.
population_by_ed = ed_df['Population']
ed_travel_df['src_pop'] = population_by_ed.loc[ed_travel_df['Electoral Division']].to_numpy()
ed_travel_df['dst_pop'] = population_by_ed.loc[ed_travel_df['To Electoral Division']].to_numpy()

# Destination-to-source population ratio, and commuters on the link as a
# share of the source population.
ed_travel_df['pop_ratio'] = ed_travel_df['dst_pop'] / ed_travel_df['src_pop']
ed_travel_df['commuter_prop'] = ed_travel_df['No. of Commuters'] / ed_travel_df['src_pop']

In [86]:
# Persist the filtered link table with its derived columns; the row index is
# not written.
ed_travel_df.to_csv(path_or_buf='../data/processed/ed_travel_data.csv', index=False)