In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
import tqdm
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats

# Compute the random travel matrix from CSO electoral division commuting data

In [4]:
# Inputs: the commuting table (read as a DataFrame) and two comma-separated
# numeric matrices (read as plain NumPy arrays).
ed_travel_df = pd.read_csv("../data/processed/ed_travel_data.csv")
dist_mat = np.loadtxt(
    '../data/processed/ed_soa_dist_mat.csv',
    delimiter=',',
)
pop_ratio_mat = np.loadtxt(
    '../data/processed/ed_soa_pop_ratio_mat.csv',
    delimiter=',',
)

In [5]:
# Number of commuters recorded on each row of the travel table.
no_commuters = ed_travel_df['No. of Commuters'].to_numpy()

# Repeat every row's distance and population ratio once per commuter, so that
# each individual commuter contributes one sample.
dist = np.repeat(ed_travel_df['Distance'].to_numpy(), no_commuters)
pop_ratio = np.repeat(ed_travel_df['pop_ratio'].to_numpy(), no_commuters)

# Work in base-10 log space for both variables.
log_dist, log_pop_ratio = np.log10(dist), np.log10(pop_ratio)

In [6]:
# Stack the two log-variables as rows: gaussian_kde expects its dataset in
# (n_dimensions, n_samples) layout.
values = np.vstack((log_dist, log_pop_ratio))

# Two-dimensional Gaussian kernel density estimate over
# (log10 distance, log10 population ratio).
kernel = stats.gaussian_kde(dataset=values)

In [8]:
# Evaluation grid: 40 points along each axis, matching the row order of the
# KDE dataset (x pairs with log10 distance, y with log10 population ratio).
x = np.linspace(0, 3, 40)
y = np.linspace(-3, 3, 40)

# 'ij' indexing makes X and Y have shape (len(x), len(y)).
X, Y = np.meshgrid(x, y, indexing='ij')

# Flatten the grid into a (2, n_points) array and evaluate the density there.
positions = np.vstack((X.ravel(), Y.ravel()))
Z = kernel.evaluate(positions)

In [13]:
# Area of one grid cell (the grid is uniformly spaced, so the first step
# along each axis is representative).
dx = x[1] - x[0]
dy = y[1] - y[0]
bin_area = abs(dx * dy)

# Scale the density by the cell area and restore the 2-D grid layout.
# NOTE(review): Z is overwritten with its scaled value, so re-running this
# cell without re-running the KDE evaluation cell scales Z again.
Z = bin_area * Z
Zr = Z.reshape(X.shape)

In [32]:
# Build a row-normalised probability matrix: each (i, j) entry is looked up on
# the KDE grid Zr from the log10 distance and log10 population ratio of that
# pair, then every row is rescaled to sum to 1.
prob_mat = np.zeros_like(dist_mat)

# np.digitize returns indices in 0..len(bins); an index equal to len(bins)
# (value at/above the last grid edge, or NaN) would be one past the end of Zr
# and raise IndexError, so indices are clamped to the last grid cell.
max_bx = Zr.shape[0] - 1
max_by = Zr.shape[1] - 1

for i in range(dist_mat.shape[0]):
    # log10 of a zero entry is -inf, which np.digitize assigns to index 0,
    # so such entries fall in the first grid cell.
    bx = np.minimum(np.digitize(np.log10(dist_mat[i]), x), max_bx)
    by = np.minimum(np.digitize(np.log10(pop_ratio_mat[i]), y), max_by)

    # Look up the grid values once and normalise the row so it sums to 1.
    weights = Zr[bx, by]
    prob_mat[i, :] = weights * (1 / np.sum(weights))

  bx = np.digitize(np.log10(dist_mat[i]), x)


In [33]:
# Write the probability matrix as comma-separated text with 10 decimal places.
np.savetxt(
    '../data/processed/ed_soa_travel_prob_mat.csv',
    prob_mat,
    fmt='%.10f',
    delimiter=',',
)