# Plot inter-LAD accessibility

In [None]:
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from geopandas.plotting import plot_polygon_collection

In [None]:
# Default size for every figure in this notebook (matplotlib figsize is width, height in inches).
plt.rcParams["figure.figsize"] = (10,10)

In [None]:
# Load the baseline and scenario accessibility tables, each ordered by
# (origin, destination) zone code. Sorting does not renumber the rows: each
# frame keeps the row labels it had when read from CSV.
access_base, access_scen = (
    pd.read_csv(csv_path).sort_values(by=['ORIGIN_ZONE_CODE', 'DESTINATION_ZONE_CODE'])
    for csv_path in (
        "../simim/data/arc/accessBaseline.csv",
        "../simim/data/arc/accessScenario3.csv",
    )
)

In [None]:
# Preview the first rows of the baseline table to check its columns.
access_base.head()

In [None]:
# Origin x destination cost matrices as plain numpy arrays (rows: origins,
# columns: destinations, both in sorted zone-code order).
# `values` is given explicitly, as in the other pivots in this notebook:
# without it, pivot spreads every non-key column across the columns, so any
# extra column in the CSV would be concatenated into the array.
od_base = access_base.pivot(
    index='ORIGIN_ZONE_CODE', columns='DESTINATION_ZONE_CODE', values='GENERALISED_TRAVEL_COST'
).to_numpy()
od_scen = access_scen.pivot(
    index='ORIGIN_ZONE_CODE', columns='DESTINATION_ZONE_CODE', values='GENERALISED_TRAVEL_COST'
).to_numpy()

In [None]:
def empirical_cdf(ser):
    """Return the empirical CDF of a series as a new series.

    The result is indexed by the sorted values of ``ser``, with the largest
    value repeated once at the end, and holds evenly spaced cumulative
    probabilities running from 0 to 1. The index is named after ``ser``.

    The index labels of ``ser`` are ignored and ``ser`` itself is not modified.
    Raises ``IndexError`` if ``ser`` is empty.

    See https://stackoverflow.com/questions/25577352/plotting-cdf-of-a-pandas-series-in-python/#answer-31971245
    """
    # Sort on a plain array rather than assigning to ser[len(ser)]: that
    # assignment is label-based, so on a filtered series whose integer index
    # already contains the label len(ser) it would overwrite an existing value
    # instead of appending one.
    values = np.sort(ser.to_numpy())

    # append again the last (and largest) value in order to get an unbiased CDF
    values = np.append(values, values[-1])

    # Sorted values as index, cumulative distribution as values.
    cum_dist = np.linspace(0., 1., len(values))
    return pd.Series(cum_dist, index=pd.Index(values, name=ser.name))


def plot_cdf(ser):
    """Plot the empirical CDF of a series as a step function.

    Returns the matplotlib axes the CDF was drawn on.
    Raises ``IndexError`` if ``ser`` is empty.
    """
    ax = empirical_cdf(ser).plot(drawstyle='steps')
    return ax

In [None]:
# Empirical CDF of the baseline GENERALISED_TRAVEL_COST column over all rows.
plot_cdf(access_base.GENERALISED_TRAVEL_COST)

In [None]:
# Scenario minus baseline cost, matched on the (origin, destination) pair.
# Subtracting the raw columns would align on the row labels inherited from the
# CSV files (sort_values keeps them), which only pairs up the same trip if both
# files list their rows in the same order.
diff = (
    access_scen.set_index(['ORIGIN_ZONE_CODE', 'DESTINATION_ZONE_CODE']).GENERALISED_TRAVEL_COST
    - access_base.set_index(['ORIGIN_ZONE_CODE', 'DESTINATION_ZONE_CODE']).GENERALISED_TRAVEL_COST
).reset_index(drop=True)  # back to a plain positional index for plot_cdf
# NOTE(review): this keeps only increases; decreases of any size are dropped - confirm intended.
diff = diff[diff > 1e-04]  # zoom in on more-than-minute differences
plot_cdf(diff)

In [None]:
# Baseline cost as an origin x destination table, drawn with seaborn's heatmap
# (gives labels and colorbar).
sns.heatmap(access_base.pivot(index='ORIGIN_ZONE_CODE', columns='DESTINATION_ZONE_CODE', values='GENERALISED_TRAVEL_COST'))

In [None]:
# Scenario minus baseline, drawn straight from the numpy arrays built earlier:
# plain numpy array option (slightly faster), but without zone-code labels.
plt.imshow(od_scen - od_base)

## Focus on ARC accessibility

In [None]:
# Table of the LADs of interest; later cells use its geo_code and geo_label columns.
# NOTE(review): "camkox" presumably refers to the Cambridge-Milton Keynes-Oxford arc - confirm.
arc_lads = pd.read_csv("../simim/data/scenarios/camkox_lads.csv")

In [None]:
# "Within arc": keep only trips whose origin AND destination are both listed
# in arc_lads, for the baseline and the scenario alike.
warc_access_base, warc_access_scen = (
    frame.loc[
        frame["ORIGIN_ZONE_CODE"].isin(arc_lads["geo_code"])
        & frame["DESTINATION_ZONE_CODE"].isin(arc_lads["geo_code"])
    ]
    for frame in (access_base, access_scen)
)

# Reshape each filtered table into an origin x destination cost matrix.
warc_od_base, warc_od_scen = (
    frame.pivot(
        index='ORIGIN_ZONE_CODE',
        columns='DESTINATION_ZONE_CODE',
        values='GENERALISED_TRAVEL_COST',
    )
    for frame in (warc_access_base, warc_access_scen)
)

In [None]:
# Baseline cost between pairs of LADs that both lie within the arc.
sns.heatmap(warc_od_base)

In [None]:
# Change in cost (scenario minus baseline) between pairs of LADs within the arc.
sns.heatmap(warc_od_scen - warc_od_base)

In [None]:
# Keep trips that touch the arc at either end: origin OR destination is
# listed in arc_lads.
arc_access_base, arc_access_scen = (
    frame.loc[
        frame["ORIGIN_ZONE_CODE"].isin(arc_lads["geo_code"])
        | frame["DESTINATION_ZONE_CODE"].isin(arc_lads["geo_code"])
    ]
    for frame in (access_base, access_scen)
)

In [None]:
# Origin x destination cost matrices for the trips that touch the arc.
arc_od_base, arc_od_scen = (
    frame.pivot(
        index='ORIGIN_ZONE_CODE',
        columns='DESTINATION_ZONE_CODE',
        values='GENERALISED_TRAVEL_COST',
    )
    for frame in (arc_access_base, arc_access_scen)
)

In [None]:
# Baseline cost for trips with at least one end in the arc. Pairs with neither
# end in the arc are not in the filtered table, so their cells hold no value.
sns.heatmap(arc_od_base)

In [None]:
# Change in cost (scenario minus baseline) for trips with at least one end in the arc.
sns.heatmap(arc_od_scen - arc_od_base)

In [None]:
# Per-pair change (scenario minus baseline) for trips touching the arc, matched
# on the (origin, destination) pair, labelled with LAD names and ordered from
# the largest cost increase down. The joins are left joins, so O_NAME / D_NAME
# are missing for zones that are not listed in arc_lads.
diff = (
    arc_access_scen.set_index(["ORIGIN_ZONE_CODE", "DESTINATION_ZONE_CODE"])
    .sub(arc_access_base.set_index(["ORIGIN_ZONE_CODE", "DESTINATION_ZONE_CODE"]))
    .reset_index()
    # attach the origin name
    .merge(arc_lads, how="left", left_on="ORIGIN_ZONE_CODE", right_on="geo_code")
    .drop(columns="geo_code")
    .rename(columns={"geo_label": "O_NAME"})
    # attach the destination name
    .merge(arc_lads, how="left", left_on="DESTINATION_ZONE_CODE", right_on="geo_code")
    .drop(columns="geo_code")
    .rename(columns={"geo_label": "D_NAME"})
    .sort_values(by="GENERALISED_TRAVEL_COST", ascending=False)
)

In [None]:
# Show the first 20 rows of diff.
diff.head(20)