# Explore simim drivers/outputs

In [None]:
import glob
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from geopandas.plotting import plot_polygon_collection

In [None]:
# Default every matplotlib figure created below to 10 x 10 (inches).
plt.rcParams["figure.figsize"] = (10,10)

In [None]:
# List the model output CSV files whose names match "*_D*.csv".
glob.glob("../simim/data/output/*_D*.csv")

In [None]:
# Load the district boundary shapefile (per its filename: Local Authority
# Districts, December 2016, ultra generalised, clipped, Great Britain).
lads = gpd.read_file('../simim/data/cache/Local_Authority_Districts_December_2016_Ultra_Generalised_Clipped_Boundaries_in_Great_Britain.shp')

In [None]:
# Quick visual check of the loaded boundaries.
lads.plot()

In [None]:
# Read the baseline employment and GVA drivers.
df_emp = pd.read_csv("../simim/data/arc/arc_employment__baseline.csv")
df_gva = pd.read_csv("../simim/data/arc/arc_gva__baseline.csv")

# Combine both drivers into one table; the left join keeps every GVA row.
df = df_gva.merge(df_emp, on=["timestep", "lad_uk_2016"], how="left")

# Rename to the B_-prefixed baseline schema and keep only the four columns
# used downstream. Either "gva" or "gva_per_sector" is mapped to "B_GVA".
baseline = (
    df.reset_index()
    .rename(
        columns={
            "timestep": "YEAR",
            "lad_uk_2016": "GEOGRAPHY_CODE",
            "employment": "B_JOBS",
            "gva": "B_GVA",
            "gva_per_sector": "B_GVA",
        }
    )[["YEAR", "GEOGRAPHY_CODE", "B_JOBS", "B_GVA"]]
)

baseline["B_GVA"] = baseline["B_GVA"].round(6)
# Employment is supplied in thousands of jobs: scale to whole jobs.
baseline["B_JOBS"] = baseline["B_JOBS"].mul(1000).round().astype(int)
baseline.head()

In [None]:
key = '0-unplanned'
# Two dwellings scenarios reuse the economic inputs of another scenario;
# every other key maps to itself.
econ_key = {
    "3-new-cities23": "1-new-cities",
    "4-expansion23": "2-expansion",
}.get(key, key)

df_gva = pd.read_csv("../simim/data/arc/arc_gva__{}.csv".format(econ_key))
df_emp = pd.read_csv("../simim/data/arc/arc_employment__{}.csv".format(econ_key))
df_dwl = pd.read_csv("../simim/data/arc/arc_dwellings__{}.csv".format(key))

# Combine GVA, employment and dwellings into one table keyed by
# (timestep, district); left joins keep every GVA row.
scenario = (
    df_gva
    .merge(df_emp, on=["timestep", "lad_uk_2016"], how="left")
    .merge(df_dwl, on=["timestep", "lad_uk_2016"], how="left")
    .drop(columns="lad16nm")
    .rename(
        columns={
            "timestep": "YEAR",
            "lad_uk_2016": "GEOGRAPHY_CODE",
            "gva_per_sector": "GVA",
            "employment": "JOBS",
            "dwellings": "HOUSEHOLDS",
        }
    )
)

scenario["GVA"] = scenario["GVA"].round(6)
# Employment is supplied in thousands of jobs: scale to whole jobs.
scenario["JOBS"] = scenario["JOBS"].mul(1000).round().astype(int)
scenario["HOUSEHOLDS"] = scenario["HOUSEHOLDS"].round().astype(int)

In [None]:
# Origin-destination travel costs, renamed to the O_/D_ column convention
# used in the rest of this notebook.
access = pd.read_csv('../simim/data/arc/accessBaseline.csv')
access = access.rename(
    columns={
        "ORIGIN_ZONE_CODE": "O_GEOGRAPHY_CODE",
        "DESTINATION_ZONE_CODE": "D_GEOGRAPHY_CODE",
        "GENERALISED_TRAVEL_COST": "ACCESSIBILITY",
    }
)

In [None]:
# Load the simim output for the "0-unplanned" scenario.
# The path uses "/" throughout: the previous "\\" separator only resolved on
# Windows, whereas forward slashes are accepted on every platform.
# Alternative output file (GVA and JOBS attractors only):
#output = pd.read_csv('../simim/data/output/simim_gravity_ppp_scenario0-unplanned__gjh_D_GVA-D_JOBS.csv')
output = pd.read_csv('../simim/data/output/simim_gravity_ppp_scenario0-unplanned__gjh_D_HOUSEHOLDS-D_JOBS_ACCESS-D_GVA.csv')

In [None]:
# Preview the first three rows of each input table (displayed as a tuple).
scenario.head(3), access.head(3), output.head(3)

In [None]:
def access_weighted_sum(dataset, colname, access_colname):
    """Attach the access-weighted sum of an origin quantity to each destination.

    For every origin-destination row, ``O_<colname>`` is multiplied by
    ``access_colname``. The products are summed over all rows sharing a
    ``D_GEOGRAPHY_CODE`` and that total is attached to every row of the
    destination as ``D_<colname>_ACCESS``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Origin-destination table containing the columns ``D_GEOGRAPHY_CODE``,
        ``O_<colname>`` and ``access_colname``.
    colname : str
        Name of the quantity, without its ``O_`` prefix (e.g. ``"JOBS"``).
    access_colname : str
        Column holding the weight applied to each origin-destination pair.

    Returns
    -------
    pandas.DataFrame
        A new frame (with the fresh default index produced by ``merge``)
        holding the input columns plus ``D_<colname>_ACCESS`` as the last
        column. ``dataset`` itself is left unmodified.
    """
    new_colname = "D_{}_ACCESS".format(colname)
    # access to x[o] for each o,d - held in a standalone Series so that the
    # caller's frame is not mutated as a side effect
    weighted = dataset["O_" + colname] * dataset[access_colname]
    # sum over o - grouping by d
    wsum = (
        weighted.groupby(dataset["D_GEOGRAPHY_CODE"])
        .sum()
        .rename(new_colname)
        .reset_index()
    )

    # merge back; any stale column of the same name is replaced, so the new
    # totals always end up as the last column
    return dataset.drop(columns=new_colname, errors="ignore").merge(
        wsum, on="D_GEOGRAPHY_CODE"
    )

In [None]:
year = 2050

# Scenario drivers for the chosen year, attached first to the origin end and
# then to the destination end of every origin-destination pair.
dataset = (
    access
    .merge(
        scenario.loc[scenario["YEAR"] == year].drop(columns="YEAR"),
        left_on="O_GEOGRAPHY_CODE",
        right_on="GEOGRAPHY_CODE",
    )
    .drop(columns="GEOGRAPHY_CODE")
    .rename(columns={"GVA": "O_GVA", "JOBS": "O_JOBS", "HOUSEHOLDS": "O_HOUSEHOLDS"})
    .merge(
        scenario.loc[scenario["YEAR"] == year].drop(columns="YEAR"),
        left_on="D_GEOGRAPHY_CODE",
        right_on="GEOGRAPHY_CODE",
    )
    .drop(columns="GEOGRAPHY_CODE")
    .rename(columns={"GVA": "D_GVA", "JOBS": "D_JOBS", "HOUSEHOLDS": "D_HOUSEHOLDS"})
)

# Baseline drivers for the same year, attached to both ends in the same way.
dataset = (
    dataset
    .merge(
        baseline.loc[baseline["YEAR"] == year].drop(columns="YEAR"),
        left_on="O_GEOGRAPHY_CODE",
        right_on="GEOGRAPHY_CODE",
    )
    .drop(columns="GEOGRAPHY_CODE")
    .rename(columns={"B_GVA": "O_B_GVA", "B_JOBS": "O_B_JOBS"})
    .merge(
        baseline.loc[baseline["YEAR"] == year].drop(columns="YEAR"),
        left_on="D_GEOGRAPHY_CODE",
        right_on="GEOGRAPHY_CODE",
    )
    .drop(columns="GEOGRAPHY_CODE")
    .rename(columns={"B_GVA": "D_B_GVA", "B_JOBS": "D_B_JOBS"})
)

dataset.head()

In [None]:
# Add a D_<driver>_ACCESS column for each scenario and baseline driver,
# weighting by the ACCESSIBILITY column.
for driver in ("JOBS", "GVA", "B_JOBS", "B_GVA"):
    dataset = access_weighted_sum(dataset, driver, "ACCESSIBILITY")
dataset.head()

In [None]:
# Keep only the destination-level columns and collapse the repeated
# origin-destination rows down to the distinct combinations.
d_data = dataset[
    [
        "D_GEOGRAPHY_CODE",
        "D_GVA",
        "D_JOBS",
        "D_HOUSEHOLDS",
        "D_JOBS_ACCESS",
        "D_GVA_ACCESS",
        "D_B_JOBS",
        "D_B_GVA",
        "D_B_JOBS_ACCESS",
        "D_B_GVA_ACCESS",
    ]
].drop_duplicates()
d_data.head()

In [None]:
# Pair each destination's drivers with the model output rows projected for
# the chosen year.
eval_data = d_data.merge(
    output.loc[output["PROJECTED_YEAR_NAME"] == year],
    left_on="D_GEOGRAPHY_CODE",
    right_on="GEOGRAPHY_CODE",
)
eval_data.head()

In [None]:
# Join the evaluation data onto the district boundaries by district code
# (lad16cd == GEOGRAPHY_CODE); merge's default inner join keeps only the
# districts present in both tables.
eval_geo = lads.merge(eval_data, left_on="lad16cd", right_on="GEOGRAPHY_CODE")
eval_geo.head()

In [None]:
# Jobs per unit of st_areasha (presumably the shapefile's area attribute -
# confirm units), plus its natural log.
eval_geo["JOBS_DENSITY"] = eval_geo["D_JOBS"].div(eval_geo["st_areasha"])
eval_geo["LOG_JOBS_DENSITY"] = np.log(eval_geo["JOBS_DENSITY"])

In [None]:
# Histogram of D_JOBS across districts, using 100 bins.
eval_geo.D_JOBS.hist(bins=100)

In [None]:
# Map the districts coloured by log jobs density.
eval_geo.plot(column="LOG_JOBS_DENSITY")

In [None]:
# Access-weighted jobs per unit of st_areasha: natural log first, then the
# plain ratio.
eval_geo["LOG_JOBS_ACCESS_DENSITY"] = np.log(
    eval_geo["D_JOBS_ACCESS"].div(eval_geo["st_areasha"])
)
eval_geo["JOBS_ACCESS_DENSITY"] = eval_geo["D_JOBS_ACCESS"].div(eval_geo["st_areasha"])

In [None]:
# Map the districts coloured by log access-weighted jobs density.
eval_geo.plot(column="LOG_JOBS_ACCESS_DENSITY")

In [None]:
# Scenario jobs minus baseline jobs; show the districts where the scenario
# has more.
eval_geo["JOBS_DIFF"] = eval_geo["D_JOBS"].sub(eval_geo["D_B_JOBS"])
eval_geo.loc[eval_geo["JOBS_DIFF"] > 0]

In [None]:
# Scenario GVA minus baseline GVA; show the districts where the scenario is
# higher.
eval_geo["GVA_DIFF"] = eval_geo["D_GVA"].sub(eval_geo["D_B_GVA"])
eval_geo.loc[eval_geo["GVA_DIFF"] > 0]

In [None]:
# PEOPLE and PEOPLE_SNPP per unit of st_areasha, and the gap between the two
# (PEOPLE-based minus PEOPLE_SNPP-based).
eval_geo["POPD"] = eval_geo["PEOPLE"].div(eval_geo["st_areasha"])
eval_geo["POPD_SNPP"] = eval_geo["PEOPLE_SNPP"].div(eval_geo["st_areasha"])
eval_geo["POPD_DIFF"] = eval_geo["POPD"].sub(eval_geo["POPD_SNPP"])

In [None]:
# One square axes with equal aspect, a fixed x/y window and a blue
# background.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10), sharex=False, sharey=False)

ax.set_aspect('equal')
ax.set_xlim(120000, 670000)
ax.set_ylim(0, 550000)
ax.set_facecolor('xkcd:cerulean')

# Split districts by the sign of POPD_DIFF (zero counts as "out").
net_out = eval_geo.loc[eval_geo["POPD_DIFF"] <= 0.0]
net_in = eval_geo.loc[eval_geo["POPD_DIFF"] > 0.0]

# Shade each group by the magnitude of its difference: positive differences
# in reds, non-positive in blues, each scaled from 0 to its own maximum.
for subset, cmap in ((net_in, "Reds"), (net_out, "Blues")):
    plot_polygon_collection(
        ax,
        subset['geometry'],
        clim=(0, np.max(np.abs(subset.POPD_DIFF))),
        cmap=cmap,
        values=np.abs(subset.POPD_DIFF),
    )

ax

In [None]:
# assign() works on a copy, so the new column is never written to a slice of
# eval_geo. POP_DIFF is PEOPLE minus PEOPLE_SNPP per district.
net_in = net_in.assign(POP_DIFF=net_in["PEOPLE"].sub(net_in["PEOPLE_SNPP"]))
net_in[["lad16nm", "PEOPLE", "PEOPLE_SNPP", "POP_DIFF"]]

In [None]:
# Total of POP_DIFF over the districts in net_in.
net_in.POP_DIFF.sum()