In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

import scipy.stats as stats
import statsmodels.formula.api as sm

from datetime import datetime
from covis.utils import get_project_root

# Notebook-wide display and plotting defaults.
# Use the full option key: pandas resolves abbreviated keys by pattern match,
# which can break once another option with a similar name exists.
pd.set_option("display.max_columns", None)  # None: do not truncate wide frames
sns.set_style("white")
sns.set_palette("tab20")

# traffic movement

In [None]:
# Read the "Non seasonally adjusted" sheet of the traffic camera workbook.
# Spreadsheet rows 0 and 1 become a two-level column header, the row at
# position 2 is skipped, and the first column is used as the index.
traffic_workbook = get_project_root() / "data/trafficcameraactivitydataset050523.xlsx"
traffic = pd.read_excel(
    traffic_workbook,
    sheet_name="Non seasonally adjusted",
    header=[0, 1],
    index_col=0,
    skiprows=[2],
)

In [None]:
# Sanity check of the freshly loaded sheet: dimensions first, then the top rows.
print(traffic.shape)
traffic.head()

In [None]:
# Turn the row labels into a DatetimeIndex so date-based slicing and
# resampling work further down.
parsed_traffic_dates = pd.to_datetime(traffic.index, format="%Y/%m/%d")
traffic.index = parsed_traffic_dates

In [None]:
# First pandemic year: 1 March 2020 to 1 March 2021.
# ISO dates are unambiguous; pandas reads the slash form "01/03/2020"
# month-first, i.e. as 3 January 2020, which is not the intended window.
traffic.loc["2020-03-01":"2021-03-01"]

* Of the regions included in this dataset, only London and North East match geographical regions for which we also have COVID-19 data.

In [None]:
# Keep only the two regions that can be matched with the covid-19 data.
# .copy() makes this an independent frame, so assigning to its columns later
# does not raise SettingWithCopyWarning.
traffic = traffic[["London", "North East"]].copy()

In [None]:
# traffic.dtypes

In [None]:
# Convert every column to a numeric dtype; cells that cannot be parsed as a
# number become NaN (errors="coerce").
# Rebinding the result of apply avoids writing column by column into a frame
# that was itself sliced out of another frame (SettingWithCopyWarning).
traffic = traffic.apply(pd.to_numeric, errors="coerce")

In [None]:
# London columns for the first pandemic year.
# pandas parses slash-separated date strings month-first by default, which is
# why "03/01/2020" meant 1 March; ISO dates remove that ambiguity. Selecting
# rows and columns in a single .loc call also avoids chained indexing.
traffic.loc["2020-03-01":"2021-03-01", "London"].plot();

In [None]:
# Collapse the two-level column header: add up all columns that share the same
# top-level label, leaving a single total column per label.
# groupby(axis=1) is deprecated since pandas 2.1; grouping the transposed frame
# and transposing back is the equivalent.
# NOTE(review): sum() skips NaN, so a row with no valid values yields 0.
traffic = traffic.T.groupby(level=0).sum().T

In [None]:
# Plot the complete London series; the trailing ';' suppresses the Axes repr.
traffic["London"].plot();

In [None]:
# Rows where the London value is exactly 0.
# NOTE(review): presumably days without recorded data rather than true zero
# traffic — confirm against the source sheet.
traffic[traffic["London"] == 0]

In [None]:
# Same plot with zeros turned into gaps instead of drops to the x-axis.
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
traffic["London"].replace(0, np.nan).plot();

In [None]:
# Load the weekly regional covid-19 deaths table from the project's output
# folder, using the first CSV column as the index.
deaths_csv = get_project_root() / "output/covid19_weekly_regional_deaths_2020-2022.csv"
deaths = pd.read_csv(deaths_csv, index_col=0)

In [None]:
# deaths

In [None]:
# deaths.dtypes

In [None]:
# Parse the "Week ended" column into datetimes (year/month/day layout).
deaths = deaths.assign(
    **{"Week ended": pd.to_datetime(deaths["Week ended"], format="%Y/%m/%d")}
)

In [None]:
# Use the week-ending date as the row index so deaths can be sliced by date.
deaths = deaths.set_index("Week ended")

In [None]:
# Inspect the new index (dtype and date range).
deaths.index

In [None]:
# Deaths for the first pandemic year, 1 March 2020 to 1 March 2021.
# ISO dates select the same rows as the month-first "03/01/2020" form but
# cannot be misread as 3 January.
deaths.loc["2020-03-01":"2021-03-01"]

In [None]:
# Restrict the deaths table to the two regions kept in the traffic data.
deaths = deaths.loc[:, ["London", "North East"]]

In [None]:
# Dual-axis time series for the first pandemic year: daily London traffic
# counts on the left axis, weekly London covid-19 deaths on the right axis.
sns.set_style("whitegrid")

# ISO dates: unambiguous, unlike slash dates which pandas reads month-first.
first_year = slice("2020-03-01", "2021-03-01")

fig, ax1 = plt.subplots(figsize=(9, 5))
ax2 = ax1.twinx()

# Fixed ranges with six evenly spaced ticks on each axis, so the tick marks of
# both y-axes sit at the same heights.
ax1.set_ylim(0, 500000)
ax2.set_ylim(0, 2000)
for ax in (ax1, ax2):
    ax.yaxis.set_major_locator(ticker.LinearLocator(6))

traffic["London"].loc[first_year].plot(
    ax=ax1,
    color="#4e7496",
    label="traffic movement (all modes)",
    lw=0.5,
)

# grid=False keeps the secondary axis from drawing grid lines over ax1. This
# replaces switching the global seaborn style in the middle of the cell, which
# also changed the style seen by any later cell.
deaths["London"].loc[first_year].plot(
    ax=ax2,
    color="#1b2431",
    label="deaths due to covid",
    grid=False,
)

ax1.set_ylabel("daily number of traffic movements recorded")
ax2.set_ylabel("weekly covid19 deaths")

fig.suptitle("London traffic movement and covid-19 deaths, Mar 2020 - Mar 2021")
# One shared legend for both axes, placed below the plot area.
fig.legend(loc="lower center", bbox_to_anchor=(0.5, -0.2), bbox_transform=ax1.transAxes, ncols=2)
fig.tight_layout();

In [None]:
# fig.savefig(
#     get_project_root() / "figures/traffic_deaths_time.png",
#     bbox_inches='tight'
# )

In [None]:
# London series for the first pandemic year (1 March 2020 - 1 March 2021).
# The window is written once, in ISO form, instead of repeating the ambiguous
# month-first "03/01/2020" literals.
first_year = slice("2020-03-01", "2021-03-01")
traffic_lon_mar_mar = traffic["London"].loc[first_year]
deaths_lon_mar_mar = deaths["London"].loc[first_year]

In [None]:
# Check the date index of the traffic slice before aligning it with deaths.
traffic_lon_mar_mar.index

In [None]:
# Check the date index of the deaths slice for comparison with the traffic one.
deaths_lon_mar_mar.index

In [None]:
# Aggregate traffic into weekly totals, with weeks ending on Friday ("W-FRI").
# NOTE(review): sum() gives 0 for a week without valid observations, and weeks
# cut by the slice boundaries only contain part of their days.
weekly_traffic_totals = traffic_lon_mar_mar.resample("W-FRI").sum()
traffic_lon_mar_mar = pd.DataFrame(weekly_traffic_totals)

In [None]:
# Put deaths on the same Friday-ending weekly grid as the traffic totals.
# NOTE(review): sum() gives 0 for any week that has no row in the input.
weekly_death_totals = deaths_lon_mar_mar.resample("W-FRI").sum()
deaths_lon_mar_mar = pd.DataFrame(weekly_death_totals)

In [None]:
# Display the weekly traffic totals.
traffic_lon_mar_mar

In [None]:
# Join weekly traffic and weekly deaths on their shared date index; only weeks
# present in both frames are kept (merge defaults to an inner join). The
# suffixes tell the two identically named columns apart.
lon_traf_deaths = pd.merge(
    traffic_lon_mar_mar,
    deaths_lon_mar_mar,
    suffixes=["_weekly_traffic", "_weekly_deaths"],
    left_index=True,
    right_index=True,
)

In [None]:
# Summary statistics of both weekly series.
lon_traf_deaths.describe()

## investigate for linear relationship

In [None]:
# Scatter plot with marginal distributions of weekly traffic vs weekly deaths.
sns.set_style("whitegrid")

# Cells equal to 0 are masked to NaN and those rows dropped, so weeks with a
# zero total in either column are left out of the plot.
# Note: `fig` holds the seaborn JointGrid returned by jointplot, not a
# matplotlib Figure.
fig = sns.jointplot(
    data=lon_traf_deaths[lon_traf_deaths != 0].dropna(),
    x="London_weekly_traffic",
    y="London_weekly_deaths",
    kind="scatter",
)

fig.set_axis_labels("weekly traffic", "weekly covid19 deaths")
# JointGrid.figure is the supported accessor; JointGrid.fig is deprecated.
fig.figure.suptitle("London weekly traffic and weekly covid deaths", y=1.05);

In [None]:
# fig.savefig(
#     get_project_root() / "figures/traffic_deaths_jointplot_scatter.png"
# )

In [None]:
# Same joint plot with a fitted regression line (kind="reg").
sns.set_style("whitegrid")

# Cells equal to 0 are masked to NaN and those rows dropped, so weeks with a
# zero total in either column are left out of the plot.
# Note: `fig` holds the seaborn JointGrid returned by jointplot, not a
# matplotlib Figure.
fig = sns.jointplot(
    data=lon_traf_deaths[lon_traf_deaths != 0].dropna(),
    x="London_weekly_traffic",
    y="London_weekly_deaths",
    kind="reg",
)

fig.set_axis_labels("weekly traffic", "weekly covid19 deaths")
# JointGrid.figure is the supported accessor; JointGrid.fig is deprecated.
fig.figure.suptitle("London weekly traffic and weekly covid deaths", y=1.05);

In [None]:
# fig.savefig(
#     get_project_root() / "figures/traffic_deaths_jointplot_reg.png"
# )

### pearson correlation

In [None]:
# Pearson correlation between weekly traffic and weekly deaths.
# NOTE(review): this uses every row of lon_traf_deaths, whereas the joint
# plots drop weeks containing a 0 — confirm the difference is intended.
stats.pearsonr(
    x=lon_traf_deaths["London_weekly_traffic"],
    y=lon_traf_deaths["London_weekly_deaths"],
)

### significance of the linear relationship

In [None]:
# Regression input: the two weekly columns, ordered by traffic volume.
# sort_values returns a new frame, so lon_traf_deaths itself is untouched.
feats = lon_traf_deaths[
    ["London_weekly_traffic", "London_weekly_deaths"]
].sort_values("London_weekly_traffic")

In [None]:
# Ordinary least squares: weekly deaths regressed on weekly traffic.
# `sm` here is statsmodels.formula.api, so the model is given as a formula.
ols_spec = sm.ols(formula='London_weekly_deaths~London_weekly_traffic', data=feats)
linear_model = ols_spec.fit()
# alpha=0.05 sets the significance level used for the reported confidence intervals.
print(linear_model.summary(alpha=0.05))

In [None]:
# Emit the complete regression summary as LaTeX source.
print(linear_model.summary().as_latex())

In [None]:
# Emit each table of the regression summary as a separate LaTeX tabular,
# one after the other.
print(
    *(table.as_latex_tabular() for table in linear_model.summary().tables),
    sep="\n",
)