In [2]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Locate the data folder, which sits in a sub-directory of the current working
# directory rather than next to the notebook. os.path.join inserts the
# platform's own separator, so the path resolves on Windows, macOS and Linux;
# the trailing "" keeps a separator at the end of pathName so a file name can
# still be appended to it directly.
pathName = os.path.join(os.path.abspath(os.getcwd()), "city-of-toronto-data", "")

# file name of the 2019 daily movement counts
resultY2019sum = r"resultY2019sum.csv"

# create the dataframe from the csv, parsing the "date" column as datetimes
df_Y2019sum = pd.read_csv(os.path.join(pathName, resultY2019sum), parse_dates=["date"])

In [4]:
# preview the loaded 2019 daily movement counts (one row per date and intersection)
df_Y2019sum

Unnamed: 0,date,intersection_uid,intersection_name,e_thru_vol,e_left_vol,e_right_vol,w_thru_vol,w_left_vol,w_right_vol
0,2019-01-01,10,King / Bathurst,370,171,232,348,1635,997
1,2019-01-01,11,King / Portland,439,33,531,378,30,396
2,2019-01-01,12,King / Spadina,287,99,414,163,152,731
3,2019-01-01,13,King / Peter,346,56,641,330,49,616
4,2019-01-01,15,King / University,179,144,428,187,74,544
...,...,...,...,...,...,...,...,...,...
3932,2019-12-31,17,King / Bay,988,26,270,1063,23,981
3933,2019-12-31,18,King / Yonge,207,0,407,276,0,991
3934,2019-12-31,19,King / Church,135,14,209,160,27,620
3935,2019-12-31,20,King / Jarvis,121,680,397,134,17,427


In [5]:
# Column filters for restricted movements.

# every movement-count column in the data
allmvmt = [
    "e_thru_vol", "e_left_vol", "e_right_vol",
    "w_thru_vol", "w_left_vol", "w_right_vol",
]

# Several intersections restrict the same four movements. Each one still gets
# its own list object (via list(...)) so editing one never changes another.
_thru_and_left_both_sides = ("e_thru_vol", "e_left_vol", "w_thru_vol", "w_left_vol")

# restricted movement columns, one list per intersection
restrictedmvmtBathurst = ["e_thru_vol", "e_left_vol", "w_thru_vol"]
restrictedmvmtPortland = list(_thru_and_left_both_sides)
restrictedmvmtSpadina = list(_thru_and_left_both_sides)
restrictedmvmtPeter = list(_thru_and_left_both_sides)
restrictedmvmtJohn = ["e_left_vol", "w_left_vol"]
restrictedmvmtSimcoe = ["e_left_vol"]
restrictedmvmtUniversity = list(_thru_and_left_both_sides)
restrictedmvmtYork = ["w_left_vol"]
restrictedmvmtBay = ["e_left_vol", "w_left_vol"]
restrictedmvmtYonge = list(_thru_and_left_both_sides)
restrictedmvmtChurch = list(_thru_and_left_both_sides)
restrictedmvmtJarvis = ["e_thru_vol", "w_thru_vol", "w_left_vol"]

In [6]:
# Lookup from intersection_uid to the list of restricted movement columns for
# that intersection; key 0 maps to the full list of movement columns.
_uid_to_movements = [
    (0, allmvmt),
    (10, restrictedmvmtBathurst),
    (11, restrictedmvmtPortland),
    (12, restrictedmvmtSpadina),
    (13, restrictedmvmtPeter),
    (32, restrictedmvmtJohn),
    (14, restrictedmvmtSimcoe),
    (15, restrictedmvmtUniversity),
    (16, restrictedmvmtYork),
    (17, restrictedmvmtBay),
    (18, restrictedmvmtYonge),
    (19, restrictedmvmtChurch),
    (20, restrictedmvmtJarvis),
]
restrictions_dict = dict(_uid_to_movements)

In [7]:
# daily restricted movement totals 2019
def restricted_daily_totals(counts, intersection_uid, restricted_movements):
    """Return one intersection's rows with its restricted movements totalled.

    Parameters
    ----------
    counts : pandas.DataFrame
        Frame containing "date", "intersection_uid", "intersection_name" and
        the movement-count columns named in ``restricted_movements``.
    intersection_uid : int
        Value of "intersection_uid" whose rows are kept.
    restricted_movements : list of str
        Movement-count columns to keep and add up.

    Returns
    -------
    pandas.DataFrame
        A new frame with the three identifying columns, the restricted
        movement columns, and "sum_violations" (their row-wise sum).
        ``counts`` itself is not modified.
    """
    movement_columns = list(restricted_movements)
    id_columns = ["date", "intersection_uid", "intersection_name"]
    is_intersection = counts["intersection_uid"] == intersection_uid
    selected = counts.loc[is_intersection, id_columns + movement_columns]
    # assign() builds a new frame, so the total is never written onto a
    # selection of `counts` (the pattern that triggers SettingWithCopyWarning)
    return selected.assign(sum_violations=selected[movement_columns].sum(axis=1))


# one frame per intersection; the names are kept because later cells use them
df_Y2019_Bathurst = restricted_daily_totals(df_Y2019sum, 10, restrictedmvmtBathurst)
df_Y2019_Portland = restricted_daily_totals(df_Y2019sum, 11, restrictedmvmtPortland)
df_Y2019_Spadina = restricted_daily_totals(df_Y2019sum, 12, restrictedmvmtSpadina)
df_Y2019_Peter = restricted_daily_totals(df_Y2019sum, 13, restrictedmvmtPeter)
df_Y2019_John = restricted_daily_totals(df_Y2019sum, 32, restrictedmvmtJohn)
df_Y2019_Simcoe = restricted_daily_totals(df_Y2019sum, 14, restrictedmvmtSimcoe)
df_Y2019_University = restricted_daily_totals(df_Y2019sum, 15, restrictedmvmtUniversity)
df_Y2019_York = restricted_daily_totals(df_Y2019sum, 16, restrictedmvmtYork)
df_Y2019_Bay = restricted_daily_totals(df_Y2019sum, 17, restrictedmvmtBay)
df_Y2019_Yonge = restricted_daily_totals(df_Y2019sum, 18, restrictedmvmtYonge)
df_Y2019_Church = restricted_daily_totals(df_Y2019sum, 19, restrictedmvmtChurch)
df_Y2019_Jarvis = restricted_daily_totals(df_Y2019sum, 20, restrictedmvmtJarvis)

In [8]:
# Stack the per-intersection frames into a single table, in the order listed,
# renumbering the rows from 0. Frames carry different movement columns; the
# concatenation keeps the union of columns and leaves NaN where a frame has
# no such column.
intersection_frames = [
    df_Y2019_Bathurst,
    df_Y2019_Portland,
    df_Y2019_Spadina,
    df_Y2019_Peter,
    df_Y2019_John,
    df_Y2019_Simcoe,
    df_Y2019_University,
    df_Y2019_York,
    df_Y2019_Bay,
    df_Y2019_Yonge,
    df_Y2019_Church,
    df_Y2019_Jarvis,
]
df = pd.concat(intersection_frames, ignore_index=True)

In [9]:
# Row-wise subtotals of the thru and left-turn counts: "sum_e" over the "e_"
# columns and "sum_w" over the "w_" columns. DataFrame.sum skips NaN, so a
# movement that is missing for an intersection contributes 0.
for subtotal_col, part_cols in (
    ("sum_e", ["e_thru_vol", "e_left_vol"]),
    ("sum_w", ["w_thru_vol", "w_left_vol"]),
):
    df[subtotal_col] = df[part_cols].sum(axis=1)

In [10]:
# Scatter of every intersection's daily violation total across the year,
# coloured by intersection, with a LOWESS trendline per intersection.
axis_labels = {
    "date": "Date",
    "intersection_name": "Intersection",
    "sum_violations": "Daily Total Violations",
}

fig = px.scatter(
    df,
    x="date",
    y="sum_violations",
    color="intersection_name",
    labels=axis_labels,
    title="King St Corridor Total Daily Traffic Violations per Intersection, 2019",
    trendline="lowess",
    trendline_options={"frac": 0.3},
    height=800,
)

fig.show()

In [11]:
# grand total of "sum_violations" over every row of the combined 2019 table
total_violations_2019 = df["sum_violations"].sum()
total_violations_2019

2453812.0

In [12]:
# Violations for each day of 2019, added up over all intersections.
# NOTE: this rebinds resultY2019sum, which held the CSV file name until now.
resultY2019sum = (
    df.loc[:, ["date", "sum_violations"]]
    .groupby("date")
    .sum()
    .reset_index()
)
resultY2019sum

Unnamed: 0,date,sum_violations
0,2019-01-01,5719.0
1,2019-01-02,7083.0
2,2019-01-03,8159.0
3,2019-01-04,8176.0
4,2019-01-05,7781.0
...,...,...
360,2019-12-27,3860.0
361,2019-12-28,4065.0
362,2019-12-29,3881.0
363,2019-12-30,3231.0


In [None]:
# Mean of "sum_violations" over the intersections, computed separately for
# each day of 2019.
resultY2019avg = (
    df.loc[:, ["date", "sum_violations"]]
    .groupby("date")
    .mean()
    .reset_index()
)
resultY2019avg
# To collapse the daily means into one figure for the year instead, use:
# resultY2019avg.mean(numeric_only=True)

sum_violations    623.013629
dtype: float64

In [None]:
# Mean daily "sum_violations", "sum_e" and "sum_w" for each intersection.
# NOTE: this reuses the name resultY2019avg, replacing whatever it held before.
resultY2019avg = (
    df.loc[:, ["intersection_uid", "sum_violations", "sum_e", "sum_w"]]
    .groupby("intersection_uid")
    .mean()
    .reset_index()
)
resultY2019avg

Unnamed: 0,intersection_uid,sum_violations,sum_e,sum_w
0,10,1064.171271,673.08011,391.09116
1,11,1106.476923,663.603077,442.873846
2,12,786.093151,476.243836,309.849315
3,13,934.613699,505.879452,428.734247
4,15,758.945055,364.041209,394.903846
5,16,59.106628,0.0,59.106628
6,17,132.857534,67.810959,65.046575
7,18,812.882192,353.816438,459.065753
8,19,601.19382,267.587079,333.606742
9,20,522.208219,230.126027,292.082192


In [None]:
# Figure titles keyed by intersection_uid. Every title follows the pattern
# "King / <cross street>, Traffic Violations, 2019", so only the cross street
# is spelled out per intersection.
_cross_streets = {
    10: "Bathurst",
    11: "Portland",
    12: "Spadina",
    13: "Peter",
    14: "Simcoe",
    15: "University",
    16: "York",
    17: "Bay",
    18: "Yonge",
    19: "Church",
    20: "Jarvis",
    32: "John",
}
titles_dict = {
    uid: f"King / {street}, Traffic Violations, 2019"
    for uid, street in _cross_streets.items()
}

In [None]:
# intersection_uid to plot; it is looked up in both restrictions_dict and titles_dict
var = 11

# Scatter of each restricted movement's daily volume at the chosen
# intersection, with a LOWESS trendline per movement.
fig = px.scatter(
    df.loc[df["intersection_uid"] == var],
    x="date",
    y=restrictions_dict[var],
    labels={
        "date": "Date",
        "variable": "Restricted Movement",
        "value": "Traffic Volume",
    },
    title=titles_dict[var],
    trendline="lowess",
    trendline_options={"frac": 0.3},
    height=600,
)

# readable names for the raw movement column names
varlabels = {
    "e_thru_vol": "E Thru Traffic",
    "e_left_vol": "E Left Turns",
    "e_right_vol": "E Right Turns",
    "w_thru_vol": "W Thru Traffic",
    "w_left_vol": "W Left Turns",
    "w_right_vol": "W Right Turns",
}


def _relabel_trace(trace):
    """Replace a trace's raw column name with its readable label.

    The name, legend group and hover text are all switched together; the new
    hover text is derived from the old name before the trace is updated.
    """
    readable = varlabels[trace.name]
    return trace.update(
        name=readable,
        legendgroup=readable,
        hovertemplate=trace.hovertemplate.replace(trace.name, readable),
    )


fig.for_each_trace(_relabel_trace)

fig.show()

In [None]:
# Faceted figure: one row per intersection, plotting the "e_"/"w_" subtotals and
# thru counts against date, each with a LOWESS trendline.
fig = px.scatter(df, 
                 x="date", 
                 y=["sum_e", "e_thru_vol", "sum_w", "w_thru_vol"], 
                 labels={
                     "date" : "Date", 
                     #"sum_violations" : "Total Violations",
                     "intersection_name" : "Intersection"}, 
                 trendline="lowess", 
                 trendline_options=dict(frac=0.3),
                 # trendline_color_override="blue",
                 facet_row="intersection_name", 
                 # order in which the intersection facets are arranged
                 category_orders={
                     "intersection_name" : [
                         "King / Bathurst",
                         "King / Portland",
                         "King / Spadina",
                         "King / Peter",
                         "King / John", 
                         # "King / Simcoe",
                         "King / University",
                         "King / York",
                         "King / Bay",
                         "King / Yonge",
                         "King / Church",
                         "King / Jarvis"]}, 
                 height=1000, 
                 title="King Street Corridor, Daily Traffic Violations, East Approach vs West Approach, 2019")

# one tick per month ("M1"), labelled with the month abbreviation over the year
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")
# blank the y-axis titles of the facets; a single shared title is added below
fig.update_yaxes(title_text = "", secondary_y=False)
# shorten the facet labels from "Intersection=King / <street>" to "<street>"
# (must run before the shared y-axis title annotation is appended)
fig.for_each_annotation(lambda a: a.update(text=a.text.replace("Intersection=King / ", "")))

# keep other annotations and add single y-axis title
fig.update_layout(
    # keep the original annotations and add a list of new annotations (single y-axis title):
    annotations = list(fig.layout.annotations) + [go.layout.Annotation(
        # paper coordinates: just left of the plotting area, vertically centred
        x=-0.07,
        y=0.5,
        font=dict(size=14),
        showarrow=False,
        text="Traffic Violations",
        textangle=-90,
        xref="paper",
        yref="paper"
        )
    ]
)

# keep only the traces drawn in "lines" mode, dropping the rest
# (presumably this leaves just the LOWESS trendlines — confirm against the rendered figure)
fig.data = [t for t in fig.data if t.mode == "lines"]
# show legend entries for the remaining traces that are attached to x-axis "x"
fig.update_traces(showlegend=True, selector=dict(xaxis="x"))
fig.show()

In [None]:
# Optional export of the combined 2019 violations table (left disabled):
# df.to_csv(os.path.join("city-of-toronto-data", "violations2019.csv"), index=False)