## Summarizing parking tickets by year and infraction type

Aggregates ticket counts and total fine amounts by month and infraction type, writes the summary table to .csv, and draws a few simple plots.

In [1]:
import pandas as pd
import numpy as np
import os
import zipfile
import altair as alt

In [24]:
# Build a per-month, per-infraction-type summary of ticket counts and fine
# totals from one zip archive of CSV files per year, then write it to one CSV.
years = list(range(2011, 2023))  # 2011 through 2022 inclusive
dfid_all = []
for year in years:

    # get tickets for the year: every member of the year's archive is read as a CSV
    dfs = []
    # context managers close the archive and each member handle once they are read
    with zipfile.ZipFile("parking-tickets/parking-tickets-" + str(year) + ".zip") as zf:
        for csv_name in zf.namelist():
            with zf.open(csv_name) as csv_file:
                # on_bad_lines='skip' drops malformed rows instead of raising (best effort)
                dfs.append(pd.read_csv(csv_file, engine="python", on_bad_lines='skip'))
            print(csv_name)
    df = pd.concat(dfs)
    del dfs  # release the per-file frames before aggregating
    del df["tag_number_masked"], df["province"]
    df["year"] = year
    # the first six characters of the infraction date are used as the year-month key
    df['year-month'] = df['date_of_infraction'].astype(str).str[:6]

    # compute counts by type (value_counts orders rows by descending count)
    dfid = (
        df.value_counts(["infraction_description", "year-month"])
        .rename("count")
        .reset_index()
    )

    # sum value of tickets
    dff = df.groupby(['infraction_description', "year-month"])['set_fine_amount'].sum().reset_index()

    # merge counts and fine totals on the shared keys
    dfid = pd.merge(left=dfid, right=dff, on=['infraction_description', "year-month"])

    dfid["year"] = year

    dfid_all.append(dfid)

# The index is not reset here, so row labels restart at 0 for each year, and
# to_csv writes those labels as an unnamed first column of the file.
dfid = pd.concat(dfid_all)
dfid.to_csv("summary-month-type.csv")

Parking_Tags_data_2011.csv
Parking_Tags_Data_2012.csv
Parking_Tags_Data_2013.csv
Parking_Tags_Data_2014_2.csv
Parking_Tags_Data_2014_3.csv
Parking_Tags_Data_2014_4.csv
Parking_Tags_Data_2014_1.csv
Parking_Tags_Data_2015_1.csv
Parking_Tags_Data_2015_2.csv
Parking_Tags_Data_2015_3.csv
Parking_Tags_Data_2016_2.csv
Parking_Tags_Data_2016_3.csv
Parking_Tags_Data_2016_4.csv
Parking_Tags_Data_2016_1.csv
Parking_Tags_Data_2017_1.csv
Parking_Tags_Data_2017_2.csv
Parking_Tags_Data_2017_3.csv
Parking_Tags_Data_2018_1.csv
Parking_Tags_Data_2018_2.csv
Parking_Tags_Data_2018_3.csv
Parking_Tags_Data_2019.000.csv
Parking_Tags_Data_2019.001.csv
Parking_Tags_Data_2019.002.csv
Parking_Tags_Data_2019.003.csv
Parking_Tags_Data_2019.004.csv
Parking_Tags_Data_2019.005.csv
Parking_Tags_Data_2019.006.csv
Parking_Tags_Data_2019.007.csv
Parking_Tags_Data_2019.008.csv
Parking_Tags_Data_2020.000.csv
Parking_Tags_Data_2020.001.csv
Parking_Tags_Data_2020.002.csv
Parking_Tags_Data_2020.003.csv
Parking_Tags_Data_2020.

In [None]:
# Reload the summary written by the aggregation cell so the cells below can run
# without re-reading the raw archives. The file's first column is the DataFrame
# index saved by to_csv; index_col=0 restores it as the index instead of letting
# it surface as a stray "Unnamed: 0" data column.
dfid = pd.read_csv("summary-month-type.csv", index_col=0)

In [25]:
# Preview the summary table as the cell's output
dfid

Unnamed: 0,infraction_description,year-month,count,set_fine_amount,year
0,PARK PROHIBITED TIME NO PERMIT,201106,43001,1290030,2011
1,PARK PROHIBITED TIME NO PERMIT,201108,42757,1282710,2011
2,PARK PROHIBITED TIME NO PERMIT,201107,41594,1247820,2011
3,PARK PROHIBITED TIME NO PERMIT,201105,40773,1223190,2011
4,PARK PROHIBITED TIME NO PERMIT,201104,39975,1199250,2011
...,...,...,...,...,...
1291,PARK OTHER THAN METERED SPACE,202203,1,30,2022
1292,STAND VEHICLE-SIGNED HIGHWAY-3,202209,1,0,2022
1293,STAND VEHICLE-SIGNED HIGHWAY-3,202202,1,0,2022
1294,PARK ONSTRT ACCESSIBLE NOT D/O,202205,1,450,2022


### Total number of infractions by month for a specific type

In [36]:
# Monthly ticket totals for the infraction descriptions matched by the pattern below.
# Another description noted here for the same kind of analysis: STAND SIGNED TRANSIT STOP
# The pattern is a regular expression; "|" separates the alternative descriptions.
bike_lane_pattern = "STOP VEH OTR THN BCYCL-BYCL LN|STOP NON-BICYCLE IN CYCLE TRCK|PARK PRO VEH ON BICYCLE PATH"

# na=False treats a missing description as "no match"; without it the mask would
# contain NA and boolean indexing would raise.
is_bike_lane = dfid["infraction_description"].str.contains(bike_lane_pattern, regex=True, na=False)

dfl = dfid[is_bike_lane].groupby(['year-month'])['count'].sum().reset_index()

# Turn the YYYYMM key into a "YYYY-MM-01" string so it can be plotted as a date.
year_month = dfl["year-month"].astype(str)
dfl["date"] = year_month.str[:4] + "-" + year_month.str[4:] + "-01"

# Bar chart of the ticket counts in dfl, plotted against the "date" column on a
# temporal x axis.
month_axis = alt.Axis(labelAngle=45, gridOpacity=0.5)
month_scale = alt.Scale(domain=["2010-06-01", "2022-12-01"])  # fixed x-axis range

bike_lane_chart = alt.Chart(
    dfl,
    title="# Of Tickets For Parking In Bike Lanes/Paths/Tracks",
    width=1000,
).mark_bar().encode(
    x=alt.X('date:T', title="Year", axis=month_axis, scale=month_scale),
    y=alt.Y('count', title=""),
)

# Last expression of the cell, so the chart is rendered as the cell output.
bike_lane_chart

In [33]:
# Save the monthly bike-lane ticket counts; with to_csv's defaults the row index
# is also written, as an unnamed first column.
dfl.to_csv("bike-lane-tickets-by-month.csv")