## Grouping and summarizing by Centreline

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import time
from functools import reduce

In [6]:
# Years of ticket data to summarize; one input file is read per year.
years = [2011,2012,2013,2014,2015,2016,2017]

# Per-year summary tables, collected here and merged once the loop has finished.
dfs_all = []
dfs_load = []
dfs_bike = []  # not populated in this cell

# Pattern passed to str.contains (regex mode, so "|" separates the alternatives)
# to select the rows that go into the loading summary.
load_string = "PARK COMMERC LOAD ZONE NOT LDG|PARK VEH.-COMMERCIAL LOAD ZONE|PARK-PASSENGER/FREIGHT LOADING|PARK PASSENGER/FREIGHT LOAD ZO"
bike_string = ""  # not used in this cell


def _summarize_by_centreline(tickets, year):
    """Return one row per CENTRELINE_ID with the ticket count and fine total.

    Parameters
    ----------
    tickets : pandas.DataFrame
        Ticket rows; must contain ``CENTRELINE_ID`` and ``set_fine_amount``.
    year : int
        Year used to suffix the output column names.

    Returns
    -------
    pandas.DataFrame
        Columns ``CENTRELINE_ID``, ``count-<year>`` and ``fines-<year>``.
    """
    fines_by_segment = tickets.groupby(['CENTRELINE_ID'])['set_fine_amount']
    # size() counts every row in the group; sum() adds up the fine amounts.
    counts = fines_by_segment.size().reset_index(name="count-" + str(year))
    fines = fines_by_segment.sum().reset_index(name="fines-" + str(year))
    return pd.merge(counts, fines, on="CENTRELINE_ID")


for year in years:

    print(year)
    df = pd.read_csv("data/tickets-with-centreline/twc-" + str(year) + ".gz")

    # summary all
    dfs_all.append(_summarize_by_centreline(df, year))

    # summary loading
    # Build the mask once per year. na=False leaves rows with a missing
    # description out of the loading subset instead of producing an NA mask
    # that cannot be used for boolean indexing.
    is_loading = df["infraction_description"].str.contains(load_string, na=False)
    dfs_load.append(_summarize_by_centreline(df[is_loading], year))


# all table
# Outer-join the yearly summaries on CENTRELINE_ID so that an ID missing from
# some years is still kept; the gaps left by the join are then filled with 0.
dfs_all = reduce(lambda left,right: pd.merge(left,right,on=['CENTRELINE_ID'],how='outer'), dfs_all)
dfs_all = dfs_all.fillna(0)
# Totals across all years. The built-in sum starts from 0 and adds the yearly
# columns in the order given by `years`.
dfs_all["count_all"] = sum(dfs_all["count-" + str(year)] for year in years)
dfs_all["fines_all"] = sum(dfs_all["fines-" + str(year)] for year in years)
# Create the output folder if it is missing so the export does not fail on a
# fresh checkout.
os.makedirs("data/centreline-stats", exist_ok=True)
dfs_all.to_csv("data/centreline-stats/all.csv", index=False)

# loading table
# Outer-join the yearly loading summaries on CENTRELINE_ID so that an ID missing
# from some years is still kept; the gaps left by the join are then filled with 0.
dfs_load = reduce(lambda left,right: pd.merge(left,right,on=['CENTRELINE_ID'],how='outer'), dfs_load)
dfs_load = dfs_load.fillna(0)
# Totals across all years. The built-in sum starts from 0 and adds the yearly
# columns in the order given by `years`.
dfs_load["count_all"] = sum(dfs_load["count-" + str(year)] for year in years)
dfs_load["fines_all"] = sum(dfs_load["fines-" + str(year)] for year in years)
# Create the output folder if it is missing so the export does not fail on a
# fresh checkout.
os.makedirs("data/centreline-stats", exist_ok=True)
dfs_load.to_csv("data/centreline-stats/load.csv", index=False)


2011
2012
2013
2014
2015
2016
2017


In [4]:
# Display the merged summary for all tickets: one row per CENTRELINE_ID with
# yearly count/fines columns plus the count_all and fines_all totals.
dfs_all

Unnamed: 0,CENTRELINE_ID,count-2011,fines-2011,count-2012,fines-2012,count-2013,fines-2013,count-2014,fines-2014,count-2015,fines-2015,count-2016,fines-2016,count-2017,fines-2017,count_all,fines_all
0,-2,10623.0,421665.0,9861.0,389055.0,10831.0,432205.0,10874.0,469325.0,8681.0,411200.0,8184.0,381640.0,8219.0,403725.0,67273.0,2908815.0
1,-1,5336.0,228030.0,6253.0,262320.0,6716.0,275940.0,5356.0,229650.0,4987.0,239780.0,5055.0,241445.0,4576.0,217980.0,38279.0,1695145.0
2,0,154719.0,6161355.0,148731.0,5835550.0,116978.0,4670960.0,111766.0,4642365.0,103236.0,4205445.0,86448.0,3758280.0,86423.0,3831210.0,808301.0,33105165.0
3,117,24.0,935.0,14.0,510.0,35.0,1325.0,69.0,2820.0,28.0,945.0,7.0,250.0,7.0,315.0,184.0,7100.0
4,120,8.0,245.0,1.0,40.0,8.0,320.0,4.0,110.0,6.0,90.0,6.0,90.0,29.0,535.0,62.0,1430.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36562,30120301,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,60.0,2.0,60.0
36563,30120707,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,30.0,1.0,30.0
36564,30125286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,54.0,1620.0,54.0,1620.0
36565,30126216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,433.0,15355.0,433.0,15355.0


In [5]:
# Display the merged summary restricted to rows matching load_string: one row
# per CENTRELINE_ID with yearly count/fines columns plus the totals.
dfs_load

Unnamed: 0,CENTRELINE_ID,count-2011,fines-2011,count-2012,fines-2012,count-2013,fines-2013,count-2014,fines-2014,count-2015,fines-2015,count-2016,fines-2016,count-2017,fines-2017,count_all,fines_all
0,-2,30.0,1200.0,28.0,1120.0,27.0,1080.0,16.0,640.0,20.0,800.0,9.0,360.0,42.0,1680.0,172.0,6880.0
1,-1,5.0,200.0,2.0,80.0,10.0,400.0,11.0,440.0,6.0,240.0,16.0,640.0,18.0,720.0,68.0,2720.0
2,0,880.0,35200.0,687.0,27480.0,510.0,20400.0,483.0,19200.0,351.0,13640.0,189.0,7560.0,406.0,16200.0,3506.0,139680.0
3,1579,1.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0
4,437941,1.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,14674278,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0,1.0,40.0
451,20034583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0,1.0,40.0
452,20042302,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0,1.0,40.0
453,30081011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,40.0,1.0,40.0
