In [3]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

In [4]:
park_data = pd.read_csv('dataCleaned1.csv', low_memory=False)
park_data.head()

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description
0,2021-11-15,0:00:00,Mayfair PL,"(49.8822, -97.1381)",70.0,52.5,P17,Too close to a traffic sign
1,2021-11-15,0:00:00,Wentworth ST,"(49.8714, -97.1612)",70.0,52.5,P17,Too close to a traffic sign
2,2021-11-15,0:00:00,LOT128-960 THOMAS,"(49.8987, -97.0846)",70.0,52.5,P16,No Park
3,2021-11-15,0:00:00,Lloyd ST,"(49.873, -97.1171)",100.0,75.0,P13,Fire Hydrant
4,2021-11-15,0:00:00,Haggart AVE,"(49.9497, -97.2169)",70.0,52.5,P04,Oversized Vehicle


In [9]:
park_data['Date'] = pd.to_datetime(park_data['Date'])     # skip if your Date column already in datetime format
park_data['Day of Week'] = park_data['Date'].dt.dayofweek
hag_data = park_data.query("Street == 'Lloyd ST'")
fig = px.bar(hag_data, y='Violation Code', x='Location', text='Violation Code', color='Location')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8)
fig.update_layout(xaxis_tickangle=-45)
fig

In [13]:
park_data['Date'] = pd.to_datetime(park_data['Date'])  # skip if your Date column already in datetime format
park_data['Day of Week'] = park_data['Date'].dt.dayofweek
hag_data = park_data.query("Street == 'Lloyd ST'")
fig = px.bar(hag_data, y='Day of Week', x='Full Fine', color='Day of Week')
fig.update_layout(uniformtext_minsize=8)
fig.update_layout(xaxis_tickangle=-45)
fig

In [22]:
park_data['Date'] = pd.to_datetime(park_data['Date'])  # skip if your Date column already in datetime format
park_data['Day of Week'] = park_data['Date'].dt.dayofweek
hag_data = park_data.query("Street == 'Lloyd ST'")
sorted_hag = hag_data.sort_values(by="Date")
sorted_hag.head()

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description,Day of Week
161129,2013-09-02,8:55:50,Lloyd ST,"(49.8752716, -97.119451)",70.0,35.0,11,No parking - loading zone,0
193115,2013-09-28,8:26:56,Lloyd ST,"(49.8743633, -97.118573)",70.0,35.0,11,No parking - loading zone,5
154803,2013-09-28,8:19:26,Lloyd ST,"(49.8743466, -97.118556)",70.0,35.0,11,No parking - loading zone,5
177710,2013-09-29,11:09:15,Lloyd ST,"(49.87426, -97.1185)",70.0,35.0,11,No parking - loading zone,6
224068,2013-12-12,2:22:00,Lloyd ST,"(49.8728866, -97.116813)",150.0,75.0,34,2011 Stand/stop/park in resid,3


In [21]:
fig = px.scatter(sorted_hag, x='Date', y="Full Fine")
fig.show()

In [26]:
sorted_week = hag_data.sort_values(by="Day of Week")
sorted_week.head()

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description,Day of Week
3,2021-11-15,0:00:00,Lloyd ST,"(49.873, -97.1171)",100.0,75.0,P13,Fire Hydrant,0
661059,2017-06-26,9:19:37,Lloyd ST,"(49.8728083, -97.116613)",70.0,35.0,17,Too close to a traffic sign,0
659776,2017-08-14,10:42:04,Lloyd ST,"(49.8753383, -97.119958)",100.0,50.0,9,Too close/in an intersection,0
848893,2019-04-08,4:29:19,Lloyd ST,"(49.8735383, -97.117498)",100.0,75.0,P13,Fire Hydrant,0
526511,2016-07-11,7:47:18,Lloyd ST,"(49.8742833, -97.118488)",70.0,35.0,11,No parking - loading zone,0


In [25]:
fig = px.scatter(sorted_week, x='Day of Week', y="Full Fine")
fig.show()

In [29]:
n = len(pd.unique(park_data['Street']))
print("No of Streets: ", n)

No of Streets:  4087


In [34]:
df2 = park_data.groupby(['Street'])['Street'].count()
print(df2)

Street
495 Portage Ave               5
AT OR NEAR WINNIPEG, MB       1
AV DE L'EGLISE AVE           22
AV DE LA CATHEDRALE AVE     683
AV DE LA DIGUE AVE            1
                           ... 
Young ST                   6705
Zawaly BAY                   27
Zeglinski CRES                1
Zoe LANE                      3
Zylema COVE                   2
Name: Street, Length: 4087, dtype: int64


In [35]:
# count occurrences a particular column
occur = park_data.groupby(['Street']).size()

# display occurrences of a particular column
display(occur)

Street
495 Portage Ave               5
AT OR NEAR WINNIPEG, MB       1
AV DE L'EGLISE AVE           22
AV DE LA CATHEDRALE AVE     683
AV DE LA DIGUE AVE            1
                           ... 
Young ST                   6705
Zawaly BAY                   27
Zeglinski CRES                1
Zoe LANE                      3
Zylema COVE                   2
Length: 4087, dtype: int64

In [37]:
df2 = park_data.groupby(['Street']).count().sort_values(['count'], ascending=False).head(5)
print(df2)

KeyError: 'count'

In [46]:
top_50_streets = park_data[['Street']].groupby(['Street'])['Street'] \
                             .count() \
                             .reset_index(name='count') \
                             .sort_values(['count'], ascending=False) \
                             .head(50)
print(top_50_streets)

                              Street  count
242                    Bannatyne AVE  28532
3971                     William AVE  28333
2437                    McDermot AVE  26514
1599                     Hargrave ST  26092
1907                      Kennedy ST  22297
1185                     Edmonton ST  18145
657                       Carlton ST  17543
1398                        Garry ST  17473
1339                         Fort ST  15168
530                         Broadway  15064
3515                     St Mary AVE  14797
1059                       Donald ST  13781
2956                     Portage AVE  13735
1948                         King ST  12804
3439                        Smith ST  12775
1368                        Furby ST  11198
233                      Balmoral ST  10672
2701                  Notre Dame AVE  10540
3480                       Spence ST  10336
3390                    Sherbrook ST  10297
2988                     Princess ST  10153
28                       Adelaid

In [93]:
fig = px.bar(top_50_streets, x='Street',y='count', color='Street',
             title='Top 50 Streets with Most Parking Tickets',
             labels={'count': 'Number of Tickets'})
fig.update_layout(uniformtext_minsize=8)
fig.update_layout(xaxis_tickangle=-45)
fig

In [51]:
tickets_by_day = park_data[['Day of Week']].groupby(['Day of Week'])['Day of Week'] \
                             .count() \
                             .reset_index(name='count') \
                             .sort_values(['count'], ascending=False) \
                             .head(50)
print(tickets_by_day)

   Day of Week   count
3            3  167057
2            2  164516
1            1  163375
4            4  158115
0            0  141352
5            5   61072
6            6   54430


In [98]:
fig = px.bar(tickets_by_day, x='Day of Week',y='count', color='Day of Week',
             title='Number of Tickets Issued by Day of Week',
             labels={'count': 'Number of Tickets'})
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=[0, 1, 2, 3, 4, 5, 6],
        ticktext=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    )
)
fig

In [54]:
park_data['Year'], park_data['Month'] = park_data['Date'].dt.year, park_data['Date'].dt.month
park_data.head()

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description,Day of Week,Year,Month
0,2021-11-15,0:00:00,Mayfair PL,"(49.8822, -97.1381)",70.0,52.5,P17,Too close to a traffic sign,0,2021,11
1,2021-11-15,0:00:00,Wentworth ST,"(49.8714, -97.1612)",70.0,52.5,P17,Too close to a traffic sign,0,2021,11
2,2021-11-15,0:00:00,LOT128-960 THOMAS,"(49.8987, -97.0846)",70.0,52.5,P16,No Park,0,2021,11
3,2021-11-15,0:00:00,Lloyd ST,"(49.873, -97.1171)",100.0,75.0,P13,Fire Hydrant,0,2021,11
4,2021-11-15,0:00:00,Haggart AVE,"(49.9497, -97.2169)",70.0,52.5,P04,Oversized Vehicle,0,2021,11


In [55]:
tickets_by_month = park_data[['Month']].groupby(['Month'])['Month'] \
                             .count() \
                             .reset_index(name='count') \
                             .head(50)
print(tickets_by_month)

    Month  count
0       1  82851
1       2  89655
2       3  64615
3       4  62436
4       5  65544
5       6  67868
6       7  72744
7       8  77259
8       9  81512
9      10  80907
10     11  68850
11     12  95676


In [67]:
fig = px.bar(tickets_by_month, x='Month',y='count', color='Month',
             title='Number of Tickets Issued by Month',
             labels={'count': 'Number of Violations'})
fig.update_layout(
    xaxis=dict(
        showticklabels=True,
        type='category',
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    )
)
fig

In [59]:
px.line(tickets_by_month, x='Month', y='count', labels={'x': 'Month', 'y': 'Number of Violations'})

In [95]:
fig = px.line(tickets_by_month, x='Month',y='count',
             title='Number of Tickets Issued by Month',
             labels={'count': 'Number of Violations'},
              )
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        type='category',
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    )
)
fig

In [76]:
import plotly.graph_objects as go

In [90]:
fig = px.line(tickets_by_month,
              x='Month',
              y='count',
              title='Number of Parking Tickets Issued by Month',
              labels={'count': 'Number of Tickets'},
              markers=True
              )
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        type='category',
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    )
)
fig


In [87]:
tickets_by_year = park_data[['Year']].groupby(['Year'])['Year'] \
                             .count() \
                             .reset_index(name='count') \
                             .head(50)
print(tickets_by_year)

    Year   count
0   2011      24
1   2013   58715
2   2014  133775
3   2015  127340
4   2016  116409
5   2017  127463
6   2018  112286
7   2019   98287
8   2020   48969
9   2021   31803
10  2022   45949
11  2023    8897


In [89]:
fig = px.line(tickets_by_year,
              x='Year',
              y='count',
              title='Number of Parking Tickets Issued by Year',
              labels={'count': 'Number of Tickets'},
              markers=True
              )
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        type='category'
    )
)
fig

In [103]:
top_5_violations = park_data[['Violation Description']].groupby(['Violation Description'])['Violation Description'] \
                             .count() \
                             .reset_index(name='count') \
                             .sort_values(['count'], ascending=False) \
                             .head(10)
print(top_5_violations)

           Violation Description   count
51                 Meter Expired  157837
18       2011 Parked -meter expi  110710
14         2010 Overtime Parking   51469
76                      Overtime   46700
87                     Rush Hour   46126
89                    Snow Route   43328
61                       No Stop   40854
20  2011 Stand/stop/park on snow   31303
72           No stopping anytime   27422
58                       No Park   27259


In [105]:
fig = px.bar(top_5_violations, x='Violation Description',y='count', color='Violation Description',
             title='Top 10 Parking Violations',
             labels={'count': 'Number of Tickets'})
fig