In [None]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px
%matplotlib inline

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Put Plotly into offline notebook mode and switch cufflinks to offline plotting
# so that the figures built below are drawn inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)
cf.go_offline()

In [2]:
# Load the merged parking-violation export into a DataFrame and preview it.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a row
# index written out by an earlier to_csv call — confirm before relying on it.
park_data = pd.read_csv(
    filepath_or_buffer='violations_merged.csv',
    low_memory=False,
)
park_data.head(n=5)

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description
0,2021-11-15,0:00:00,Mayfair PL,"(49.8822, -97.1381)",70.0,52.5,17,Too close to a traffic sign
1,2021-11-15,0:00:00,Wentworth ST,"(49.8714, -97.1612)",70.0,52.5,17,Too close to a traffic sign
2,2021-11-15,0:00:00,LOT128-960 THOMAS,"(49.8987, -97.0846)",70.0,52.5,16,No Park
3,2021-11-15,0:00:00,Lloyd ST,"(49.873, -97.1171)",100.0,75.0,13,Fire Hydrant
4,2021-11-15,0:00:00,Haggart AVE,"(49.9497, -97.2169)",70.0,52.5,4,Oversized Vehicle


In [4]:
# Parse 'Date' into datetime values (harmless if the column is already datetime).
park_data['Date'] = pd.to_datetime(park_data['Date'])

# Integer weekday per ticket; the day-of-week chart labels 0 as Monday and 6 as Sunday.
park_data['Day of Week'] = park_data['Date'].dt.weekday

In [5]:
# Number of distinct values in 'Street'. dropna=False keeps a missing value
# counted as one entry, matching what len(pd.unique(...)) reports.
n = park_data['Street'].nunique(dropna=False)
print("No of Streets: ", n)

No of Streets:  4087


In [6]:
# Ticket count per street, keeping the 30 streets with the most tickets.
# NOTE(review): the printed result lists 'Portage AVE' and 'Portage Ave.' as
# separate streets, so spelling variants of a street name are not merged here.
top_30_streets = (
    park_data.groupby('Street')['Street']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(30)
)
print(top_30_streets)

              Street  count
242    Bannatyne AVE  28532
3971     William AVE  28333
2437    McDermot AVE  26514
1599     Hargrave ST  26092
1907      Kennedy ST  22297
1185     Edmonton ST  18145
657       Carlton ST  17543
1398        Garry ST  17473
1339         Fort ST  15168
530         Broadway  15064
3515     St Mary AVE  14797
1059       Donald ST  13781
2956     Portage AVE  13735
1948         King ST  12804
3439        Smith ST  12775
1368        Furby ST  11198
233      Balmoral ST  10672
2701  Notre Dame AVE  10540
3480       Spence ST  10336
3390    Sherbrook ST  10297
2988     Princess ST  10153
28       Adelaide ST   9519
3213        Rorie ST   9048
2958    Portage Ave.   8848
4079        York AVE   8551
73             Alley   8098
3156       River AVE   7738
880      Corydon AVE   7711
2122     Langside ST   7256
1200       Elgin AVE   6958


In [8]:
# Bar chart of the 30 most-ticketed streets, one colour per street.
fig = px.bar(
    top_30_streets,
    x='Street',
    y='count',
    color='Street',
    title='Top 30 Streets with Most Parking Tickets',
    labels={'count': 'Number of Tickets'},
)
# Both layout tweaks in a single call: minimum text size and slanted street names.
fig.update_layout(uniformtext_minsize=8, xaxis_tickangle=-45)
fig

In [9]:
# Ticket count per weekday number, busiest day first.
# head(50) is kept from the original; 'Day of Week' has at most seven values,
# so it does not drop any rows.
tickets_by_day = (
    park_data.groupby('Day of Week')['Day of Week']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(50)
)
print(tickets_by_day)

   Day of Week   count
3            3  167057
2            2  164516
1            1  163375
4            4  158115
0            0  141352
5            5   61072
6            6   54430


In [23]:
# Bar chart of tickets per weekday.
fig = px.bar(
    tickets_by_day,
    x='Day of Week',
    y='count',
    color='Day of Week',
    title='Number of Tickets Issued by Day of Week',
    labels={'count': 'Number of Tickets'},
)

# Replace the numeric weekday ticks (0-6) with day names, Monday first.
weekday_axis = dict(
    tickmode='array',
    tickvals=list(range(7)),
    ticktext=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
)
fig.update_layout(xaxis=weekday_axis)
fig

In [18]:
# Split the ticket date into calendar year and month columns for the
# per-month and per-year summaries that follow.
park_data['Year'] = park_data['Date'].dt.year
park_data['Month'] = park_data['Date'].dt.month
park_data.head()

Unnamed: 0,Date,Time,Street,Location,Full Fine,Discounted Fine,Violation Code,Violation Description,Day of Week,Year,Month
0,2021-11-15,0:00:00,Mayfair PL,"(49.8822, -97.1381)",70.0,52.5,17,Too close to a traffic sign,0,2021,11
1,2021-11-15,0:00:00,Wentworth ST,"(49.8714, -97.1612)",70.0,52.5,17,Too close to a traffic sign,0,2021,11
2,2021-11-15,0:00:00,LOT128-960 THOMAS,"(49.8987, -97.0846)",70.0,52.5,16,No Park,0,2021,11
3,2021-11-15,0:00:00,Lloyd ST,"(49.873, -97.1171)",100.0,75.0,13,Fire Hydrant,0,2021,11
4,2021-11-15,0:00:00,Haggart AVE,"(49.9497, -97.2169)",70.0,52.5,4,Oversized Vehicle,0,2021,11


In [12]:
# Ticket count per calendar month (1-12), left in month order rather than
# sorted by count. head(50) is kept from the original and cannot drop rows here.
tickets_by_month = (
    park_data.groupby('Month')['Month']
    .count()
    .reset_index(name='count')
    .head(50)
)
print(tickets_by_month)

    Month  count
0       1  82851
1       2  89655
2       3  64615
3       4  62436
4       5  65544
5       6  67868
6       7  72744
7       8  77259
8       9  81512
9      10  80907
10     11  68850
11     12  95676


In [14]:
# Bar chart of tickets per calendar month.
# The y-axis label now reads 'Number of Tickets' so it agrees with this chart's
# title and with the line chart drawn from the same tickets_by_month frame.
fig = px.bar(
    tickets_by_month,
    x='Month',
    y='count',
    color='Month',
    title='Number of Tickets Issued by Month',
    labels={'count': 'Number of Tickets'},
)
fig.update_layout(
    xaxis=dict(
        showticklabels=True,
        # Treat month numbers as discrete categories and show them as names.
        type='category',
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    )
)
fig

In [15]:
import plotly.graph_objects as go

In [16]:
# Line chart (with point markers) of tickets per calendar month.
fig = px.line(
    tickets_by_month,
    x='Month',
    y='count',
    title='Number of Parking Tickets Issued by Month',
    labels={'count': 'Number of Tickets'},
    markers=True,
)

# Month numbers are shown as discrete categories labelled with month names;
# the axis line is drawn and the vertical grid is hidden.
month_axis = dict(
    showline=True,
    showgrid=False,
    showticklabels=True,
    type='category',
    tickmode='array',
    tickvals=list(range(1, 13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
fig.update_layout(xaxis=month_axis)
fig


In [17]:
# Ticket count per calendar year, in year order.
# NOTE(review): the printed result has only 24 tickets for 2011 and no row for
# 2012 — worth checking whether those 2011 dates are genuine.
tickets_by_year = (
    park_data.groupby('Year')['Year']
    .count()
    .reset_index(name='count')
    .head(50)
)
print(tickets_by_year)

    Year   count
0   2011      24
1   2013   58715
2   2014  133775
3   2015  127340
4   2016  116409
5   2017  127463
6   2018  112286
7   2019   98287
8   2020   48969
9   2021   31803
10  2022   45949
11  2023    8897


In [19]:
# Line chart (with point markers) of tickets per calendar year.
fig = px.line(
    tickets_by_year,
    x='Year',
    y='count',
    title='Number of Parking Tickets Issued by Year',
    labels={'count': 'Number of Tickets'},
    markers=True,
)

# Years are treated as discrete categories, so each year present in the data
# gets its own evenly spaced tick.
year_axis = dict(
    showline=True,
    showgrid=False,
    showticklabels=True,
    type='category',
)
fig.update_layout(xaxis=year_axis)
fig

In [24]:
# Ticket count per violation code, most frequent first.
# Despite the variable name, head(10) keeps the ten most frequent codes; the
# name is left unchanged because the chart cell below refers to it.
top_5_violations = (
    park_data.groupby('Violation Code')['Violation Code']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)
print(top_5_violations)

    Violation Code   count
0                1  268550
37              38   55073
4                5   46732
2                3   46259
5                6   44550
38              39   43342
15              16   31882
32              33   29834
27              28   29340
6                7   28204


In [25]:
# Short display names for the violation codes shown on the x axis.
violation_labels = {
    1: 'Meter Expired',
    38: 'Declared Snow Route',
    5: 'Overtime',
    3: 'Rush Hour',
    6: 'No Stop',
    39: 'Snow Route',
    16: 'No Park',
    33: 'Stand/stop/park on snow',
    28: 'No stopping 15:30- 17:30',
    7: 'No stopping anytime',
}

# Take the tick positions from the frame being plotted instead of a hand-copied
# list, so every bar keeps the right label even if the ranking changes. A code
# without a known short name falls back to the code itself.
plotted_codes = top_5_violations['Violation Code'].tolist()
plotted_labels = [violation_labels.get(code, str(code)) for code in plotted_codes]

# Bar chart of the most frequent violation codes.
fig = px.bar(
    top_5_violations,
    x='Violation Code',
    y='count',
    color='Violation Code',
    title='Top 10 Parking Violations',
    labels={'count': 'Number of Tickets'},
)
fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        # Codes are identifiers, not quantities, so plot them as categories.
        type='category',
        tickmode='array',
        tickvals=plotted_codes,
        ticktext=plotted_labels,
    )
)
fig

In [103]:
# Cross-check which descriptions are attached to each violation code in the
# cleaned data set, with the number of rows per (code, description) pair.
check_data = pd.read_csv('dataCleaned1.csv', low_memory=False)

code_cols = ['Violation Code', 'Violation Description']
# NOTE(review): the printed result places code '10' directly after '1' and
# contains codes such as 'P64', so the codes appear to be sorted as text here.
vc = (
    check_data[code_cols]
    .groupby(code_cols)['Violation Code']
    .count()
    .reset_index(name='count')
    .sort_values(by='Violation Code', ascending=True)
)

# Save the table for inspection outside the notebook. The DataFrame index is
# written as the first, unnamed column of out.csv.
vc.to_csv('out.csv')
print(vc)


    Violation Code         Violation Description   count
0                1       2006 Parked -meter expi       3
1                1       2011 Parked -meter expi  110710
2                1                 Meter Expired   58812
3               10              Blocked driveway     352
4               10      No parking - street work    8156
..             ...                           ...     ...
154            P64  No Stop/Park in a taxi stand     941
155            P65           Extended Snow Route     987
156            P67         No park/stop carshare      60
157            P68         Extended Winter Route     405
158            P69           Annual Winter Route    5025

[159 rows x 3 columns]
