In [150]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from haversine import haversine
from sklearn.model_selection import train_test_split
np.random.seed(42)
import scipy.stats as stats
import seaborn as sns
import re
from sklearn.preprocessing import StandardScaler
from datetime import timedelta  
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar

from matplotlib import rcParams
rcParams.update({'figure.autolayout':True})

plt.style.use(["presentation"])

%matplotlib inline

In [151]:
df1=pd.read_pickle('./res_parking_permit_violations')

In [152]:
df2=pd.read_csv('./Residential_Parking_Permit_Blocks.csv')

In [153]:
features=['OBJECTID', 'SERVICECODEDESCRIPTION', 'ADDDATE', 'RESOLUTIONDATE', 'STREETADDRESS',
          'LATITUDE', 'LONGITUDE', 'ZIPCODE', 'WARD', 'DETAILS']

In [154]:
# Creating a smaller version of the dataframe with just the features that might be useful

df_short=df1[features]
df_short = df_short.copy()

# A. Create some initial date-related features

In [155]:
# Converting dates to Pandas Date-Time Format

df_short['ADDDATE']=df_short['ADDDATE'].map(pd.to_datetime)
df_short['RESOLUTIONDATE']=df_short['RESOLUTIONDATE'].map(pd.to_datetime)


In [156]:
trial=df_short['ADDDATE'][58]

In [157]:
trial.date

<function Timestamp.date>

In [158]:
# Date ticket issued

df_short['date']=df_short['ADDDATE'].map(lambda x : x.date)

In [159]:
# Day of week ticket issued

df_short['day_of_week']=df_short['ADDDATE'].map(lambda x : x.weekday_name)

In [160]:
# Year ticket issued

df_short['year']=df_short['ADDDATE'].map(lambda x : x.year)

In [161]:
# Month parking violation occurred

df_short['month']=df_short['ADDDATE'].map(lambda x : x.month)

In [162]:
# Time in minutes to resolve complaint

df_short['resolvetime']=(df_short['RESOLUTIONDATE']-df_short['ADDDATE']).astype('timedelta64[m]')

# B. Bin observations by block

In [163]:
# Code from Ben



import re

def get_address_block(address):
    try:
        m = re.findall('(\d+\s)', address)
        return(np.mean([float(x.strip()) for x in m]))
    except:
        return(np.nan)

df_short['BLOCKNUM']=df_short['STREETADDRESS'].apply(get_address_block)

# Code from Ben ends here

def round_address_block(address):
    try:
        address=address/100
        address=np.round(address,0)
        address=int(address*100)
        return address
    except:
        return(np.nan)

df_short['BLOCKNUM']=df_short['BLOCKNUM'].map(round_address_block)

  out=out, **kwargs)


In [164]:
def get_street_name(address):
    try:
        m1=address.replace("BLOCK OF","")
        m2=re.findall('([0-9]+[TH|ST|RD]+|[A-Z]+)', m1)
        return " ".join(m2)
    except:
        return (np.nan)

df_short['STREET']=df_short['STREETADDRESS'].apply(get_street_name)

In [165]:
df_short=df_short.dropna(subset=['BLOCKNUM', 'STREET'])

In [166]:
df_short=df_short.copy()

In [167]:
df_short['GEOBIN']=df_short['BLOCKNUM'].apply(lambda x: str(int(x)))

In [168]:
df_short['GEOBIN']=df_short['GEOBIN'].apply(lambda x: x+ " BLOCK ")

In [169]:
df_short['BLOCK']=df_short['GEOBIN']+df_short['STREET']

# C. A few observations about the blocks

In [170]:
# Here's the list of the blocks and the number of citations issued

df_short['BLOCK'].value_counts(normalize=True)[0:50]

1400 BLOCK CRITTENDEN STREET NW       0.016447
200 BLOCK 3RD STREET NE               0.015131
1600 BLOCK KRAMER STREET NE           0.012603
1200 BLOCK U STREET SE                0.010907
0 BLOCK BATES STREET NW               0.009591
1300 BLOCK I STREET NE                0.009314
1500 BLOCK OLIVE STREET NE            0.008725
4500 BLOCK CLARK PLACE NW             0.008310
600 BLOCK 24TH STREET NE              0.007756
6200 BLOCK ND STREET NW               0.007098
900 BLOCK 6TH STREET NE               0.006856
2200 BLOCK KEARNY STREET NE           0.006683
500 BLOCK 24TH STREET NE              0.006440
500 BLOCK E STREET NE                 0.005678
2000 BLOCK 4TH STREET NE              0.005367
500 BLOCK 5TH STREET NE               0.005228
1200 BLOCK D STREET SE                0.005194
0 BLOCK N STREET SW                   0.005090
5300 BLOCK DANA PLACE NW              0.005021
1400 BLOCK KENNEDY STREET NW          0.004986
600 BLOCK 14TH STREET NE              0.004397
900 BLOCK VAR

In [171]:
# Here, for example, is a count of citations on Sunday, by contrast

df_short.loc[df_short['day_of_week']=='Sunday']['BLOCK'].value_counts()[0:50]

1500 BLOCK SWANN STREET NW          13
100 BLOCK SEATON PLACE NW            7
900 BLOCK T STREET NW                6
1200 BLOCK E STREET SE               6
1400 BLOCK SWANN STREET NW           6
1600 BLOCK 11TH STREET NW            6
1100 BLOCK W STREET NW               5
1700 BLOCK 9TH STREET NW             4
1500 BLOCK MARION STREET NW          3
1400 BLOCK HALF STREET SW            3
1300 BLOCK R STREET NW               3
900 BLOCK P STREET NW                2
1500 BLOCK 1ST STREET SW             2
1500 BLOCK QUEEN STREET NE           2
5100 BLOCK SOUTHERN AVENUE SE        2
1300 BLOCK SIMMS PLACE NE            2
1500 BLOCK S STREET SE               2
3200 BLOCK 13TH STREET NW            2
1000 BLOCK GIRARD STREET NW          2
700 BLOCK 7TH STREET SW              2
900 BLOCK 7TH STREET SW              2
900 BLOCK O STREET NW                2
1300 BLOCK Q STREET NW               2
1300 BLOCK DELAFIELD PLACE NW        2
200 BLOCK 3RD STREET NE              2
7200 BLOCK 15TH PLACE NW 

In [174]:
# Here, for example, is a count of citations in 2017

df_short.loc[df_short['year']==2017]['BLOCK'].value_counts()[0:50]

1200 BLOCK D STREET SE                126
1200 BLOCK U STREET SE                116
200 BLOCK 3RD STREET NE               111
600 BLOCK 24TH STREET NE              110
1500 BLOCK OLIVE STREET NE            105
900 BLOCK 6TH STREET NE               105
1400 BLOCK CRITTENDEN STREET NW        87
1700 BLOCK NEW HAMPSHIRE AVENUE NW     79
2200 BLOCK KEARNY STREET NE            77
500 BLOCK 24TH STREET NE               75
2400 BLOCK E STREET NE                 66
4500 BLOCK CLARK PLACE NW              63
900 BLOCK VARNUM STREET NE             62
0 BLOCK BATES STREET NW                60
1600 BLOCK OLIVE STREET NE             60
1300 BLOCK I STREET NE                 57
1600 BLOCK KRAMER STREET NE            56
300 BLOCK 5TH STREET NE                50
1200 BLOCK SAVANNAH PLACE SE           44
1700 BLOCK 14TH STREET SE              43
0 BLOCK HANOVER PLACE NW               42
0 BLOCK N STREET SW                    40
1400 BLOCK HALF STREET SW              39
4600 BLOCK QUARLES STREET NE      

In [175]:
# Here, for example, is a count of citations on 2016

df_short.loc[df_short['year']==2016]['BLOCK'].value_counts()[0:50]

200 BLOCK 3RD STREET NE              159
4500 BLOCK CLARK PLACE NW            113
0 BLOCK BATES STREET NW              104
1300 BLOCK I STREET NE                98
1400 BLOCK CRITTENDEN STREET NW       88
500 BLOCK 5TH STREET NE               79
1200 BLOCK U STREET SE                76
1400 BLOCK KENNEDY STREET NW          69
0 BLOCK N STREET SW                   68
600 BLOCK STREET NW                   66
2000 BLOCK 4TH STREET NE              57
500 BLOCK 24TH STREET NE              56
500 BLOCK E STREET NE                 51
5300 BLOCK ILLINOIS AVENUE NW         50
900 BLOCK VARNUM STREET NE            45
1500 BLOCK OLIVE STREET NE            42
1100 BLOCK I STREET NE                42
2200 BLOCK KEARNY STREET NE           40
600 BLOCK INDEPENDENCE AVENUE SE      40
100 BLOCK THOMAS STREET NW            38
600 BLOCK G STREET NE                 37
600 BLOCK 24TH STREET NE              37
900 BLOCK 6TH STREET NE               35
1600 BLOCK KRAMER STREET NE           35
1500 BLOCK SWANN