In [2]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from haversine import haversine
from sklearn.model_selection import train_test_split
np.random.seed(42)
import scipy.stats as stats
import seaborn as sns
import re
from sklearn.preprocessing import StandardScaler
from datetime import timedelta  


from matplotlib import rcParams
rcParams.update({'figure.autolayout':True})

plt.style.use(["presentation"])

%matplotlib inline

In [3]:
df1=pd.read_pickle('./res_parking_permit_violations')

In [4]:
df2=pd.read_csv('./Residential_Parking_Permit_Blocks.csv')

In [5]:
df1.columns

Index(['X', 'Y', 'OBJECTID', 'SERVICECODE', 'SERVICECODEDESCRIPTION',
       'SERVICETYPECODEDESCRIPTION', 'ORGANIZATIONACRONYM', 'SERVICECALLCOUNT',
       'ADDDATE', 'RESOLUTIONDATE', 'SERVICEDUEDATE', 'SERVICEORDERDATE',
       'INSPECTIONFLAG', 'INSPECTIONDATE', 'INSPECTORNAME',
       'SERVICEORDERSTATUS', 'STATUS_CODE', 'SERVICEREQUESTID', 'PRIORITY',
       'STREETADDRESS', 'XCOORD', 'YCOORD', 'LATITUDE', 'LONGITUDE', 'CITY',
       'STATE', 'ZIPCODE', 'MARADDRESSREPOSITORYID', 'WARD', 'DETAILS'],
      dtype='object')

In [6]:
features=['OBJECTID', 'SERVICECODEDESCRIPTION', 'ADDDATE', 'RESOLUTIONDATE', 'STREETADDRESS',
          'LATITUDE', 'LONGITUDE', 'ZIPCODE', 'WARD', 'DETAILS']

In [7]:
# Creating a smaller version of the dataframe with just the features that might be useful

df_short=df1[features]
df_short = df_short.copy()

In [8]:
# Converting dates to Pandas Date-Time Format

df_short['ADDDATE']=df_short['ADDDATE'].map(pd.to_datetime)
df_short['RESOLUTIONDATE']=df_short['RESOLUTIONDATE'].map(pd.to_datetime)


In [9]:
df_short['day_of_week']=df_short['ADDDATE'].map(lambda x : x.weekday_name)

In [10]:
df_short['day_of_week'].value_counts()

Friday       5982
Thursday     5830
Wednesday    5809
Tuesday      5643
Monday       4954
Saturday     1392
Sunday        197
Name: day_of_week, dtype: int64

In [11]:
# Month parking violation occurred

df_short['month']=df_short['ADDDATE'].map(lambda x : x.month)

In [12]:
# Time in minutes to resolve complaint

df_short['resolvetime']=(df_short['RESOLUTIONDATE']-df_short['ADDDATE']).astype('timedelta64[m]')

In [14]:
df_short.to_csv('./shorter.csv')

In [54]:
# Code from Ben



import re

def get_address_block(address):
    try:
        m = re.findall('(\d+\s)', address)
        return(np.mean([float(x.strip()) for x in m]))
    except:
        return(np.nan)

df_short['BLOCKNUM']=df_short['STREETADDRESS'].apply(get_address_block)

# Code from Ben ends here

def round_address_block(address):
    try:
        address=address/100
        address=np.round(address,0)
        address=int(address*100)
        return address
    except:
        return(np.nan)

df_short['BLOCKNUM']=df_short['BLOCKNUM'].map(round_address_block)

  out=out, **kwargs)


In [68]:
def get_street_name(address):
    try:
        m1=address.replace("BLOCK OF","")
        m2=re.findall('([0-9]+[TH|ST]+|[A-Z]+)', m1)
        return " ".join(m2)
    except:
        return (np.nan)

df_short['STREET']=df_short['STREETADDRESS'].apply(get_street_name)

In [78]:
df_short=df_short.dropna(subset=['BLOCKNUM', 'STREET'])

In [81]:
df_short=df_short.copy()

In [82]:
df_short['GEOBIN']=df_short['BLOCKNUM'].apply(lambda x: str(int(x)))

In [89]:
df_short['GEOBIN']=df_short['GEOBIN'].apply(lambda x: x+ " BLOCK ")

In [93]:
df_short['BLOCK']=df_short['GEOBIN']+df_short['STREET']

In [94]:
# Here's the list of the blocks and the number of citations issued

df_short['BLOCK'].value_counts()

1400 BLOCK CRITTENDEN STREET NW       475
200 BLOCK RD STREET NE                437
1600 BLOCK KRAMER STREET NE           364
1200 BLOCK U STREET SE                315
0 BLOCK BATES STREET NW               277
1300 BLOCK I STREET NE                269
1500 BLOCK OLIVE STREET NE            252
4500 BLOCK CLARK PLACE NW             240
600 BLOCK 24TH STREET NE              224
6200 BLOCK ND STREET NW               205
900 BLOCK 6TH STREET NE               198
2200 BLOCK KEARNY STREET NE           193
500 BLOCK 24TH STREET NE              186
500 BLOCK E STREET NE                 164
2000 BLOCK 4TH STREET NE              155
500 BLOCK 5TH STREET NE               151
1200 BLOCK D STREET SE                150
0 BLOCK N STREET SW                   147
5300 BLOCK DANA PLACE NW              145
1400 BLOCK KENNEDY STREET NW          144
600 BLOCK 14TH STREET NE              127
100 BLOCK THOMAS STREET NW            126
900 BLOCK VARNUM STREET NE            126
1400 BLOCK HALF STREET SW         

In [106]:
# Here, for example, is a count of citations on Sunday, by contrast

df_short.loc[df_short['day_of_week']=='Sunday']['BLOCK'].value_counts()

1500 BLOCK SWANN STREET NW             13
100 BLOCK SEATON PLACE NW               7
1400 BLOCK SWANN STREET NW              6
1600 BLOCK 11TH STREET NW               6
1200 BLOCK E STREET SE                  6
900 BLOCK T STREET NW                   6
1100 BLOCK W STREET NW                  5
1700 BLOCK 9TH STREET NW                4
1500 BLOCK MARION STREET NW             3
1300 BLOCK R STREET NW                  3
1400 BLOCK HALF STREET SW               3
1500 BLOCK QUEEN STREET NE              2
1500 BLOCK S STREET SE                  2
200 BLOCK RD STREET NE                  2
1500 BLOCK 1ST STREET SW                2
3200 BLOCK 13TH STREET NW               2
5100 BLOCK SOUTHERN AVENUE SE           2
1000 BLOCK GIRARD STREET NW             2
1300 BLOCK SIMMS PLACE NE               2
900 BLOCK 7TH STREET SW                 2
900 BLOCK O STREET NW                   2
1300 BLOCK DELAFIELD PLACE NW           2
1300 BLOCK Q STREET NW                  2
700 BLOCK 7TH STREET SW           