## Imports and Helper Functions

In [94]:
import pandas as pd
import mplleaflet
import geopandas as gpd
import matplotlib.pyplot as plt

# Replace any number of spaces with just a single space
def refineSpaces(x):
    return x.replace('\s+', ' ', regex=True)

# Remove leading and trailing white space
def removeExcessWhitespace(x):
    return x.str.strip()

# Convert the string to uppercase
def toUpper(x):
    return x.str.upper()


## Data Cleaning

In [95]:
violations = pd.read_csv('data/csv/ParkingViolations2015-Partial.csv')

violations[['House Number', 'Street Name']] = violations[['House Number', 'Street Name']].apply(refineSpaces)
violations[['House Number', 'Street Name']] = violations[['House Number', 'Street Name']].apply(removeExcessWhitespace)
violations[['House Number', 'Street Name']] = violations[['House Number', 'Street Name']].apply(toUpper)

openmaps = pd.read_csv('data/csv/city_of_new_york.csv')

openmaps[['NUMBER', 'STREET']] = openmaps[['NUMBER', 'STREET']].apply(refineSpaces)
openmaps[['NUMBER', 'STREET']] = openmaps[['NUMBER', 'STREET']].apply(removeExcessWhitespace)
openmaps[['NUMBER', 'STREET']] = openmaps[['NUMBER', 'STREET']].apply(toUpper)

print("Done cleaning data.")


Done cleaning data.


# Merge based on address

In [96]:
merged = pd.merge(violations, openmaps,  how='inner', left_on=['House Number','Street Name'], right_on = ['NUMBER','STREET'])
merged.to_csv("data/csv/Merged-2015.csv")

print(merged.shape)
print("Done merging data frames.")


(4208, 63)
Done merging data frames.


## Plot violations on a map

In [104]:
shape = gpd.read_file('data/shp/nyc-zip')
print(shape.head)
colors = ['red', 'green', 'blue']
ax = shape.plot(color=colors)
ax = plt.scatter(merged['LON'], merged['LAT'], marker='o', s=5, color="blue")
mplleaflet.save_html(fig=ax.figure)


<bound method NDFrame.head of       zcta  bcode  note                                           geometry
0    10001  36061  None  POLYGON ((-74.00368399999996 40.74835499999983...
1    10002  36061  None  POLYGON ((-73.99694699999998 40.71207199999982...
2    10003  36061  None  POLYGON ((-73.99206099999998 40.72564199999982...
3    10007  36061  None  POLYGON ((-74.01046499999998 40.71153499999983...
4    10009  36061  None  POLYGON ((-73.97520799999998 40.71924299999983...
5    10036  36061  None  POLYGON ((-73.99818799999998 40.76492499999982...
6    10004  36061  None  (POLYGON ((-74.026658 40.68571499999982, -74.0...
7    10005  36061  None  POLYGON ((-74.00902899999998 40.70530999999981...
8    10006  36061  None  POLYGON ((-74.01502499999998 40.70627999999983...
9    10010  36061  None  POLYGON ((-73.98226699999998 40.73601999999982...
10   10011  36061  None  POLYGON ((-74.00368399999996 40.74835499999983...
11   10012  36061  None  POLYGON ((-74.00278899999999 40.7272719999998