## Imports and Helper Functions

In [23]:
import pandas as pd
import mplleaflet

def refineSpaces(x):
    """Collapse every run of whitespace in `x` into a single space.

    The pattern is ``\\s+``, so tabs and newlines are collapsed as well as
    plain spaces. Leading/trailing runs are reduced to one space, not removed.

    Parameters
    ----------
    x : pandas.Series or pandas.DataFrame
        Object whose values should be normalised.

    Returns
    -------
    Same type as `x`
        The result of ``x.replace(..., regex=True)``; `x` is not modified.
    """
    # Raw string: in a normal literal '\s' is an invalid escape sequence,
    # which newer Python versions report as a SyntaxWarning.
    return x.replace(r'\s+', ' ', regex=True)

def removeExcessWhitespace(x):
    """Return `x` with leading and trailing whitespace stripped from each value.

    `x` is accessed through its ``.str`` accessor, so it is expected to be a
    pandas Series of strings.
    """
    trimmed = x.str.strip()
    return trimmed

def toUpper(x):
    """Return `x` with every value converted to uppercase.

    `x` is accessed through its ``.str`` accessor, so it is expected to be a
    pandas Series of strings.
    """
    uppercased = x.str.upper()
    return uppercased


## Data Cleaning

In [24]:
# Columns that together hold the street address in each data set.
violationAddressCols = ['House Number', 'Street Name']
openmapsAddressCols = ['NUMBER', 'STREET']


def cleanAddressColumns(frame, columns):
    """Normalise the given address columns of `frame`.

    Applies refineSpaces, removeExcessWhitespace and toUpper, in that order,
    to each listed column. `frame` is updated in place and also returned.
    """
    cleaned = frame[columns]
    for step in (refineSpaces, removeExcessWhitespace, toUpper):
        cleaned = cleaned.apply(step)
    frame[columns] = cleaned
    return frame


# Read the address columns as text. Left to type inference, a column that
# holds both plain numbers and values such as "12-34" can come back with
# non-string entries, and the `.str` helpers return NaN for those entries,
# silently discarding the address.
violations = pd.read_csv(
    'data/csv/ParkingViolations2015-Partial.csv',
    dtype={col: str for col in violationAddressCols},
)
cleanAddressColumns(violations, violationAddressCols)

openmaps = pd.read_csv(
    'data/csv/city_of_new_york.csv',
    dtype={col: str for col in openmapsAddressCols},
)
cleanAddressColumns(openmaps, openmapsAddressCols)

print("Done cleaning data.")


Done cleaning data.


## Merge based on address

In [25]:
# Column pairs that identify the same address in each frame.
violationKeys = ['House Number', 'Street Name']
openmapsKeys = ['NUMBER', 'STREET']

# pandas matches null join keys against each other (unlike SQL), so rows
# with a missing house number or street on both sides would be joined as if
# they were the same address. Drop rows with incomplete keys before merging.
# NOTE(review): the keys do not include a borough/city column, so identical
# addresses in different areas would also match — confirm this is acceptable.
merged = pd.merge(
    violations.dropna(subset=violationKeys),
    openmaps.dropna(subset=openmapsKeys),
    how='inner',
    left_on=violationKeys,
    right_on=openmapsKeys,
)

# NOTE(review): the frame's index is written as the first CSV column because
# `index=False` is not passed — kept as-is in case a reader depends on it.
merged.to_csv("data/csv/Merged-2015.csv")

print(merged.shape)
print("Done merging data frames.")


(4208, 63)
Done merging data frames.


## Plot violations on a map

In [28]:
# Draw one small blue dot per merged row, longitude on x and latitude on y.
scatterStyle = dict(marker='o', s=5, color='blue')
axes = merged.plot.scatter(x='LON', y='LAT', **scatterStyle)

# Hand the figure that was just drawn to mplleaflet for HTML export.
# NOTE(review): no output path is given, so mplleaflet's default file name
# is used — confirm where the HTML ends up.
mplleaflet.save_html(fig=axes.get_figure())
