In [None]:
from os import path
import geopandas as gpd
import matplotlib
import pandas as pd


# Force matplotlib to plot from notebook
%matplotlib inline
# Increase default plot size
matplotlib.rcParams['figure.figsize'] = (20.0, 20.0)

# Read in GeoJSON data
neighborhoods = gpd.read_file('CPD_Neighborhoods.json')

# Drop extraneous columns
neighborhoods = neighborhoods.drop(['Beat', 'FID', 'Pop_2010'], axis=1)

# Rename columns
neighborhoods = neighborhoods.rename(columns={
    'NHOOD': 'neighborhood_raw',
    'District': 'actual_district',
    'Area_SQMI': 'area_sq_mi',
    'Shape_Area': 'area',
    'Shape_Leng': 'perimeter'
})

# Re-code Central Business District as District 1
neighborhoods.loc[neighborhoods.actual_district == 'CBD', 'actual_district'] = "1"

# Show neighborhoods being used in analysis
# neighborhoods.query('actual_district != "2" | actual_district != "3"')

In [None]:
# Lookup table from the raw neighborhood labels ('neighborhood_raw') to the
# cleaned display names ('neighborhood'). Most entries are plain title-casing,
# but several are genuine renames (e.g. 'C.B.D./RIVERFRONT' -> 'Downtown',
# 'FAY APARTMENTS' -> 'Villages at Roll Hill', 'MOUNT ...' -> 'Mt. ...').
# The dict keeps insertion order, so rows appear in the order written here.
neighborhood_name_mappings = pd.DataFrame(
    list({
        'AVONDALE': 'Avondale',
        'BONDHILL': 'Bond Hill',
        'C.B.D./RIVERFRONT': 'Downtown',
        'CALIFORNIA': 'California',
        'CAMP WASHINGTON': 'Camp Washington',
        'CARTHAGE': 'Carthage',
        'CLIFTON': 'Clifton',
        'COLLEGE HILL': 'College Hill',
        'COLUMBIA TUSCULUM': 'Columbia Tusculum',
        'CORRYVILLE': 'Corryville',
        'CUF': 'CUF',
        'EAST END': 'East End',
        'EAST PRICE HILL': 'East Price Hill',
        'EAST WALNUT HILLS': 'East Walnut Hills',
        'EAST WESTWOOD': 'East Westwood',
        'ENGLISH WOODS': 'English Woods',
        'EVANSTON': 'Evanston',
        'FAY APARTMENTS': 'Villages at Roll Hill',
        'HARTWELL': 'Hartwell',
        'HYDE PARK': 'Hyde Park',
        'KENNEDY HEIGHTS': 'Kennedy Heights',
        'LINWOOD': 'Linwood',
        'LOWER PRICE HILL': 'Lower Price Hill',
        'MADISONVILLE': 'Madisonville',
        'MILLVALE': 'Millvale',
        'MOUNT ADAMS': 'Mt. Adams',
        'MOUNT AIRY': 'Mt. Airy',
        'MOUNT AUBURN': 'Mt. Auburn',
        'MT. LOOKOUT': 'Mt. Lookout',
        'MT. WASHINGTON': 'Mt. Washington',
        'NORTH AVONDALE': 'North Avondale',
        'NORTH FAIRMOUNT': 'North Fairmount',
        'NORTHSIDE': 'Northside',
        'OAKLEY': 'Oakley',
        'OVER THE RHINE': 'Over-the-Rhine',
        'PADDOCK HILLS': 'Paddock Hills',
        'PENDLETON': 'Pendleton',
        'PLEASANT RIDGE': 'Pleasant Ridge',
        'QUEENSGATE': 'Queensgate',
        'RIVERSIDE': 'Riverside',
        'ROSELAWN': 'Roselawn',
        'SAYLER PARK': 'Sayler Park',
        'SEDAMSVILLE': 'Sedamsville',
        'SOUTH CUMMINSVILLE': 'South Cumminsville',
        'SOUTH FAIRMOUNT': 'South Fairmount',
        'SPRING GROVE VILLAGE': 'Spring Grove Village',
        'WALNUT HILLS': 'Walnut Hills',
        'WEST END': 'West End',
        'WEST PRICE HILL': 'West Price Hill',
        'WESTWOOD': 'Westwood',
        'WINTON HILLS': 'Winton Hills',
    }.items()),
    columns=['neighborhood_raw', 'neighborhood'],
)

In [None]:
# Attach the cleaned display name to every neighborhood, then discard the raw
# label once the join is done.
# NOTE: merge() chooses the join key itself (the column names the two frames
# have in common), and the inner join drops any neighborhood whose raw label
# has no entry in neighborhood_name_mappings.
neighborhoods = (
    neighborhoods
    .merge(neighborhood_name_mappings, how='inner')
    .drop(columns=['neighborhood_raw'])
)

In [None]:
# Load the cleaned crime counts and populations, pinning the column types so
# the district code stays text and the two counts are parsed as integers
crimes_csv_path = path.join('..', 'data', 'crimes_clean_counts_and_pops.csv')
crimes_csv_dtypes = {'district': str, 'crimes': int, 'population': int}
crimes_and_pops = pd.read_csv(crimes_csv_path, dtype=crimes_csv_dtypes)

# Disabled alternative: lowercase the neighborhood name on both sides so the
# join could be made case-insensitive
# crimes_and_pops['neighborhood_lower'] = crimes_and_pops.neighborhood.str.lower()
# neighborhoods['neighborhood_lower'] = neighborhoods.neighborhood.str.lower()

# Join the crime/population table onto the neighborhood shapes. The key is
# whatever column names the two frames share, and the inner join keeps only
# rows that match exactly on it.
neighborhoods_merged = neighborhoods.merge(crimes_and_pops, how='inner')

In [None]:
# Write the merged shapes and attributes out as an ESRI Shapefile.
# NOTE: the Shapefile format limits attribute names to 10 characters, so
# longer column names are expected to be truncated in the written file.
neighborhoods_merged.to_file('neighborhoods_clean.shp', driver='ESRI Shapefile')