## Configure Notebook

In [2]:
# 3rd Party Imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
from pathlib import Path
from shapely import wkt


# Configure Notebook
# Render matplotlib figures directly in the notebook output area.
%matplotlib inline
# Apply the 'fivethirtyeight' matplotlib style sheet and seaborn's "notebook"
# plotting context to every figure created after this cell.
plt.style.use('fivethirtyeight')
sns.set_context("notebook")
# NOTE(review): the 'ignore' filter below silences *every* warning for the rest
# of the session (deprecations and runtime warnings included) — consider
# narrowing it to the specific categories that are known to be noisy.
import warnings
warnings.filterwarnings('ignore')

# Data Gathering and Cleaning for Neighborhood Crime Rates

## Import Data

In [3]:
# Resolve project directories relative to the notebook's working directory.
# `parent_dir` is the folder that contains this notebook's folder.
current_dir = Path.cwd()
parent_dir = current_dir.parent

# Store Neighborhood Crime Rates Data as GeoDataFrame
crime = gpd.read_file("../0_raw_data/crime_data/neighbourhood-crime-rates - 4326.shp")

# Preview the raw data. A notebook cell only renders its *last* expression, so
# the preview has to close the cell; placed mid-cell its result is discarded.
crime.head()

In [4]:
# Show every column name of the GeoDataFrame as a plain Python list
print(crime.columns.tolist())

['_id1', 'AREA_NA2', 'HOOD_ID3', 'POPULAT4', 'ASSAULT5', 'ASSAULT6', 'ASSAULT7', 'ASSAULT8', 'ASSAULT9', 'ASSAULT10', 'ASSAULT11', 'ASSAULT12', 'ASSAULT13', 'ASSAULT14', 'ASSAULT15', 'ASSAULT16', 'ASSAULT17', 'ASSAULT18', 'ASSAULT19', 'ASSAULT20', 'ASSAULT21', 'ASSAULT22', 'ASSAULT23', 'ASSAULT24', 'AUTOTHE25', 'AUTOTHE26', 'AUTOTHE27', 'AUTOTHE28', 'AUTOTHE29', 'AUTOTHE30', 'AUTOTHE31', 'AUTOTHE32', 'AUTOTHE33', 'AUTOTHE34', 'AUTOTHE35', 'AUTOTHE36', 'AUTOTHE37', 'AUTOTHE38', 'AUTOTHE39', 'AUTOTHE40', 'AUTOTHE41', 'AUTOTHE42', 'AUTOTHE43', 'AUTOTHE44', 'BIKETHE45', 'BIKETHE46', 'BIKETHE47', 'BIKETHE48', 'BIKETHE49', 'BIKETHE50', 'BIKETHE51', 'BIKETHE52', 'BIKETHE53', 'BIKETHE54', 'BIKETHE55', 'BIKETHE56', 'BIKETHE57', 'BIKETHE58', 'BIKETHE59', 'BIKETHE60', 'BIKETHE61', 'BIKETHE62', 'BIKETHE63', 'BIKETHE64', 'BREAKEN65', 'BREAKEN66', 'BREAKEN67', 'BREAKEN68', 'BREAKEN69', 'BREAKEN70', 'BREAKEN71', 'BREAKEN72', 'BREAKEN73', 'BREAKEN74', 'BREAKEN75', 'BREAKEN76', 'BREAKEN77', 'BREAKEN78'

In [5]:
# Summarize Data and Normalize by Population
# Each entry pairs the raw-column prefix(es) of one crime type with the name of
# the summary column that replaces those raw columns.
column_groups = [
    (["ASSAULT"], "ASSAULT"),
    (["AUTOTHE"], "AUTOTHEFT"),
    (["BIKETHE"], "BIKETHEFT"),
    (["BREAKEN"], "BREAKENTER"),
    (["HOMICID"], "HOMICIDE"),
    (["ROBBERY"], "ROBBERY"),
    (["SHOOTIN"], "SHOOTING"),
    (["THEFTFR"], "THEFT_MV"),
    (["THEFTOV"], "THEFT_OV")
]

# Re-run guard. The raw frame still carries "POPULAT4"; the cleaned frame has it
# renamed to "POPULATION". Without this check a second execution of the cell
# would match the generated "ASSAULT"/"ASSAULT_NORM" columns by prefix, overwrite
# "ASSAULT" with their mean, and then fail with a KeyError on "POPULAT4".
if "POPULAT4" in crime.columns:
    # Build the result on a copy so an error part-way through leaves `crime`
    # untouched instead of half-transformed.
    crime_summary = crime.copy()

    # Loop Through Columns and Summarize Data
    for prefixes, new_column in column_groups:
        # Raw columns belonging to this crime type (matched by name prefix).
        column_group = [
            col for col in crime_summary.columns
            if col.startswith(tuple(prefixes))
        ]
        # NOTE(review): every column sharing the prefix is averaged together.
        # The column names look truncated (prefix + running number), so confirm
        # each group holds a single metric (e.g. not a mix of yearly counts and
        # yearly rates) before relying on this mean.
        crime_summary[new_column] = crime_summary[column_group].mean(axis=1)
        # Per-capita value: the row-wise mean divided by neighbourhood population.
        crime_summary[new_column + "_NORM"] = crime_summary[new_column] / crime_summary["POPULAT4"]
        # Drop the raw columns now that they are summarized.
        crime_summary = crime_summary.drop(columns=column_group)

    # Remove identifier columns and give the remaining ones readable names.
    # `crime` is rebound only here, after every step above has succeeded.
    crime = (
        crime_summary
        .drop(columns=["_id1", "HOOD_ID3"])
        .rename(columns={"AREA_NA2": "NEIGHBOURHOOD", "POPULAT4": "POPULATION"})
    )


# Opt-in export: set to True to (re)write the cleaned CSV on disk.
over_ride = False
if over_ride:
    crime.to_csv(parent_dir / "2_data_cleaning" / "cleaned_csv" / "crime_cleaned.csv", index = False)

# View DataSet
crime.head()

Unnamed: 0,NEIGHBOURHOOD,POPULATION,geometry,ASSAULT,ASSAULT_NORM,AUTOTHEFT,AUTOTHEFT_NORM,BIKETHEFT,BIKETHEFT_NORM,BREAKENTER,...,HOMICIDE,HOMICIDE_NORM,ROBBERY,ROBBERY_NORM,SHOOTING,SHOOTING_NORM,THEFT_MV,THEFT_MV_NORM,THEFT_OV,THEFT_OV_NORM
0,South Eglinton-Davisville,21987,"POLYGON ((-79.38635 43.69784, -79.38623 43.697...",257.80672,0.011725,30.665895,0.001395,104.975584,0.004774,130.266611,...,1.19752,5.4e-05,32.034761,0.001457,1.820015,8.3e-05,76.243084,0.003468,11.917225,0.000542
1,North Toronto,15077,"POLYGON ((-79.39744 43.70694, -79.39837 43.706...",312.22718,0.020709,28.839955,0.001913,77.893082,0.005166,171.983067,...,0.843186,5.6e-05,40.480436,0.002685,0.799834,5.3e-05,75.922334,0.005036,15.778356,0.001047
2,Dovercourt Village,13837,"POLYGON ((-79.43412 43.66015, -79.43537 43.659...",353.172389,0.025524,61.799989,0.004466,135.222384,0.009773,171.290513,...,2.072072,0.00015,86.070851,0.00622,2.082016,0.00015,187.256442,0.013533,14.555833,0.001052
3,Junction-Wallace Emerson,26240,"POLYGON ((-79.4387 43.66767, -79.43841 43.6669...",422.010159,0.016083,68.775774,0.002621,127.214314,0.004848,172.364532,...,1.542461,5.9e-05,57.355054,0.002186,6.464922,0.000246,200.356534,0.007636,16.70156,0.000636
4,Yonge-Bay Corridor,14731,"POLYGON ((-79.38404 43.64497, -79.38502 43.644...",2218.028369,0.150569,103.68067,0.007038,930.550018,0.06317,479.260519,...,4.064631,0.000276,298.961034,0.020295,8.989731,0.00061,475.31357,0.032266,176.995572,0.012015
