# Python for Data Science: Final Project

This project uses the open <a href="https://www.kaggle.com/wosaku/crime-in-vancouver">Crime in Vancouver</a> dataset published on Kaggle.

### Preliminary exploration of the dataset

In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib as plt
%matplotlib inline

import folium
from folium import Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

In [2]:
# Load the raw crime records and report the (rows, columns) shape.
crime_csv = './final_project_data/1929_6405_bundle_archive/crime.csv'
data = pd.read_csv(crime_csv)
print(data.shape)

# Discard every record that has at least one missing field, then report
# how many rows remain.
data = data.dropna(axis=0, how='any')
print(data.shape[0])

# Preview the first two cleaned records.
data.head(n=2)

(530652, 12)
474015


Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y,Latitude,Longitude
0,Other Theft,2003,5,12,16.0,15.0,9XX TERMINAL AVE,Strathcona,493906.5,5457452.47,49.269802,-123.083763
1,Other Theft,2003,5,7,15.0,20.0,9XX TERMINAL AVE,Strathcona,493906.5,5457452.47,49.269802,-123.083763


In [3]:
# Distinct crime categories: print how many there are, then show them.
crime_types = data['TYPE'].unique()
print(len(crime_types))
crime_types

9


array(['Other Theft', 'Break and Enter Residential/Other', 'Mischief',
       'Break and Enter Commercial', 'Theft from Vehicle',
       'Vehicle Collision or Pedestrian Struck (with Injury)',
       'Vehicle Collision or Pedestrian Struck (with Fatality)',
       'Theft of Vehicle', 'Theft of Bicycle'], dtype=object)

In [4]:
# Distinct neighbourhoods: print how many there are, then show them.
neighbourhoods = data['NEIGHBOURHOOD'].unique()
print(len(neighbourhoods))
neighbourhoods

24


array(['Strathcona', 'Kerrisdale', 'Dunbar-Southlands',
       'Grandview-Woodland', 'Sunset', 'West End',
       'Central Business District', 'Hastings-Sunrise',
       'Victoria-Fraserview', 'Fairview', 'Kensington-Cedar Cottage',
       'West Point Grey', 'Shaughnessy', 'Renfrew-Collingwood',
       'Killarney', 'Riley Park', 'Arbutus Ridge', 'Musqueam',
       'Mount Pleasant', 'Kitsilano', 'Stanley Park', 'South Cambie',
       'Marpole', 'Oakridge'], dtype=object)

In [5]:
# Number of distinct HUNDRED_BLOCK values in the cleaned data.
data['HUNDRED_BLOCK'].unique().size

21192

### Create interactive map

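The original dataset stores each location twice: as UTM Zone 10 coordinates (columns `X` and `Y`) and as geographic coordinates (columns `Latitude` and `Longitude`). The maps below are built from `Latitude` and `Longitude`.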


In [6]:
# Empty base map centred on Vancouver with OpenStreetMap tiles; no crime
# records are plotted on it yet.
m_1 = folium.Map(
    location=[49.2827, -123.1207],
    zoom_start=10,
    tiles='openstreetmap',
)
m_1

In [11]:
# December break-and-enter incidents whose HOUR is 0, 1, 2 or 3
# (i.e. between 00:00 and 03:59). The substring match covers both the
# commercial and the residential/other break-and-enter categories.
is_break_in = data['TYPE'].str.contains('Break and Enter')
is_early_hours = data['HOUR'].isin(range(0, 4))
is_december = data['MONTH'] == 12

morning_breakin = data[is_break_in & is_early_hours & is_december]
len(morning_breakin)

1071

In [12]:
m_2 = folium.Map(location=[49.2827, -123.1207], tiles='cartodbpositron', zoom_start=12)

# One plain marker per incident. Only the two coordinate columns are needed,
# so walk them in lockstep instead of using iterrows(), which builds a full
# Series object for every row.
for lat, lon in zip(morning_breakin['Latitude'], morning_breakin['Longitude']):
    Marker([lat, lon]).add_to(m_2)

m_2

In [13]:
# NOTE(review): the built-in 'stamen*' tile names may no longer be bundled with
# recent folium releases -- confirm this renders on the installed version.
m_3 = folium.Map(location=[49.2827, -123.1207], tiles='stamenterrain', zoom_start=12)

# Keep only incidents with both coordinates present. Filtering once with
# dropna() replaces the per-row math.isnan() checks and the slow iterrows().
plottable = morning_breakin[['Latitude', 'Longitude']].dropna()

# Group the markers in a cluster layer so nearby incidents collapse into one
# counter bubble until the reader zooms in.
mc = MarkerCluster()
for lat, lon in plottable.itertuples(index=False, name=None):
    mc.add_child(Marker([lat, lon]))
m_3.add_child(mc)

m_3

In [18]:
# Base map for the circle overlay.
# NOTE(review): the built-in 'stamen*' tile names may no longer be bundled with
# recent folium releases -- confirm this renders on the installed version.
m_4 = folium.Map(
    location=[49.2827, -123.1207],
    tiles='stamenwatercolor',
    zoom_start=12,
)

def color_producer(val):
    """Pick the circle colour for a crime type label.

    Parameters
    ----------
    val : str
        Crime type label to classify.

    Returns
    -------
    str
        ``'indigo'`` when ``val`` equals ``'Break and Enter Commercial'``;
        ``'darkred'`` for every other value (residential and others).
    """
    is_commercial = val == 'Break and Enter Commercial'
    return 'indigo' if is_commercial else 'darkred'

# Draw one circle (radius = 20) per incident, coloured by crime type.
# The three needed columns are walked in lockstep; the previous version did
# three separate positional row lookups (.iloc[i]) per incident, each of which
# materialises a whole row before a single field is read from it.
incident_fields = zip(
    morning_breakin['Latitude'],
    morning_breakin['Longitude'],
    morning_breakin['TYPE'],
)
for lat, lon, crime_type in incident_fields:
    Circle(
        location=[lat, lon],
        radius=20,
        color=color_producer(crime_type),
    ).add_to(m_4)

m_4

In [19]:
# NOTE(review): the built-in 'stamen*' tile names may no longer be bundled with
# recent folium releases -- confirm this renders on the installed version.
m_5 = folium.Map(
    location=[49.2827, -123.1207],
    tiles='stamentoner',
    zoom_start=12,
)

# Heat map of the incident coordinates (latitude first, then longitude).
breakin_coords = morning_breakin[['Latitude', 'Longitude']]
HeatMap(data=breakin_coords, radius=10).add_to(m_5)

m_5