## Analyze the impact of the 2010 Olympic Winter Games in Vancouver on the crime rate

Please use http://nbviewer.org/ if you cannot view maps in this notebook.

In [9]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import geocoder
from geopy.geocoders import Nominatim
import folium
from folium.plugins import HeatMap
import pyproj
import ast

In [10]:
# Load the crime records; each row is one reported incident.
crime_records_path = "data/crime_records.csv"
df = pd.read_csv(crime_records_path)
df.head(5)

Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
0,Break and Enter Commercial,2012,12,14,8,52,,Oakridge,491285.0,5453433.0
1,Break and Enter Commercial,2019,3,7,2,6,10XX SITKA SQ,Fairview,490612.964805,5457110.0
2,Break and Enter Commercial,2019,8,27,4,12,10XX ALBERNI ST,West End,491007.779775,5459174.0
3,Break and Enter Commercial,2014,8,8,5,13,10XX ALBERNI ST,West End,491015.943352,5459166.0
4,Break and Enter Commercial,2005,11,14,3,9,10XX ALBERNI ST,West End,491021.385727,5459161.0


In [11]:
# Load the Olympic venues file (semicolon-separated).
venues_raw = pd.read_csv("data/olympic-venues.csv", sep=';')

# 'Geom' holds a dict literal as text; parse it and pull out its
# 'coordinates' entry, which is read below as [longitude, latitude].
venue_points = venues_raw['Geom'].apply(ast.literal_eval).str['coordinates']

# Expose latitude/longitude as plain columns and drop the columns
# that are no longer needed.
df_olympic = (
    venues_raw
    .assign(LAT=venue_points.str[1], LON=venue_points.str[0])
    .drop(columns=['Geom', 'URL_LINK'])
)
df_olympic

Unnamed: 0,VENUE,LAT,LON
0,UBC Thunderbird Arena,49.259985,-123.241677
1,Vancouver Olympic and Paralympic Village,49.271252,-123.107513
2,Vancouver Olympic Centre / Paralympic Centre,49.24427,-123.108887
3,Trout Lake Training Venue,49.254578,-123.064735
4,Pacific Coliseum,49.285896,-123.042763
5,Britannia Training Venue,49.276035,-123.070617
6,Main Media Centre,49.288544,-123.116058
7,Killarney Training Venue,49.22683,-123.044777
8,Canada Hockey Place,49.277912,-123.108788
9,BC Place,49.276672,-123.11219


In [12]:
# Get latitude and longitude of Vancouver
address = 'Vancouver, Canada'

geolocator = Nominatim(user_agent="vancouver_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `.latitude` below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# Map centre used by the folium maps created later in the notebook.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Vancouver are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Vancouver are 49.2608724, -123.113952.


In [13]:
# Create Olympic venues map to determine neighborhoods
map_venues = folium.Map(location=[latitude, longitude], zoom_start=12)

# Neighborhood boundaries, with a tooltip built from each feature's 'name' field
boundary_tooltip = folium.features.GeoJsonTooltip(
    fields=['name'],
    aliases=['Neighborhood name:'],
)
boundary_layer = folium.GeoJson(
    data="data/local-area-boundary.geojson",
    tooltip=boundary_tooltip,
)
boundary_layer.add_to(map_venues)

# One marker per venue; the popup carries the venue name
venue_rows = df_olympic[['LAT', 'LON', 'VENUE']].itertuples(index=False, name=None)
for venue_lat, venue_lon, venue_name in venue_rows:
    marker = folium.Marker([venue_lat, venue_lon], popup=folium.Popup(venue_name))
    marker.add_to(map_venues)

map_venues

In [14]:
# Add neighborhood to the dataframe
# Venue name -> neighbourhood, read off the venues map above.
# 'UBC Thunderbird Arena' is deliberately absent because it's not in Vancouver.
venue_to_neighborhood = {
    'Vancouver Olympic and Paralympic Village': 'Mount Pleasant',
    'Vancouver Olympic Centre / Paralympic Centre': 'Riley Park',
    'Trout Lake Training Venue': 'Kensington-Cedar Cottage',
    'Pacific Coliseum': 'Hastings-Sunrise',
    'Britannia Training Venue': 'Grandview-Woodland',
    'Main Media Centre': 'Central Business District',
    'Killarney Training Venue': 'Killarney',
    'Canada Hockey Place': 'Central Business District',
    'BC Place': 'Central Business District',
}

# Select by venue name rather than by position: re-running this cell is then
# a no-op, whereas slicing with iloc[1:] would drop another venue each time.
in_vancouver = df_olympic['VENUE'].isin(list(venue_to_neighborhood))
df_olympic = df_olympic[in_vancouver].reset_index(drop=True)
df_olympic['NEIGHBOURHOOD'] = df_olympic['VENUE'].map(venue_to_neighborhood)

# Guard against a venue name in the file not matching the mapping above.
assert len(df_olympic) == len(venue_to_neighborhood), "unexpected set of venues"

# Kept for backward compatibility: neighbourhoods in dataframe row order.
venues_neighborhoods = df_olympic['NEIGHBOURHOOD'].tolist()
df_olympic

Unnamed: 0,VENUE,LAT,LON,NEIGHBOURHOOD
0,Vancouver Olympic and Paralympic Village,49.271252,-123.107513,Mount Pleasant
1,Vancouver Olympic Centre / Paralympic Centre,49.24427,-123.108887,Riley Park
2,Trout Lake Training Venue,49.254578,-123.064735,Kensington-Cedar Cottage
3,Pacific Coliseum,49.285896,-123.042763,Hastings-Sunrise
4,Britannia Training Venue,49.276035,-123.070617,Grandview-Woodland
5,Main Media Centre,49.288544,-123.116058,Central Business District
6,Killarney Training Venue,49.22683,-123.044777,Killarney
7,Canada Hockey Place,49.277912,-123.108788,Central Business District
8,BC Place,49.276672,-123.11219,Central Business District


In [15]:
# Convert UTM to latitude and longitude
def xy_to_lonlat(x, y):
    """Convert UTM zone 10 (north, WGS84) coordinates to WGS84 longitude/latitude.

    Parameters
    ----------
    x, y : scalar or array-like
        UTM easting and northing.

    Returns
    -------
    tuple
        ``(lon, lat)``, in that order, as returned by the pyproj transformer.
    """
    # pyproj.transform() is deprecated; Transformer is its replacement.
    # EPSG:32610 is WGS84 / UTM zone 10N and EPSG:4326 is WGS84 lat/lon.
    # always_xy=True keeps the (easting, northing) -> (lon, lat) axis order.
    utm_to_wgs84 = pyproj.Transformer.from_crs("EPSG:32610", "EPSG:4326", always_xy=True)
    lon, lat = utm_to_wgs84.transform(x, y)
    return lon, lat

# Convert every record's UTM X/Y to longitude/latitude in one vectorised call.
lon, lat = xy_to_lonlat(df['X'], df['Y'])

# Work on a copy with LAT/LON appended, then drop every row that has a
# missing value in any column (not only in the coordinates).
df1 = df.assign(LAT=lat.tolist(), LON=lon.tolist()).dropna()
df1.head()

  """


Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y,LAT,LON
1,Break and Enter Commercial,2019,3,7,2,6,10XX SITKA SQ,Fairview,490612.964805,5457110.0,49.266678,-123.129029
2,Break and Enter Commercial,2019,8,27,4,12,10XX ALBERNI ST,West End,491007.779775,5459174.0,49.285255,-123.123649
3,Break and Enter Commercial,2014,8,8,5,13,10XX ALBERNI ST,West End,491015.943352,5459166.0,49.285181,-123.123536
4,Break and Enter Commercial,2005,11,14,3,9,10XX ALBERNI ST,West End,491021.385727,5459161.0,49.285132,-123.123461
5,Break and Enter Commercial,2006,5,21,4,50,10XX ALBERNI ST,West End,491021.385727,5459161.0,49.285132,-123.123461


In [16]:
# Create heatmap of Vancouver with types of crime in 2010
crime_types = sorted(df1['TYPE'].unique())
map_vancouver = folium.Map(location=[latitude, longitude], zoom_start=12)

# Neighborhood boundaries as a toggleable layer, with a tooltip built from
# each feature's 'name' field
boundary_tooltip = folium.features.GeoJsonTooltip(
    fields=['name'],
    aliases=['Neighborhood name:'],
)
boundary_layer = folium.GeoJson(
    data="data/local-area-boundary.geojson",
    name="Neighborhood boundary",
    show=True,
    tooltip=boundary_tooltip,
)
boundary_layer.add_to(map_vancouver)

# Olympic venue markers; the popup carries the venue name
venue_rows = df_olympic[['LAT', 'LON', 'VENUE']].itertuples(index=False, name=None)
for venue_lat, venue_lon, venue_name in venue_rows:
    marker = folium.Marker([venue_lat, venue_lon], popup=folium.Popup(venue_name))
    marker.add_to(map_vancouver)

# Restrict to 2010 once; every heat layer below is built from this subset
crimes_2010 = df1.loc[df1['YEAR'] == 2010]

# Layer with all 2010 crimes, visible by default
map_data_2010 = list(zip(crimes_2010['LAT'], crimes_2010['LON']))
all_crimes_heat = HeatMap(map_data_2010)
all_crimes_heat.add_to(folium.FeatureGroup(name='All Crimes', show=True).add_to(map_vancouver))

# One layer per crime type, hidden until selected in the layer control
for crime_type in crime_types:
    crimes_of_type = crimes_2010.loc[crimes_2010['TYPE'] == crime_type]
    map_data_crime = list(zip(crimes_of_type['LAT'], crimes_of_type['LON']))
    type_heat = HeatMap(map_data_crime)
    type_heat.add_to(folium.FeatureGroup(name=crime_type, show=False).add_to(map_vancouver))

folium.LayerControl(collapsed=False).add_to(map_vancouver)
map_vancouver

In [17]:
# Calculate percentage change of crime in Feb 2009 vs Feb 2010
# Incident counts per (YEAR, MONTH), one column per crime TYPE
monthly_counts = df.groupby(['YEAR', 'MONTH', 'TYPE'])['DAY'].count()
df2 = monthly_counts.unstack().reset_index()
df2.columns.name = None
df2['Total'] = df2.iloc[:, 2:].sum(axis=1)

# Keep only February of 2009 and 2010
is_february = df2['MONTH'].eq(2)
is_2009_or_2010 = df2['YEAR'].isin([2009, 2010])
df2 = df2[is_2009_or_2010 & is_february]

# The first remaining row (Feb 2009, since groupby sorts its keys) is the
# baseline; its own change is therefore 0 in every column.
baseline_feb = df2.iloc[0, 2:]
df2_change = ((df2.iloc[:, 2:] - baseline_feb) * 100 / baseline_feb).round(2)
df2_change

Unnamed: 0,Break and Enter Commercial,Break and Enter Residential/Other,Homicide,Mischief,Offence Against a Person,Other Theft,Theft from Vehicle,Theft of Bicycle,Theft of Vehicle,Vehicle Collision or Pedestrian Struck (with Fatality),Vehicle Collision or Pedestrian Struck (with Injury),Total
73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
85,-28.29,-22.4,,13.69,10.24,-13.65,-35.43,30.77,-37.2,,38.64,-16.69


Compared to the same period in 2009:
- Total crime decreased by 17%.
- Most crime categories dropped: Break and Enter Commercial down 28%, Break and Enter Residential down 22%, Other Theft down 14%, Theft from Vehicle down 35%, Theft of Vehicle down 37%. 
- Some crime categories increased: Mischief up 14%, Offence Against a Person up 10%, Theft of Bicycle up 31%, Vehicle Collision Injury up 39%.
- Data for serious crime categories like Homicide and Vehicle Collision with Fatality is not available for this comparison (shown as NaN): no incidents of these types were recorded in Feb 2009 and/or Feb 2010.

In [18]:
# Calculate percentage change of crime in 2009 vs 2010 (whole year)
# Incident counts per YEAR, one column per crime TYPE
yearly_counts = df.groupby(['YEAR', 'TYPE'])['MONTH'].count()
df3 = yearly_counts.unstack().reset_index()
df3.columns.name = None
df3['Total'] = df3.iloc[:, 1:].sum(axis=1)

# Keep only 2009 and 2010
df3 = df3[df3['YEAR'].isin([2009, 2010])]

# The first remaining row (2009, since groupby sorts its keys) is the
# baseline; its own change is therefore 0 in every column.
baseline_year = df3.iloc[0, 1:]
df3_change = ((df3.iloc[:, 1:] - baseline_year) * 100 / baseline_year).round(2)
df3_change

Unnamed: 0,Break and Enter Commercial,Break and Enter Residential/Other,Homicide,Mischief,Offence Against a Person,Other Theft,Theft from Vehicle,Theft of Bicycle,Theft of Vehicle,Vehicle Collision or Pedestrian Struck (with Fatality),Vehicle Collision or Pedestrian Struck (with Injury),Total
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,-10.82,-6.49,-44.44,1.74,-3.8,-6.28,-13.94,1.65,-22.05,-28.57,3.83,-7.69


- Homicide dropped 44% and Vehicle Collision with Fatality dropped 29% in 2010 compared to 2009. 

### Examine the impact of Olympic Games 2010 on crime in each neighborhood

In [19]:
# List of Olympic venues' neighborhoods
# Several venues share a neighborhood, so de-duplicate before sorting.
neighborhoods = sorted(set(df_olympic['NEIGHBOURHOOD']))
neighborhoods

['Central Business District',
 'Grandview-Woodland',
 'Hastings-Sunrise',
 'Kensington-Cedar Cottage',
 'Killarney',
 'Mount Pleasant',
 'Riley Park']

In [20]:
# Percentage change for each venue's neighborhood in Feb 2009 vs Feb 2010
# Incident counts per (YEAR, MONTH, NEIGHBOURHOOD), one column per crime TYPE
df4 = df.groupby(['YEAR', 'MONTH', 'NEIGHBOURHOOD', 'TYPE'])['DAY'].count().unstack().reset_index()
df4.columns.name = None
df4['Total'] = df4.iloc[:, 3:].sum(axis=1)
df4 = df4[df4['NEIGHBOURHOOD'].isin(neighborhoods)]
df4_2009 = df4[(df4['YEAR'] == 2009) & (df4['MONTH'] == 2)].reset_index(drop=True)
df4_2010 = df4[(df4['YEAR'] == 2010) & (df4['MONTH'] == 2)].reset_index(drop=True)

# Align the two months on NEIGHBOURHOOD instead of on row position, so a
# neighbourhood with no records in one of the months yields NaN rather than
# being silently compared against a different neighbourhood.
counts_2009 = df4_2009.set_index('NEIGHBOURHOOD').drop(columns=['YEAR', 'MONTH'])
counts_2010 = df4_2010.set_index('NEIGHBOURHOOD').drop(columns=['YEAR', 'MONTH'])
counts_2009 = counts_2009.reindex(counts_2010.index)

df4_change = ((counts_2010 - counts_2009) * 100 / counts_2009).round(2).reset_index()
df4_change

Unnamed: 0,NEIGHBOURHOOD,Break and Enter Commercial,Break and Enter Residential/Other,Mischief,Other Theft,Theft from Vehicle,Theft of Bicycle,Theft of Vehicle,Vehicle Collision or Pedestrian Struck (with Injury),Vehicle Collision or Pedestrian Struck (with Fatality),Total
0,Central Business District,-29.55,-65.22,40.28,-8.04,-57.54,155.56,-83.33,4.76,,-31.75
1,Grandview-Woodland,50.0,200.0,-48.15,-25.0,43.48,,200.0,50.0,,22.35
2,Hastings-Sunrise,-75.0,-41.18,25.0,150.0,-75.0,,16.67,-12.5,,-27.54
3,Kensington-Cedar Cottage,-25.0,-33.33,-34.62,-33.33,-12.28,200.0,-33.33,200.0,,-19.48
4,Killarney,-75.0,-10.0,166.67,-50.0,-35.0,,20.0,100.0,,-11.11
5,Mount Pleasant,-80.0,-41.67,37.5,-15.0,-16.33,50.0,-60.0,50.0,,-22.06
6,Riley Park,0.0,100.0,0.0,-50.0,-42.11,,-44.44,500.0,,0.0
