In [1]:
import folium
from folium.plugins import HeatMap
import pandas as pd
import geopandas as gpd
import shapely as sh
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import mplleaflet

%matplotlib inline


In [2]:
# Base folium map centred on the San Francisco coordinates (lat 37.76, lon -122.45).
# NOTE(review): `sfmap` is not referenced again in the visible cells — confirm it is still needed.
sfmap = folium.Map(location=[37.76, -122.45], tiles="Stamen Terrain", zoom_start=12)

## Plot SFPD Districts

In [3]:
# Read the current police district shapefile export with geopandas.
sfpddistricts = gpd.read_file("./data/CurrentPoliceDistricts/geo_export_b221c4a1-a468-437e-bd99-da16a1504a7b.shp")

In [4]:
# Draw the district geometries on a single 10x10 axes, then hand the figure
# to mplleaflet for display.
f, ax = plt.subplots(figsize=(10, 10))
sfpddistricts.plot(ax=ax)
mplleaflet.display(fig=f)

## Plot SF Neighborhoods

In [5]:
# Read the analysis-neighborhood shapefile export with geopandas.
sfneighborhoods = gpd.read_file("./data/AnalysisNeighborhoods/geo_export_e6ad2522-9d21-4211-be48-e66ab369225f.shp")

In [6]:
# Draw the neighborhood geometries on a single 12x12 axes, then hand the
# figure to mplleaflet for display.
f, ax = plt.subplots(figsize=(12, 12))
sfneighborhoods.plot(ax=ax)
mplleaflet.display(fig=f)

In [7]:
# Load the incident-level crime table and preview its first row.
# NOTE(review): the preview shows `Unnamed: 0` / `Unnamed: 0.1` columns, which look like
# index columns written out by earlier CSV saves — confirm whether they can be dropped.
sfcrime = pd.read_csv('data/sfcrime_no_wc.csv')
sfcrime.head(1)

Unnamed: 0.1,Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Location,latitude,longitude,geometry,Neighborhoods,Hour,Day,Year,Type
0,0,150060275,NON-CRIMINAL,LOST PROPERTY,Monday,2015-01-19 00:00:00,1900-01-01 14:00:00,MISSION,"(37.7617007179518, -122.42158168137)",37.761701,-122.421582,POINT (-122.42158168137 37.7617007179518),Mission,14,0,2015,OTHER


## Plot choropleth graph of all crime

In [8]:
# definition of the boundaries in the map
# Path to the neighborhood boundary file (presumably GeoJSON — the choropleth cell
# keys on `feature.properties.nhood`). Note the name says "district" but the file
# lives under AnalysisNeighborhoods.
district_geo = r'./data/AnalysisNeighborhoods/AnalysisNeighborhoods.json'
# (latitude, longitude) pair used as the map centre.
SF_COORDINATES = (37.76, -122.45)

In [9]:
# Count incidents per neighborhood (stored as floats) and lay the result out as
# a two-column frame: neighborhood name, number of incidents.
crimedata2 = (
    sfcrime['Neighborhoods']
    .value_counts()
    .astype(float)
    .to_frame()
    .reset_index()
)
crimedata2.columns = ['Neighborhoods', 'Number']

In [10]:
# creation of the choropleth
# Shade each neighborhood by its incident count from `crimedata2`; rows are joined to
# the boundary features through the `nhood` feature property.
# NOTE(review): `Map.choropleth` with `geo_path` / `threshold_scale` looks like the legacy
# folium API; newer releases provide `folium.Choropleth(geo_data=..., bins=...)` — confirm
# against the installed folium version before re-running.
map1 = folium.Map(location=SF_COORDINATES, zoom_start=12)
map1.choropleth(geo_path = district_geo,
                data = crimedata2,
                columns = ['Neighborhoods', 'Number'],
                key_on = 'feature.properties.nhood',
                threshold_scale=[0, 30000, 80000, 120000, 180000, 240000],
                fill_color = 'YlOrRd', 
                fill_opacity = 0.7, 
                line_opacity = 0.2,
                legend_name = 'Number of incidents per neighborhood')

# Bare expression so the notebook renders the map as the cell output.
map1

In [11]:
# Load the McLaren Park crime-probability table and preview the first rows.
McLaren = pd.read_csv('data/McLarenParkProbs.csv')
McLaren.head()

Unnamed: 0.1,Unnamed: 0,Day,Hour,latitude,longitude,Crime_Probability,variable,value
0,0,1,22,37.718279,-122.425836,0.356366,Type_NON-VIOLENT,1.0
1,4,4,8,37.71782,-122.407272,0.318649,Type_NON-VIOLENT,1.0
2,8,4,7,37.710103,-122.424549,0.287613,Type_NON-VIOLENT,1.0
3,9,3,17,37.718279,-122.425836,0.346605,Type_NON-VIOLENT,1.0
4,15,4,9,37.716414,-122.410433,0.318649,Type_NON-VIOLENT,1.0


In [12]:
# Remove the 'Type_' marker from the crime-type labels. The vectorised string
# accessor replaces the per-element Python lambda and lets missing values pass
# through instead of raising on them.
McLaren['variable'] = McLaren['variable'].str.replace('Type_', '')

In [13]:
# Preview again to check that `variable` no longer carries the 'Type_' marker.
McLaren.head()

Unnamed: 0.1,Unnamed: 0,Day,Hour,latitude,longitude,Crime_Probability,variable,value
0,0,1,22,37.718279,-122.425836,0.356366,NON-VIOLENT,1.0
1,4,4,8,37.71782,-122.407272,0.318649,NON-VIOLENT,1.0
2,8,4,7,37.710103,-122.424549,0.287613,NON-VIOLENT,1.0
3,9,3,17,37.718279,-122.425836,0.346605,NON-VIOLENT,1.0
4,15,4,9,37.716414,-122.410433,0.318649,NON-VIOLENT,1.0


In [14]:
### Create interactive map with probabilities

In [15]:
def crime_time_type(day, hour='all', data=None):
    """Select crime probabilities for one day (optionally one hour) and add marker styling.

    Parameters
    ----------
    day : scalar
        Value matched against the ``Day`` column.
    hour : scalar or 'all', default 'all'
        Value matched against the ``Hour`` column; ``'all'`` keeps every hour
        of ``day``.
    data : pandas.DataFrame, optional
        Table to filter. Needs the columns ``Day``, ``Hour``, ``latitude``,
        ``longitude``, ``Crime_Probability`` and ``variable``. Defaults to the
        notebook-level ``McLaren`` frame, which keeps existing calls working.

    Returns
    -------
    pandas.DataFrame
        A freshly indexed frame with the columns, in this order: ``latitude``,
        ``longitude``, ``Crime_Probability``, ``variable``, ``radius``
        (probability * 80) and ``fill_opacity`` (the probability itself).
        The input table is not modified.
    """
    if data is None:
        data = McLaren

    keep = data['Day'] == day
    if hour != 'all':
        # Both conditions must hold; `&` replaces the concat + all(axis=1) detour.
        keep = keep & (data['Hour'] == hour)

    wanted = ['latitude', 'longitude', 'Crime_Probability', 'variable']
    # `.ix` has been removed from pandas; `.loc` with a boolean mask is the
    # supported spelling. reset_index returns a new frame, so the column
    # assignments below never touch `data`.
    crimedf = data.loc[keep, wanted].reset_index(drop=True)

    crimedf['radius'] = crimedf['Crime_Probability'] * 80
    crimedf['fill_opacity'] = crimedf['Crime_Probability']
    return crimedf

def park_map_prob(day, hour='all'):
    """Build a folium map of McLaren Park with one circle per crime-probability row.

    Rows come from ``crime_time_type(day, hour)``. Each row whose ``variable``
    is one of VIOLENT / NON-VIOLENT / OTHER / PROPERTY becomes a borderless
    ``CircleMarker`` at (latitude, longitude), sized by ``radius``, shaded by
    ``fill_opacity`` and coloured by crime type; rows with any other label are
    skipped.

    Parameters
    ----------
    day : scalar
        Forwarded to ``crime_time_type``.
    hour : scalar or 'all', default 'all'
        Forwarded to ``crime_time_type``.

    Returns
    -------
    folium.Map
        The map with all markers added.
    """
    McLarenPark = (37.7180842, -122.4190721)
    # Crime type -> marker colour. One lookup replaces four branches that
    # differed only in the colour name.
    type_colors = {
        'VIOLENT': 'red',
        'NON-VIOLENT': 'orange',
        'OTHER': 'yellow',
        'PROPERTY': 'green',
    }
    # NOTE(review): newer folium releases appear to have dropped the built-in
    # Stamen tilesets — confirm "Stamen Terrain" still renders with the
    # installed version.
    park = folium.Map(location=McLarenPark, zoom_start=15, tiles="Stamen Terrain")

    probdf = crime_time_type(day, hour)
    # Access fields by column name: integer-position lookups on a
    # label-indexed row (row[1][3]) are deprecated in recent pandas and
    # silently depend on column order.
    for row in probdf.itertuples(index=False):
        color = type_colors.get(row.variable)
        if color is None:
            continue
        folium.CircleMarker(
            location=[row.latitude, row.longitude],
            radius=row.radius,
            color=color,
            weight=0,
            fill_color=color,
            fill_opacity=row.fill_opacity,
            popup=row.variable,
        ).add_to(park)
    return park

In [16]:
# Render the park map for rows with Day == 0, all hours included (hour defaults to 'all').
park_map_prob(0)

In [17]:
string = "Crime on Mondays at 5pm"
# Weekday abbreviation -> day number, counting from Monday = 0.
day_name = {
    abbreviation: number
    for number, abbreviation in enumerate(('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'))
}


def get_weekday(string):
    """Upper-case ``string`` and split it on single spaces.

    Returns the resulting list of pieces (consecutive spaces yield empty
    strings).

    NOTE(review): despite the name, no weekday is resolved yet and
    ``day_name`` is not consulted here.
    """
    return string.upper().split(' ')


get_weekday(string)

['CRIME', 'ON', 'MONDAYS', 'AT', '5PM']