In [1]:
import time
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapefile as shp
from folium.features import DivIcon
from folium.plugins import HeatMap
from selenium import webdriver

In [2]:
# Load the incident records; the path is relative to the notebook's
# working directory.
TRAIN_CSV = '../train.csv'
df = pd.read_csv(TRAIN_CSV)
df.head(n=5)

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y
0,2015-05-13 23:53:00,WARRANTS,WARRANT ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
1,2015-05-13 23:53:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
2,2015-05-13 23:33:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",VANNESS AV / GREENWICH ST,-122.424363,37.800414
3,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,NORTHERN,NONE,1500 Block of LOMBARD ST,-122.426995,37.800873
4,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,PARK,NONE,100 Block of BRODERICK ST,-122.438738,37.771541


In [3]:
# Discard records whose Y value is exactly 90.0 (placeholder "Arctic"
# coordinates). The frame is modified in place, as before.
arctic_rows = df.index[df['Y'] == 90.0]
df.drop(index=arctic_rows, inplace=True)

In [4]:
# Map centre and bounding box used when rendering the heatmaps.
# NOTE: the names are swapped relative to their contents: `mean_lat` is the
# mean of X (printed as about -122.42) and `mean_lon` is the mean of Y
# (about 37.77). create_map passes them as [mean_lon, mean_lat], so the
# names are kept to avoid breaking it.
mean_lat = df['X'].mean()
mean_lon = df['Y'].mean()
print(mean_lat, mean_lon, sep='\n')

# Corners are [Y, X] pairs: the column-wise minima and maxima.
yx_columns = df[['Y', 'X']]
sw = yx_columns.min().tolist()
ne = yx_columns.max().tolist()
print(sw, ne, sep='\n')

-122.42276317251697
37.76703463356764
[37.7078790224135, -122.51364206429]
[37.81997549229701, -122.36493749408]


In [5]:
# Anchor point for each police district's text label, stored as a
# "first, second" coordinate string (create_map parses the two numbers and
# uses them in that order for the marker position).
district_labels = dict(
    SOUTHERN='37.774432, -122.401121',
    BAYVIEW='37.734332, -122.389920',
    MISSION='37.756478, -122.423663',
    NORTHERN='37.787740, -122.430300',
    TENDERLOIN='37.781980, -122.412981',
    CENTRAL='37.796200, -122.409293',
    PARK='37.765352, -122.449282',
    RICHMOND='37.776204, -122.483285',
    INGLESIDE='37.726817, -122.437207',
    TARAVAL='37.737775, -122.484375',
)

# Combined "X, Y" string per row; used later as a grouping key for
# incidents recorded at the same point.
x_text = df['X'].astype(str)
y_text = df['Y'].astype(str)
df['Coordinates'] = x_text + ', ' + y_text
df.head()

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y,Coordinates
0,2015-05-13 23:53:00,WARRANTS,WARRANT ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599,"-122.425891675136, 37.7745985956747"
1,2015-05-13 23:53:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599,"-122.425891675136, 37.7745985956747"
2,2015-05-13 23:33:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",VANNESS AV / GREENWICH ST,-122.424363,37.800414,"-122.42436302145, 37.8004143219856"
3,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,NORTHERN,NONE,1500 Block of LOMBARD ST,-122.426995,37.800873,"-122.426995326766, 37.80087263276921"
4,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,PARK,NONE,100 Block of BRODERICK ST,-122.438738,37.771541,"-122.438737622757, 37.7715411720578"


In [6]:
# Path to the GeoJSON file with the Police Department district borders.
# Nothing is loaded here; the path is handed to folium.GeoJson in create_map.
district_outlines = 'districts.geojson'

In [15]:
def create_cat_df(cat, df):
    """Count incidents of one category at each distinct coordinate.

    Parameters
    ----------
    cat : str
        Value of the ``Category`` column to select.
    df : pandas.DataFrame
        Frame with at least ``Category`` and ``Coordinates`` columns, where
        ``Coordinates`` is an ``"X, Y"`` string.

    Returns
    -------
    (pandas.DataFrame, float)
        A frame with columns ``X``, ``Y`` and ``Count`` (one row per distinct
        coordinate string) and the largest ``Count`` as a float. If no row
        matches ``cat`` the frame is empty and the maximum is ``0.0``.
    """
    subset = df[df.Category == cat]
    if subset.empty:
        # np.max on an empty array raises; report "nothing found" instead.
        return pd.DataFrame(columns=['X', 'Y', 'Count']), 0.0

    # size() counts rows per group, independent of nulls in other columns
    # (count() on DayOfWeek would silently skip rows where it is missing).
    cat_df = subset.groupby('Coordinates').size().reset_index(name='Count')

    # Split each "X, Y" key once and convert both halves back to floats.
    pairs = [coord.split(',') for coord in cat_df['Coordinates']]
    cat_df['X'] = [float(pair[0]) for pair in pairs]
    cat_df['Y'] = [float(pair[1]) for pair in pairs]

    cat_df = cat_df[['X', 'Y', 'Count']]
    max_count = float(cat_df['Count'].max())
    return cat_df, max_count

In [27]:
def create_map(x, y, counts, cat):
    """Render a heatmap with district outlines and save it as a PNG screenshot.

    The map is written to ``temp-map.html``, opened in a Firefox window
    driven by Selenium, and captured to ``<cat>.png`` (any ``/`` in ``cat``
    is replaced with ``-`` so the name is a valid file name).

    Relies on notebook-level names: ``mean_lon``, ``mean_lat``, ``sw``,
    ``ne``, ``district_outlines``, ``district_labels`` and ``df``.

    Parameters
    ----------
    x, y : sequence of float
        Point coordinates; each heat point is passed to HeatMap as
        ``(y, x, count)``.
    counts : sequence of float
        Weight of each point.
    cat : str
        Label used only to name the screenshot file.

    Raises
    ------
    KeyError
        If ``df.PdDistrict`` contains a district missing from
        ``district_labels``.
    """
    hmap = folium.Map(location=[mean_lon, mean_lat], zoom_start=12)

    heat_layer = HeatMap(list(zip(y, x, counts)),
                         min_opacity=0.2,
                         radius=17,
                         blur=15,
                         max_zoom=1)

    folium.GeoJson(
        district_outlines,
        # Parameter named `feature` so it does not shadow the function's `x`.
        style_function=lambda feature: {'opacity': 1,
                                        'weight': 1,
                                        'color': '#000000',
                                        'fillColor': '#00FFFFFF'},
        tooltip=folium.GeoJsonTooltip(
            fields=['district'],
            localize=True
        ),
        name='geojson'
    ).add_to(hmap)

    hmap.add_child(heat_layer)

    # Text label for every district present in the data, nudged slightly
    # from its anchor point.
    for district in df.PdDistrict.unique():
        first_text, second_text = district_labels[district].split(',')
        label_pos = [float(first_text) + 0.003, float(second_text) - 0.005]

        folium.Marker(
            label_pos,
            icon=DivIcon(
                icon_size=(150, 36),
                icon_anchor=(0, 0),
                # Closing tag fixed: the original emitted a second <strong>.
                html='<div style="font-size: 8pt"><strong>%s</strong></div>' % district)
        ).add_to(hmap)

    hmap.fit_bounds([sw, ne])

    # Save the map as an HTML file
    fn = 'temp-map.html'
    hmap.save(fn)

    # NOTE(review): `executable_path` is deprecated in Selenium 4 -- confirm
    # the installed Selenium version still accepts it.
    browser = webdriver.Firefox(executable_path="./geckodriver")
    try:
        # Load the file that was just written, via an absolute file:// URI,
        # instead of a hard-coded placeholder path.
        browser.get(Path(fn).resolve().as_uri())
        # Give the map tiles some time to load
        time.sleep(5)
        cat_name = cat.replace('/', '-')
        browser.save_screenshot('{}.png'.format(cat_name))
    finally:
        # Always close the browser, even if loading or the screenshot fails.
        browser.quit()

In [None]:
# Render one heatmap screenshot per crime category.
for cat in df.Category.unique():
    cat_df, max_count = create_cat_df(cat, df)

    # Plain Python floats, in the order create_map expects.
    X_coords = cat_df['X'].astype(float).tolist()
    Y_coords = cat_df['Y'].astype(float).tolist()
    counts = cat_df['Count'].astype(float).tolist()
    create_map(X_coords, Y_coords, counts, cat)

In [None]:
# Create a heatmap for each category without the Hall of Justice (HOJ).
# Crimes without a recorded location often receive HOJ as their location,
# distorting the heatmap. The HOJ is taken to be the single most frequent
# coordinate string in the data.
hoj_coord = df['Coordinates'].value_counts().index[0]
nohoj = df[df.Coordinates != hoj_coord]

for cat in nohoj.Category.unique():
    cat_df, max_count = create_cat_df(cat, nohoj)

    X_coords = [float(x) for x in cat_df.X.values]
    Y_coords = [float(x) for x in cat_df.Y.values]
    counts = [float(x) for x in cat_df.Count.values]
    # create_map uses its last argument only to name the screenshot; the
    # suffix keeps these images from overwriting the per-category heatmaps
    # written by the previous cell.
    create_map(X_coords, Y_coords, counts, '{}_noHOJ'.format(cat))

In [20]:
# Create a heatmap of ALL categories together.
# The relabelling is done on a copy so the real Category values in `df`
# survive; overwriting them in place made every earlier cell unusable on
# re-run.
cat = 'ALL'
all_df = df.assign(Category=cat)

cat_df, max_count = create_cat_df(cat, all_df)

X_coords = [float(x) for x in cat_df.X.values]
Y_coords = [float(x) for x in cat_df.Y.values]
counts = [float(x) for x in cat_df.Count.values]
create_map(X_coords, Y_coords, counts, cat)