In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import utm
from mpl_toolkits.basemap import Basemap
%pylab

Using matplotlib backend: Qt4Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Load the multi-year crime incident table from the CSV export in the working directory.
crime_data = pd.read_csv(filepath_or_buffer='crime_csv_all_years.csv')

In [3]:
# Preview the first ten incidents to check the column layout.
crime_data.head(10)

Unnamed: 0,TYPE,YEAR,MONTH,HUNDRED_BLOCK,NEIGHBOURHOOD,N_HOOD,X,Y
0,Theft from Vehicle,2003,4,10XX SEYMOUR ST,Central Business District,Central Business District,491045.788677,5458359.80103
1,Theft from Vehicle,2003,5,4XX E HASTINGS ST,Strathcona,Strathcona,493071.086668,5458733.17469
2,Theft from Vehicle,2003,7,23XX W 6TH AVE,Kitsilano,Kitsilano,488383.628754,5457100.10939
3,Theft from Vehicle,2003,3,4XX E HASTINGS ST,Strathcona,Strathcona,493071.086668,5458733.17469
4,Offence Against a Person,2003,8,OFFSET TO PROTECT PRIVACY,,,0.0,0.0
5,Mischief,2003,8,82XX HUDSON ST,Marpole,Marpole,490222.830017,5450936.50802
6,Theft from Vehicle,2003,7,4XX E HASTINGS ST,Strathcona,Strathcona,493068.169174,5458724.11381
7,Mischief,2003,1,17XX E 63RD AVE,Victoria-Fraserview,Victoria-Fraserview,494915.829445,5451115.36741
8,BNE Residential/Other,2003,6,28XX TRINITY ST,Hastings-Sunrise,Hastings-Sunrise,496766.329742,5459657.73665
9,Theft from Vehicle,2003,3,4XX E HASTINGS ST,Strathcona,Strathcona,493068.169174,5458724.11381


In [4]:
# Keep only the X/Y coordinate columns of every incident.
crime_coords = crime_data.loc[:, ['X', 'Y']]

In [5]:
# Drop incidents whose location is recorded as zero (e.g. the rows marked
# "OFFSET TO PROTECT PRIVACY", which carry X == 0 and Y == 0).
# The result is rebuilt straight from `crime_data`, so the boolean mask and
# the rows it filters always share one index and re-running this cell gives
# the same frame instead of filtering an already-filtered one.
crime_coords = crime_data.loc[
    (crime_data['X'] > 0) & (crime_data['Y'] > 0),
    ['X', 'Y'],
]

In [6]:
# Pull the two coordinate columns out as plain Python lists.
x, y = (crime_coords[axis_name].tolist() for axis_name in ('X', 'Y'))

In [7]:
# Convert every UTM (easting, northing) pair to geographic coordinates.
# Zone 10, northern hemisphere, is applied to the whole data set.
latlon = [
    utm.to_latlon(easting, northing, 10, northern=True)
    for easting, northing in zip(x, y)
]

In [8]:
# Split the converted pairs into two parallel tuples.
# NOTE(review): the names are swapped relative to their contents -- the
# bounding-box output of this notebook shows `crimeLat` values near -123
# (longitudes) and `crimeLon` values near 49 (latitudes). Other cells use the
# variables with this swapped meaning, so the names must be changed everywhere
# at once or not at all.
crimeLon, crimeLat = zip(*latlon)

In [9]:
# Bounding box of the converted points, used to pick the map extent.
# `crimeLat` actually holds longitudes and `crimeLon` latitudes, so the tuple
# reads (min lon, min lat, max lon, max lat).
(min(crimeLat), min(crimeLon), max(crimeLat), max(crimeLon))

(-123.2239549982749, 49.2008968420765, -123.02340179999538, 49.31334872713912)

In [10]:
# Figure 1: the base map occupies the left two columns (both rows) of a
# 2x5 grid.
fig1 = plt.figure(1)
ax0 = plt.subplot2grid((2, 5), (0, 0), colspan=2, rowspan=2)

# NOTE: the name `map` shadows the Python builtin; it is kept because other
# cells call `map(...)` and `map.plot(...)` on this object.
# Binding the map to `ax0` explicitly keeps every later drawing call on this
# panel even after a different axes has become the current one.
map = Basemap(projection='merc',
    resolution='f', area_thresh=0.05,
    llcrnrlon=-123.3, llcrnrlat=49.15,
    urcrnrlon=-123, urcrnrlat=49.35,
    ax=ax0)

map.drawrivers(linewidth=1.0, color='0.8', antialiased=1)
map.drawcoastlines()
map.fillcontinents(color='burlywood')
map.drawmapboundary()

# Set the title before showing: with a blocking or inline backend, anything
# added after plt.show() never reaches the rendered figure.
ax0.set_title('All crime')
plt.show()

<matplotlib.text.Text at 0x130f10b8>

In [11]:
# VPD offsets all reported locations to protect privacy. The constant below
# is a manual correction added to the projected y coordinate; image
# registration could do better later.
yoffset = 229

# `crimeLat` holds longitudes and `crimeLon` latitudes (the names are swapped
# where they are unpacked), so this call passes (lon, lat), the order a
# Basemap instance expects.
cx, projected_y = map(crimeLat, crimeLon)
cy = np.asarray(projected_y) + yoffset

# Nearly transparent dots, so dense areas build up into darker patches.
map.plot(cx, cy, 'b.', markersize=5, alpha=0.01)
plt.show()

In [12]:
# Six small panels filling columns 2-4 of both rows of the 2x5 grid,
# created row by row (top row first).
ax1, ax2, ax3, ax4, ax5, ax6 = (
    plt.subplot2grid((2, 5), (grid_row, grid_col))
    for grid_row in (0, 1)
    for grid_col in (2, 3, 4)
)

In [32]:
# Distinct years present in the data, in ascending order.
years = np.sort(crime_data['YEAR'].unique())
years

array([2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
       2014, 2015, 2016], dtype=int64)

In [30]:
# Distinct crime categories, in order of first appearance in the table.
types = crime_data['TYPE'].unique()
types

array(['Theft from Vehicle', 'Offence Against a Person', 'Mischief',
       'BNE Residential/Other', 'Other Theft', 'BNE Commercial',
       'Theft of Vehicle', 'Homicide'], dtype=object)

In [16]:
axlist = [ax1, ax2, ax3, ax4, ax5, ax6]

# Location data has been removed from these two categories, so they are left
# off the maps (the data is believed to be available elsewhere and may be
# added later).
unmapped_types = ('Offence Against a Person', 'Homicide')

# Select the categories by name rather than by position in `types`, so the
# panels stay correct even when `types` has been reordered by another cell.
mapped_types = [crime_type for crime_type in types
                if crime_type not in unmapped_types]

# For each mapped crime type, draw a heat map (2-D histogram) of locations.
for frame, crime_type in zip(axlist, mapped_types):
    # Incidents of this type with a non-zero coordinate pair.
    located = crime_data[(crime_data['TYPE'] == crime_type)
                         & (crime_data['X'] > 0)
                         & (crime_data['Y'] > 0)]

    # Draw through the Axes object itself instead of the pyplot "current
    # axes" state, so each panel is addressed explicitly.
    frame.hist2d(located['X'].values, located['Y'].values, bins=100)
    frame.set_title(crime_type)

    # Same fixed window on every panel so the maps are directly comparable.
    frame.set_xlim([484500, 498000])
    frame.set_ylim([5450000, 5462000])

    # Hide the tick labels on both axes of the small panels.
    frame.xaxis.set_ticklabels([])
    frame.yaxis.set_ticklabels([])


In [18]:
# Count incidents per (crime type, year): group by type, then tally the years
# inside each group.
grouped = crime_data.groupby(by=['TYPE'])
grouped_types = grouped.YEAR.value_counts()

In [19]:
# Build a (1 + number of types) x (number of complete years) table: the first
# row holds the years, followed by one row of yearly incident counts per crime
# type, in `types` order.
# The current (last) year is left out because it is not complete yet.
complete_years = years[:-1]

# value_counts() orders each type's years by count, not chronologically, so
# the counts are looked up by year label. Slicing the first N rows instead
# would drop whichever year has the fewest incidents, which is not always the
# incomplete year. A year with no incidents of a type becomes 0 rather than
# shortening that type's row.
yearly_counts = [
    grouped_types[crime_type].reindex(complete_years, fill_value=0).values
    for crime_type in types
]
all_data = np.vstack([complete_years] + yearly_counts)

In [20]:
# Drop the leading row of years; what remains is one row of counts per type.
crime_totals = all_data[1:]
# Display with years down the rows and crime types across the columns.
crime_totals.transpose()

array([[16818,  3515,  6306,  6809, 12226,  3146,  6320,    18],
       [17250,  3792,  5538,  6471, 12056,  3250,  6073,    22],
       [15652,  3763,  5017,  5484, 12086,  2617,  5015,    22],
       [13964,  4354,  5121,  5586, 11221,  2815,  3663,    17],
       [11680,  4401,  4777,  4940, 10467,  2407,  3298,    19],
       [10540,  4218,  5179,  4380, 10215,  2204,  2413,    18],
       [ 9455,  3873,  4407,  3469, 10630,  1840,  1878,    18],
       [ 8222,  3732,  4463,  3245, 10568,  1642,  1464,    10],
       [ 7153,  3859,  4585,  3232, 10462,  1555,  1089,    15],
       [ 7686,  3772,  4202,  3296, 11324,  1664,  1149,     8],
       [ 8018,  3674,  4175,  3007, 11862,  1754,  1030,     6],
       [ 9743,  3146,  4483,  3043, 12690,  2215,  1285,     9],
       [10161,  3198,  4168,  3131, 13047,  2357,  1368,    14]], dtype=int64)

In [21]:
# Order the data so that less frequent crimes end up at the top of the
# stacked bar plot.
# Both results are derived from un-permuted sources (`all_data` and the raw
# TYPE column) instead of from their own previous values, so running this cell
# again cannot permute or transpose the data a second time.
# Assumes the count rows of `all_data` follow pd.unique(crime_data.TYPE) order.
ind = [0, 4, 3, 2, 6, 1, 5, 7]
crime_totals = all_data[1:, :][ind, :].T
types = pd.unique(crime_data.TYPE)[ind]
types

array(['Theft from Vehicle', 'Other Theft', 'BNE Residential/Other',
       'Mischief', 'Theft of Vehicle', 'Offence Against a Person',
       'BNE Commercial', 'Homicide'], dtype=object)

In [23]:
import matplotlib.colors as colors
import matplotlib.cm as cm

In [24]:
# One colour per crime type, sampled at evenly spaced points of the jet
# colormap; `c` has one RGBA row per type.
cmap = cm.jet
c = cmap(np.linspace(0, 1, num=len(types)))

In [25]:
# Figure 2: stacked bar chart of yearly crime counts, one segment per type.
# `crime_totals` is indexed [year, type] here.
y = years[:-1]  # years actually plotted (the last year is excluded)
plt.figure(2)
width = 0.35
ind = np.arange(len(y))

# Running height of the stack underneath the segment being drawn.
b = np.zeros(len(y), dtype=crime_totals.dtype)
for itype in range(len(types)):
    # align='edge' anchors each bar's left edge at `ind`, which is what the
    # tick positions (ind + width/2) below assume.
    plt.bar(ind, crime_totals[:, itype], width, color=c[itype, :],
            bottom=b, align='edge')
    # Rebind rather than add in place so bars already drawn keep their base.
    b = b + crime_totals[:, itype]

plt.xlabel('Year')
plt.ylabel('Crimes')
plt.title('Crimes by year and type')
# Label the ticks with `y`, not `years`: there is one tick per plotted year,
# and `years` has one entry more than there are ticks.
plt.xticks(ind + width/2., y)
plt.legend(types)

<matplotlib.legend.Legend at 0x10431ef0>