In [9]:
#We are evaluating crime levels in the boroughs of Greater London between 2011 and 2014. The first step is to look 
#at data for a single month within the time period and conduct EDA. 

#Thereafter we will build possible hypotheses, such as whether crime has gone up in a certain time period, whether a
#specific crime has increased across boroughs, or how two boroughs compare for the same kind of crime

#We begin by importing the necessary libraries

import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [42]:
# Read the Metropolitan Police street-level crime CSV for January 2011 (file 2011-01),
# then discard the columns that the rest of this notebook does not use.

df = pd.read_csv('/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/2011-01-metropolitan-street.csv')
df = df.drop(
    labels=[
        'Crime ID',
        'Reported by',
        'Falls within',
        'LSOA code',
        'Last outcome category',
        'Context',
    ],
    axis='columns',
)
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [43]:
# 'LSOA name' holds the borough name followed by a space and a 4-character area
# suffix (e.g. 'Barking and Dagenham 001A'), which makes it awkward to group by
# borough alone. Slicing off the last 5 characters leaves just the borough name.
#
# 'Location' is prefixed with 'On or near '. The prefix is removed by matching it
# explicitly at the start of the string rather than by slicing a fixed 11
# characters, so a value that does not start with the prefix is left untouched
# instead of being silently truncated.

df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] = df['Location'].str.replace(r'^On or near ', '', regex=True)
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [44]:
# Tally the recorded incidents for every (borough, crime type) pair.
# Each remaining column shows the number of non-null entries in that group.

df.groupby(by=['Borough', 'Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [45]:
# Same tally as above, but indexed by crime type first so that boroughs can be
# compared within each crime type.

df.groupby(by=['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [46]:
# Collect the distinct values found in the 'Borough' column.
# NOTE(review): the result below also lists districts such as Slough and Thurrock,
# which do not look like London boroughs - confirm whether they should be
# filtered out before comparing boroughs.

boroughs = {borough for borough in df['Borough']}
boroughs

{'Barking and Dagenham',
 'Barnet',
 'Bexley',
 'Brent',
 'Bromley',
 'Camden',
 'City of London',
 'Croydon',
 'Ealing',
 'Elmbridge',
 'Enfield',
 'Epping Forest',
 'Epsom and Ewell',
 'Greenwich',
 'Hackney',
 'Hammersmith and Fulham',
 'Haringey',
 'Harrow',
 'Havering',
 'Hertsmere',
 'Hillingdon',
 'Hounslow',
 'Islington',
 'Kensington and Chelsea',
 'Kingston upon Thames',
 'Lambeth',
 'Lewisham',
 'Merton',
 'Newham',
 'Redbridge',
 'Reigate and Banstead',
 'Richmond upon Thames',
 'Sevenoaks',
 'Slough',
 'Southwark',
 'Spelthorne',
 'Sutton',
 'Tandridge',
 'Three Rivers',
 'Thurrock',
 'Tower Hamlets',
 'Waltham Forest',
 'Wandsworth',
 'Westminster'}

In [48]:
# To test hypotheses that span several files, loop over the requested years and
# months, read each monthly CSV and stack them into one dataframe. The clean-up
# steps from the EDA above are then repeated on this new dataframe.
# Add more entries to `years` / `months` to cover more of the 2011-2014 period.

years = ['2011']
months = ['01']

# Collect every month's frame and concatenate once at the end. Assigning
# pd.read_csv(...) directly to `df` inside the loop would keep only the last
# file read as soon as more than one year/month is listed.
monthly_frames = []
for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        monthly_frames.append(pd.read_csv(filename))

# ignore_index=True gives the combined frame a fresh 0..n-1 index, which for a
# single file is the same index pd.read_csv produces on its own.
df = pd.concat(monthly_frames, ignore_index=True)
df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
1,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.140619,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
2,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
3,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
99094,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134817,51.490740,On or near Thorndike Street,E01004741,Westminster 024F,Violent crime,,
99095,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134491,51.488846,On or near Lupus Street,E01004741,Westminster 024F,Violent crime,,
99096,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134555,51.490466,On or near Garden Terrace,E01004741,Westminster 024F,Other crime,,
99097,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.135419,51.489041,On or near St George'S Square,E01004741,Westminster 024F,Other crime,,


In [50]:
# Discard the columns that the analysis below does not use
# (same set that was dropped during the single-month EDA).
df = df.drop(
    labels=[
        'Crime ID',
        'Reported by',
        'Falls within',
        'LSOA code',
        'Last outcome category',
        'Context',
    ],
    axis='columns',
)
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [51]:
# 'LSOA name' is the borough name followed by a space and a 4-character area
# suffix (e.g. 'Westminster 024F'); slicing off the last 5 characters leaves
# just the borough name.
# The 'On or near ' prefix of 'Location' is removed by matching it explicitly at
# the start of the string rather than by slicing a fixed 11 characters, so a
# value without the prefix is left untouched instead of being silently truncated.
df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] = df['Location'].str.replace(r'^On or near ', '', regex=True)
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [52]:
# Tally the recorded incidents for every (borough, crime type) pair.
df.groupby(by=['Borough', 'Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [53]:
# Same tally, indexed by crime type first and borough second.
df.groupby(by=['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [66]:
# Hypothesis 1: Burglary has increased in Westminster between Jan 2011 and Jan 2012
#
# Keep only the rows recorded as 'Burglary' in 'Westminster', then drop the
# coordinate and street-name columns.
# NOTE(review): the output below only contains Month 2011-01, so the Jan 2012
# data still has to be loaded before this comparison can be made.

burglary_df = (
    df.loc[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Burglary')]
    .drop(columns=['Longitude', 'Latitude', 'Location'])
)
burglary_df

Unnamed: 0,Month,Crime type,Borough
92058,2011-01,Burglary,Westminster
92059,2011-01,Burglary,Westminster
92080,2011-01,Burglary,Westminster
92081,2011-01,Burglary,Westminster
92082,2011-01,Burglary,Westminster
...,...,...,...
98676,2011-01,Burglary,Westminster
98714,2011-01,Burglary,Westminster
98719,2011-01,Burglary,Westminster
98731,2011-01,Burglary,Westminster


In [64]:
# Hypothesis 2: Vehicle crime across boroughs is the same throughout 2011
#
# Keep every row recorded as 'Vehicle crime' (all boroughs), then drop the
# coordinate and street-name columns.

vehicle_df = (
    df.loc[df['Crime type'] == 'Vehicle crime']
    .drop(columns=['Longitude', 'Latitude', 'Location'])
)
vehicle_df

Unnamed: 0,Month,Crime type,Borough
25,2011-01,Vehicle crime,Barking and Dagenham
26,2011-01,Vehicle crime,Barking and Dagenham
27,2011-01,Vehicle crime,Barking and Dagenham
42,2011-01,Vehicle crime,Barking and Dagenham
43,2011-01,Vehicle crime,Barking and Dagenham
...,...,...,...
98793,2011-01,Vehicle crime,Westminster
98827,2011-01,Vehicle crime,Westminster
99029,2011-01,Vehicle crime,Westminster
99079,2011-01,Vehicle crime,Westminster
