In [1]:
#We are evaluating crime levels in the boroughs of Greater London between 2011 and 2014. The first step is to look 
#at data for a single month within the time period and conduct EDA. 

#Thereafter we will build possible hypotheses such as has crime gone up in a certain time period or has a specific
#crime has increased across boroughs or how do two boroughs compare for the same kind of crime

#We begin by importing necessary directories 

import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
#importing the csv file containing data on street crimes for Dec 2010 across all boroughs in Greater England
#dropping unnecessary columns

df = pd.read_csv('/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/2011-01-metropolitan-street.csv')
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [3]:
#Since LSOA name contains both the name of the borough and the code, it is difficult to identify by borough alone
#Hence we separate the two 
#We also change location from on or near to just the name of the street for ease of use 

df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [4]:
#Classifying incidence of crime as per boroughs

df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [5]:
#checking for crime type across boroughs

df.groupby(['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [6]:
#checking for the list of unique boroughs

boroughs = set(df['Borough'])
boroughs

{'Barking and Dagenham',
 'Barnet',
 'Bexley',
 'Brent',
 'Bromley',
 'Camden',
 'City of London',
 'Croydon',
 'Ealing',
 'Elmbridge',
 'Enfield',
 'Epping Forest',
 'Epsom and Ewell',
 'Greenwich',
 'Hackney',
 'Hammersmith and Fulham',
 'Haringey',
 'Harrow',
 'Havering',
 'Hertsmere',
 'Hillingdon',
 'Hounslow',
 'Islington',
 'Kensington and Chelsea',
 'Kingston upon Thames',
 'Lambeth',
 'Lewisham',
 'Merton',
 'Newham',
 'Redbridge',
 'Reigate and Banstead',
 'Richmond upon Thames',
 'Sevenoaks',
 'Slough',
 'Southwark',
 'Spelthorne',
 'Sutton',
 'Tandridge',
 'Three Rivers',
 'Thurrock',
 'Tower Hamlets',
 'Waltham Forest',
 'Wandsworth',
 'Westminster'}

In [7]:
#checking for the list of unique crimes

crimes = set(df['Crime type'])
crimes

{'Anti-social behaviour',
 'Burglary',
 'Other crime',
 'Robbery',
 'Vehicle crime',
 'Violent crime'}

In [8]:
#In order to build a hypothesis to view data across different files, we create a function that can loop over the time
# period 2011-2014 and repeat the functions we performed in the EDA above to run the code for this new dataframe

years = ['2011']
months = ['01']

for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        df = pd.read_csv(filename)
df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
1,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.140619,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
2,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
3,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
99094,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134817,51.490740,On or near Thorndike Street,E01004741,Westminster 024F,Violent crime,,
99095,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134491,51.488846,On or near Lupus Street,E01004741,Westminster 024F,Violent crime,,
99096,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134555,51.490466,On or near Garden Terrace,E01004741,Westminster 024F,Other crime,,
99097,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.135419,51.489041,On or near St George'S Square,E01004741,Westminster 024F,Other crime,,


In [9]:
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [10]:
df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [11]:
df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [12]:
df.groupby(['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [13]:
#Hypothesis 1: Burglary has decreased in Westminster between Jan 2011 and Jan 2013

burglary_df = df[(df['Borough'] == 'Westminster') & ( df['Crime type'] == 'Burglary')]


burglary_df = burglary_df.drop(columns=['Longitude', 'Latitude', 'Location'])
burglary_df

Unnamed: 0,Month,Crime type,Borough
92058,2011-01,Burglary,Westminster
92059,2011-01,Burglary,Westminster
92080,2011-01,Burglary,Westminster
92081,2011-01,Burglary,Westminster
92082,2011-01,Burglary,Westminster
...,...,...,...
98676,2011-01,Burglary,Westminster
98714,2011-01,Burglary,Westminster
98719,2011-01,Burglary,Westminster
98731,2011-01,Burglary,Westminster


In [20]:
len(burglary_df)

328

In [21]:
#In order to build a hypothesis to view data across different files, we create a function that can loop over the time
# period 2011-2014 and repeat the functions we performed in the EDA above to run the code for this new dataframe

years = ['2012']
months = ['01']

for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        df = pd.read_csv(filename)
df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,,2012-01,Metropolitan Police Service,Metropolitan Police Service,0.137065,51.583672,On or near Police Station,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
1,,2012-01,Metropolitan Police Service,Metropolitan Police Service,0.137065,51.583672,On or near Police Station,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
2,,2012-01,Metropolitan Police Service,Metropolitan Police Service,0.140035,51.589112,On or near Beansland Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
3,,2012-01,Metropolitan Police Service,Metropolitan Police Service,0.140192,51.582311,On or near Hatch Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2012-01,Metropolitan Police Service,Metropolitan Police Service,0.135866,51.587336,On or near Gibbfield Close,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
91027,fc5d6b3bb7e92655b3c73ad7290de6bdb1f91bb3013b06...,2012-01,Metropolitan Police Service,Metropolitan Police Service,-0.134554,51.490475,On or near Garden Terrace,E01004741,Westminster 024F,Robbery,,
91028,f495f695886a8f85e936dde8f4b63b9dc8aa713597c8bf...,2012-01,Metropolitan Police Service,Metropolitan Police Service,-0.134389,51.488872,On or near Lupus Street,E01004741,Westminster 024F,Robbery,,
91029,85a3e3a34a08e6dbe3118acfcaff665dea062ba55f4eb4...,2012-01,Metropolitan Police Service,Metropolitan Police Service,-0.134817,51.490740,On or near Thorndike Street,E01004741,Westminster 024F,Robbery,,
91030,dd61df766bfc4abfdcaf3778c5d93cd85d0a3051b98a46...,2012-01,Metropolitan Police Service,Metropolitan Police Service,-0.134389,51.488872,On or near Lupus Street,E01004741,Westminster 024F,Violent crime,,


In [22]:
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2012-01,0.137065,51.583672,On or near Police Station,Barking and Dagenham 001A,Anti-social behaviour
1,2012-01,0.137065,51.583672,On or near Police Station,Barking and Dagenham 001A,Anti-social behaviour
2,2012-01,0.140035,51.589112,On or near Beansland Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2012-01,0.140192,51.582311,On or near Hatch Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2012-01,0.135866,51.587336,On or near Gibbfield Close,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
91027,2012-01,-0.134554,51.490475,On or near Garden Terrace,Westminster 024F,Robbery
91028,2012-01,-0.134389,51.488872,On or near Lupus Street,Westminster 024F,Robbery
91029,2012-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Robbery
91030,2012-01,-0.134389,51.488872,On or near Lupus Street,Westminster 024F,Violent crime


In [23]:
df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2012-01,0.137065,51.583672,Police Station,Anti-social behaviour,Barking and Dagenham
1,2012-01,0.137065,51.583672,Police Station,Anti-social behaviour,Barking and Dagenham
2,2012-01,0.140035,51.589112,Beansland Grove,Anti-social behaviour,Barking and Dagenham
3,2012-01,0.140192,51.582311,Hatch Grove,Anti-social behaviour,Barking and Dagenham
4,2012-01,0.135866,51.587336,Gibbfield Close,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
91027,2012-01,-0.134554,51.490475,Garden Terrace,Robbery,Westminster
91028,2012-01,-0.134389,51.488872,Lupus Street,Robbery,Westminster
91029,2012-01,-0.134817,51.490740,Thorndike Street,Robbery,Westminster
91030,2012-01,-0.134389,51.488872,Lupus Street,Violent crime,Westminster


In [24]:
df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,614,614,614,614
Barking and Dagenham,Burglary,229,229,229,229
Barking and Dagenham,Criminal damage and arson,144,144,144,144
Barking and Dagenham,Drugs,90,90,90,90
Barking and Dagenham,Other crime,117,117,117,117
...,...,...,...,...,...
Westminster,Public disorder and weapons,205,205,205,205
Westminster,Robbery,122,122,122,122
Westminster,Shoplifting,338,338,338,338
Westminster,Vehicle crime,237,237,237,237


In [25]:
#Hypothesis 1: Burglary has decreased in Westminster between Jan 2011 and Jan 2013

burglary_df = df[(df['Borough'] == 'Westminster') & ( df['Crime type'] == 'Burglary')]


burglary_df = burglary_df.drop(columns=['Longitude', 'Latitude', 'Location'])
burglary_df

Unnamed: 0,Month,Crime type,Borough
84270,2012-01,Burglary,Westminster
84271,2012-01,Burglary,Westminster
84272,2012-01,Burglary,Westminster
84287,2012-01,Burglary,Westminster
84288,2012-01,Burglary,Westminster
...,...,...,...
90856,2012-01,Burglary,Westminster
90857,2012-01,Burglary,Westminster
90858,2012-01,Burglary,Westminster
90859,2012-01,Burglary,Westminster


In [26]:
len(burglary_df)

310

In [27]:
#In order to build a hypothesis to view data across different files, we create a function that can loop over the time
# period 2011-2014 and repeat the functions we performed in the EDA above to run the code for this new dataframe

years = ['2013']
months = ['01']

for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        df = pd.read_csv(filename)
df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,3639c80728b90690a99fbe4b4539d09cdca65c4d82ed33...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.579818,50.816207,On or near Prison,E01031470,Arun 006A,Other crime,Court result unavailable,
1,8b87843d8fd523ff63447a6bc67d0b08492a7600b851e3...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.805749,51.798149,On or near Lower Road,E01017687,Aylesbury Vale 019A,Violent crime,Under investigation,
2,a3b26456448702399aaa37cbf0472a060bc87f5a54931c...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.805749,51.798149,On or near Lower Road,E01017687,Aylesbury Vale 019A,Violent crime,Under investigation,
3,,2013-01,Metropolitan Police Service,Metropolitan Police Service,0.140634,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2013-01,Metropolitan Police Service,Metropolitan Police Service,0.136513,51.588214,On or near Kingston Close,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
84329,6a6b6a951fc52fcde7651c8a1cb3966d4f2b8b0f02b845...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Offender given a caution,
84330,a29d3691ac3bdf2d3ecfd3b0bca6876ecf0ae027cb13a3...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Court result unavailable,
84331,ede539c372fc4a3a8665a9107ee40a15cd08e9a4b7dd3b...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,
84332,5baf5cfd5b5b5cf9915b8d3af69fef96889574cc39f8ec...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,


In [28]:
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2013-01,-0.579818,50.816207,On or near Prison,Arun 006A,Other crime
1,2013-01,-0.805749,51.798149,On or near Lower Road,Aylesbury Vale 019A,Violent crime
2,2013-01,-0.805749,51.798149,On or near Lower Road,Aylesbury Vale 019A,Violent crime
3,2013-01,0.140634,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2013-01,0.136513,51.588214,On or near Kingston Close,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
84329,2013-01,,,No Location,,Other crime
84330,2013-01,,,No Location,,Other crime
84331,2013-01,,,No Location,,Other crime
84332,2013-01,,,No Location,,Other crime


In [29]:
df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2013-01,-0.579818,50.816207,Prison,Other crime,Arun
1,2013-01,-0.805749,51.798149,Lower Road,Violent crime,Aylesbury Vale
2,2013-01,-0.805749,51.798149,Lower Road,Violent crime,Aylesbury Vale
3,2013-01,0.140634,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
4,2013-01,0.136513,51.588214,Kingston Close,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
84329,2013-01,,,,Other crime,
84330,2013-01,,,,Other crime,
84331,2013-01,,,,Other crime,
84332,2013-01,,,,Other crime,


In [30]:
df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Arun,Other crime,1,1,1,1
Aylesbury Vale,Violent crime,2,2,2,2
Barking and Dagenham,Anti-social behaviour,535,535,535,535
Barking and Dagenham,Burglary,264,264,264,264
Barking and Dagenham,Criminal damage and arson,133,133,133,133
...,...,...,...,...,...
Westminster,Shoplifting,321,321,321,321
Westminster,Vehicle crime,283,283,283,283
Westminster,Violent crime,490,490,490,490
Windsor and Maidenhead,Public disorder and weapons,1,1,1,1


In [31]:
#Hypothesis 1: Burglary has decreased in Westminster between Jan 2011 and Jan 2013

burglary_df = df[(df['Borough'] == 'Westminster') & ( df['Crime type'] == 'Burglary')]


burglary_df = burglary_df.drop(columns=['Longitude', 'Latitude', 'Location'])
burglary_df

Unnamed: 0,Month,Crime type,Borough
77197,2013-01,Burglary,Westminster
77209,2013-01,Burglary,Westminster
77210,2013-01,Burglary,Westminster
77211,2013-01,Burglary,Westminster
77212,2013-01,Burglary,Westminster
...,...,...,...
82962,2013-01,Burglary,Westminster
83004,2013-01,Burglary,Westminster
83032,2013-01,Burglary,Westminster
83231,2013-01,Burglary,Westminster


In [32]:
len(burglary_df)

340

In [14]:
#Hypothesis 2: Vehicle crime across boroughs is the same throughout 2011

vehicle_df = df[df['Crime type'] == 'Vehicle crime']
vehicle_df = vehicle_df.drop(columns=['Longitude', 'Latitude', 'Location'])
vehicle_df

Unnamed: 0,Month,Crime type,Borough
25,2011-01,Vehicle crime,Barking and Dagenham
26,2011-01,Vehicle crime,Barking and Dagenham
27,2011-01,Vehicle crime,Barking and Dagenham
42,2011-01,Vehicle crime,Barking and Dagenham
43,2011-01,Vehicle crime,Barking and Dagenham
...,...,...,...
98793,2011-01,Vehicle crime,Westminster
98827,2011-01,Vehicle crime,Westminster
99029,2011-01,Vehicle crime,Westminster
99079,2011-01,Vehicle crime,Westminster


In [None]:
#Building a function to test hypothesis 1

def vcp(borough = 'Westminster', year='2011', month='01',):
    
    data = pd.read_csv('London Crime Data 2011 to 2014/{}-{}-metropolitan-street.csv'.format(year,month))
    
    data['LSOA name'].fillna('No location', inplace = True)
            
    return len(data.loc[(data['Borough'].str.contains("Westminster")) & ((data['Crime type'] == "Burglary")|(data['Crime type'] == "Violence and sexual offences"))])/ \
                            len(data.loc[(data['LSOA name'].str.contains("Westminster"))]