In [1]:
#We are evaluating crime levels in the boroughs of Greater London between 2011 and 2014. The first step is to look 
#at data for a single month within the time period and conduct EDA. 

#Thereafter we will build possible hypotheses such as has crime gone up in a certain time period or has a specific
#crime has increased across boroughs or how do two boroughs compare for the same kind of crime

#We begin by importing necessary directories 

import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
#importing the csv file containing data on street crimes for Dec 2010 across all boroughs in Greater England
#dropping unnecessary columns

df = pd.read_csv('/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/2011-01-metropolitan-street.csv')
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [3]:
#Since LSOA name contains both the name of the borough and the code, it is difficult to identify by borough alone
#Hence we separate the two 
#We also change location from on or near to just the name of the street for ease of use 

df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [4]:
#Classifying incidence of crime as per boroughs

df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [5]:
#checking for crime type across boroughs

df.groupby(['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [6]:
#checking for the list of unique boroughs

boroughs = set(df['Borough'])
boroughs

{'Barking and Dagenham',
 'Barnet',
 'Bexley',
 'Brent',
 'Bromley',
 'Camden',
 'City of London',
 'Croydon',
 'Ealing',
 'Elmbridge',
 'Enfield',
 'Epping Forest',
 'Epsom and Ewell',
 'Greenwich',
 'Hackney',
 'Hammersmith and Fulham',
 'Haringey',
 'Harrow',
 'Havering',
 'Hertsmere',
 'Hillingdon',
 'Hounslow',
 'Islington',
 'Kensington and Chelsea',
 'Kingston upon Thames',
 'Lambeth',
 'Lewisham',
 'Merton',
 'Newham',
 'Redbridge',
 'Reigate and Banstead',
 'Richmond upon Thames',
 'Sevenoaks',
 'Slough',
 'Southwark',
 'Spelthorne',
 'Sutton',
 'Tandridge',
 'Three Rivers',
 'Thurrock',
 'Tower Hamlets',
 'Waltham Forest',
 'Wandsworth',
 'Westminster'}

In [7]:
#checking for the list of unique crimes

crimes = set(df['Crime type'])
crimes

{'Anti-social behaviour',
 'Burglary',
 'Other crime',
 'Robbery',
 'Vehicle crime',
 'Violent crime'}

In [8]:
#In order to build a hypothesis to view data across different files, we create a function that can loop over the time
# period 2011-2014 and repeat the functions we performed in the EDA above to run the code for this new dataframe

years = ['2011']
months = ['01']

for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        df = pd.read_csv(filename)
df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
1,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.140619,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
2,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
3,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2011-01,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
99094,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134817,51.490740,On or near Thorndike Street,E01004741,Westminster 024F,Violent crime,,
99095,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134491,51.488846,On or near Lupus Street,E01004741,Westminster 024F,Violent crime,,
99096,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.134555,51.490466,On or near Garden Terrace,E01004741,Westminster 024F,Other crime,,
99097,,2011-01,Metropolitan Police Service,Metropolitan Police Service,-0.135419,51.489041,On or near St George'S Square,E01004741,Westminster 024F,Other crime,,


In [9]:
df = df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
1,2011-01,0.140619,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
2,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
3,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2011-01,0.134947,51.588063,On or near Mead Grove,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,On or near Thorndike Street,Westminster 024F,Violent crime
99095,2011-01,-0.134491,51.488846,On or near Lupus Street,Westminster 024F,Violent crime
99096,2011-01,-0.134555,51.490466,On or near Garden Terrace,Westminster 024F,Other crime
99097,2011-01,-0.135419,51.489041,On or near St George'S Square,Westminster 024F,Other crime


In [10]:
df['Borough'] = df['LSOA name'].str[:-5]
df['Location'] =df['Location'].str[11:]
df = df.drop(columns=['LSOA name'])
df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
1,2011-01,0.140619,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
2,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
3,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
4,2011-01,0.134947,51.588063,Mead Grove,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
99094,2011-01,-0.134817,51.490740,Thorndike Street,Violent crime,Westminster
99095,2011-01,-0.134491,51.488846,Lupus Street,Violent crime,Westminster
99096,2011-01,-0.134555,51.490466,Garden Terrace,Other crime,Westminster
99097,2011-01,-0.135419,51.489041,St George'S Square,Other crime,Westminster


In [11]:
df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Barking and Dagenham,Anti-social behaviour,801,801,801,801
Barking and Dagenham,Burglary,213,213,213,213
Barking and Dagenham,Other crime,653,653,653,653
Barking and Dagenham,Robbery,80,80,80,80
Barking and Dagenham,Vehicle crime,244,244,244,244
...,...,...,...,...,...
Westminster,Burglary,328,328,328,328
Westminster,Other crime,3793,3793,3793,3793
Westminster,Robbery,199,199,199,199
Westminster,Vehicle crime,281,281,281,281


In [12]:
df.groupby(['Borough', 'Crime type']).count().mean()

Month        460.925581
Longitude    460.925581
Latitude     460.925581
Location     460.925581
dtype: float64

In [13]:
df.groupby(['Crime type', 'Borough']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Crime type,Borough,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Anti-social behaviour,Barking and Dagenham,801,801,801,801
Anti-social behaviour,Barnet,1115,1115,1115,1115
Anti-social behaviour,Bexley,648,648,648,648
Anti-social behaviour,Brent,1156,1156,1156,1156
Anti-social behaviour,Bromley,1017,1017,1017,1017
...,...,...,...,...,...
Violent crime,Sutton,179,179,179,179
Violent crime,Tower Hamlets,468,468,468,468
Violent crime,Waltham Forest,450,450,450,450
Violent crime,Wandsworth,298,298,298,298


In [14]:
df.groupby(['Crime type', 'Borough']).count().mean()

Month        460.925581
Longitude    460.925581
Latitude     460.925581
Location     460.925581
dtype: float64

In [15]:
burglary_df = df[(df['Borough'] == 'Westminster') & ( df['Crime type'] == 'Burglary')]


burglary_df = burglary_df.drop(columns=['Longitude', 'Latitude', 'Location'])
burglary_df

Unnamed: 0,Month,Crime type,Borough
92058,2011-01,Burglary,Westminster
92059,2011-01,Burglary,Westminster
92080,2011-01,Burglary,Westminster
92081,2011-01,Burglary,Westminster
92082,2011-01,Burglary,Westminster
...,...,...,...
98676,2011-01,Burglary,Westminster
98714,2011-01,Burglary,Westminster
98719,2011-01,Burglary,Westminster
98731,2011-01,Burglary,Westminster


In [16]:
burglary_df['Month'].count().mean()

328.0

In [17]:
len(burglary_df)

328

In [18]:
#robbery in Westminster for Jan 2011

robbery_df = df[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Robbery')]
robbery_df = robbery_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(robbery_df)

199

In [19]:
#anti-social behaviour for Jan 2011
as_df = df[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Anti-social behaviour')]
as_df = as_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(as_df)

1756

In [20]:
#Other crime for Jan 2011

oc_df = df[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Other crime')]
oc_df = oc_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(oc_df)


3793

In [21]:
#vehicle crime for Jan 2011

vehicle_df = df[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Vehicle crime')]
vehicle_df = vehicle_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(vehicle_df)

281

In [22]:
#violent crime for Jan 2011

violence_df = df[(df['Borough'] == 'Westminster') & (df['Crime type'] == 'Violent crime')]
violence_df = violence_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(violence_df)

693

In [23]:
#total crime in Jan 2011

total_crime = 693+281+3793+1756+199+328
total_crime

7050

In [24]:
#In order to build a hypothesis to view data across different files, we create a function that can loop over the time
# period 2011-2014 and repeat the functions we performed in the EDA above to run the code for this new dataframe

years = ['2013']
months = ['01']

for x in years:
    for y in months:
        filename = f'/Users/mahimakaushiva/Desktop/Mod3Project/London Crime Data 2011 to 2014/{x}-{y}-metropolitan-street.csv'
        new_df = pd.read_csv(filename)
new_df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,3639c80728b90690a99fbe4b4539d09cdca65c4d82ed33...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.579818,50.816207,On or near Prison,E01031470,Arun 006A,Other crime,Court result unavailable,
1,8b87843d8fd523ff63447a6bc67d0b08492a7600b851e3...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.805749,51.798149,On or near Lower Road,E01017687,Aylesbury Vale 019A,Violent crime,Under investigation,
2,a3b26456448702399aaa37cbf0472a060bc87f5a54931c...,2013-01,Metropolitan Police Service,Metropolitan Police Service,-0.805749,51.798149,On or near Lower Road,E01017687,Aylesbury Vale 019A,Violent crime,Under investigation,
3,,2013-01,Metropolitan Police Service,Metropolitan Police Service,0.140634,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2013-01,Metropolitan Police Service,Metropolitan Police Service,0.136513,51.588214,On or near Kingston Close,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
...,...,...,...,...,...,...,...,...,...,...,...,...
84329,6a6b6a951fc52fcde7651c8a1cb3966d4f2b8b0f02b845...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Offender given a caution,
84330,a29d3691ac3bdf2d3ecfd3b0bca6876ecf0ae027cb13a3...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Court result unavailable,
84331,ede539c372fc4a3a8665a9107ee40a15cd08e9a4b7dd3b...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,
84332,5baf5cfd5b5b5cf9915b8d3af69fef96889574cc39f8ec...,2013-01,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Other crime,Investigation complete; no suspect identified,


In [25]:
new_df = new_df.drop(columns=['Crime ID', 'Reported by', 'Falls within', 'LSOA code', 'Last outcome category', 'Context'])
new_df

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA name,Crime type
0,2013-01,-0.579818,50.816207,On or near Prison,Arun 006A,Other crime
1,2013-01,-0.805749,51.798149,On or near Lower Road,Aylesbury Vale 019A,Violent crime
2,2013-01,-0.805749,51.798149,On or near Lower Road,Aylesbury Vale 019A,Violent crime
3,2013-01,0.140634,51.583427,On or near Rams Grove,Barking and Dagenham 001A,Anti-social behaviour
4,2013-01,0.136513,51.588214,On or near Kingston Close,Barking and Dagenham 001A,Anti-social behaviour
...,...,...,...,...,...,...
84329,2013-01,,,No Location,,Other crime
84330,2013-01,,,No Location,,Other crime
84331,2013-01,,,No Location,,Other crime
84332,2013-01,,,No Location,,Other crime


In [26]:
Crimes = set(new_df['Crime type'])
Crimes

{'Anti-social behaviour',
 'Burglary',
 'Criminal damage and arson',
 'Drugs',
 'Other crime',
 'Other theft',
 'Public disorder and weapons',
 'Robbery',
 'Shoplifting',
 'Vehicle crime',
 'Violent crime'}

In [27]:
new_df['Borough'] = new_df['LSOA name'].str[:-5]
new_df['Location'] = new_df['Location'].str[11:]
new_df = new_df.drop(columns=['LSOA name'])
new_df

Unnamed: 0,Month,Longitude,Latitude,Location,Crime type,Borough
0,2013-01,-0.579818,50.816207,Prison,Other crime,Arun
1,2013-01,-0.805749,51.798149,Lower Road,Violent crime,Aylesbury Vale
2,2013-01,-0.805749,51.798149,Lower Road,Violent crime,Aylesbury Vale
3,2013-01,0.140634,51.583427,Rams Grove,Anti-social behaviour,Barking and Dagenham
4,2013-01,0.136513,51.588214,Kingston Close,Anti-social behaviour,Barking and Dagenham
...,...,...,...,...,...,...
84329,2013-01,,,,Other crime,
84330,2013-01,,,,Other crime,
84331,2013-01,,,,Other crime,
84332,2013-01,,,,Other crime,


In [28]:
new_df.groupby(['Borough','Crime type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Month,Longitude,Latitude,Location
Borough,Crime type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Arun,Other crime,1,1,1,1
Aylesbury Vale,Violent crime,2,2,2,2
Barking and Dagenham,Anti-social behaviour,535,535,535,535
Barking and Dagenham,Burglary,264,264,264,264
Barking and Dagenham,Criminal damage and arson,133,133,133,133
...,...,...,...,...,...
Westminster,Shoplifting,321,321,321,321
Westminster,Vehicle crime,283,283,283,283
Westminster,Violent crime,490,490,490,490
Windsor and Maidenhead,Public disorder and weapons,1,1,1,1


In [29]:
#burglaries in Jan 2013
burglary_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Burglary')]
burglary_new_df = burglary_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(burglary_new_df)

340

In [30]:
#anti-social behaviour in Jan 2013
as_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Anti-social behaviour')]
as_new_df = as_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(as_new_df)

1135

In [31]:
#robbery in Jan 2013
robbery_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Robbery')]
robbery_new_df = robbery_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(robbery_new_df)

167

In [32]:
#other crime in Jan 2013
oc_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Other crime')]
oc_new_df = oc_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(oc_new_df)

217

In [33]:
#vehicle crime in Jan 2013
vehicle_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Vehicle crime')]
vehicle_new_df = vehicle_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(vehicle_new_df)

283

In [34]:
#violent crime in Jan 2013
violence_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Violent crime')]
violence_new_df = violence_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(violence_new_df)

490

In [35]:
#criminal damage and arson in Jan 2013
cda_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Criminal damage and arson')]
cda_new_df = cda_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(cda_new_df)

156

In [36]:
#other theft in Jan 2013
other_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Other theft')]
other_new_df = other_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(other_new_df)

2532

In [37]:
#public disorder and weapons in Jan 2013
pdw_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Public disorder and weapons')]
pdw_new_df = pdw_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(pdw_new_df)

115

In [38]:
#shoplifting in Jan 2013
shoplifting_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Shoplifting')]
shoplifting_new_df = shoplifting_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(shoplifting_new_df)

321

In [39]:
#drugs in Jan 2013
drugs_new_df = new_df[(new_df['Borough'] == 'Westminster') & ( new_df['Crime type'] == 'Drugs')]
drugs_new_df = drugs_new_df.drop(columns=['Longitude', 'Latitude', 'Location'])
len(drugs_new_df)

341

In [40]:
#total crime in Jan 2013 in Westminster
Total_Crime = 490+283+217+167+1135+340+156+2532+115+321+341
Total_Crime

6097

In [41]:
#testing Hypothesis 1: There is a difference between mean robberies/
#between Jan 2011 and Jan 2013

#H0: There is no difference between robberies in the two months
#HA: There is a statistically significant difference between robberies in the two months


from __future__ import print_function, division
import scipy.stats
import hypothesis_tests as hyp
from scipy import stats
import math
np.random.seed(10)

In [42]:
now = []
months = ['01']
years = ['2011', '2013']
dates = []
for y in years:
    for x in months:
        a = hyp.vcp('Westminster', y, x)
        now.append(a)
        dates.append('{}-{}'.format(y,x))
        robbery_df['2011-2013'.format(x)[0]] = a

In [43]:
robbery_df

Unnamed: 0,Month,Crime type,Borough,2
92060,2011-01,Robbery,Westminster,0.102071
92098,2011-01,Robbery,Westminster,0.102071
92125,2011-01,Robbery,Westminster,0.102071
92126,2011-01,Robbery,Westminster,0.102071
92127,2011-01,Robbery,Westminster,0.102071
...,...,...,...,...
98825,2011-01,Robbery,Westminster,0.102071
98826,2011-01,Robbery,Westminster,0.102071
99028,2011-01,Robbery,Westminster,0.102071
99085,2011-01,Robbery,Westminster,0.102071


In [44]:
robbery_new_df

Unnamed: 0,Month,Crime type,Borough
77247,2013-01,Robbery,Westminster
77284,2013-01,Robbery,Westminster
77326,2013-01,Robbery,Westminster
77370,2013-01,Robbery,Westminster
77371,2013-01,Robbery,Westminster
...,...,...,...
82918,2013-01,Robbery,Westminster
82986,2013-01,Robbery,Westminster
83047,2013-01,Robbery,Westminster
83206,2013-01,Robbery,Westminster


In [45]:
robbery1 = len(robbery_df)
n1 = total_crime
p1 = robbery1/n1
mu1 = n1*p1
var1 = n1*p1*(1-p1)

robbery2 = len(robbery_new_df)
n2 = Total_Crime
p2 = robbery2/n2
mu2 = n2*p2
var2 = n2*p2*(1-p2)

print("Dist1 has probability {} and sample size {}".format(p1,n1))
print("Dist2 has probability {} and sample size {}".format(p2,n2))

Dist1 has probability 0.02822695035460993 and sample size 7050
Dist2 has probability 0.02739051992783336 and sample size 6097


In [51]:
p_hat = (n1*p1 + n2*p2)/(n1+n2)
critical_value = 1.96

In [53]:
z = (p1-p2)/((p_hat*(1-p_hat)*((1/n1) + (1/n2)))**0.5)
z

0.29071862110712315

In [None]:
#since this value is<1.95, we can conclude that it is not statistically 
#significant and hence we cannot reject the null hypothesis
#Overall, it is safe to conclude that while total crime levels have gone
#down from 7050 to 6097, there is no real change in the proportion of occurence of robberies in Westminster 

In [47]:
#Hypothesis 2: Vehicle crime across boroughs is the same throughout 2011

vehicle_df = df[df['Crime type'] == 'Vehicle crime']
vehicle_df = vehicle_df.drop(columns=['Longitude', 'Latitude', 'Location'])
vehicle_df

Unnamed: 0,Month,Crime type,Borough
25,2011-01,Vehicle crime,Barking and Dagenham
26,2011-01,Vehicle crime,Barking and Dagenham
27,2011-01,Vehicle crime,Barking and Dagenham
42,2011-01,Vehicle crime,Barking and Dagenham
43,2011-01,Vehicle crime,Barking and Dagenham
...,...,...,...
98793,2011-01,Vehicle crime,Westminster
98827,2011-01,Vehicle crime,Westminster
99029,2011-01,Vehicle crime,Westminster
99079,2011-01,Vehicle crime,Westminster


In [48]:
#Building a function to test hypothesis 1

def vcp(borough = 'Westminster', year='2011', month='01',):
    
    data = pd.read_csv('London Crime Data 2011 to 2014/{}-{}-metropolitan-street.csv'.format(year,month))
    
    data['LSOA name'].fillna('No location', inplace = True)
            
    return len(data.loc[(data['Borough'].str.contains("Westminster")) & ((data['Crime type'] == "Burglary")|(data['Crime type'] == "Violence and sexual offences"))])/ \
                            len(data.loc[(data['LSOA name'].str.contains("Westminster"))]

SyntaxError: unexpected EOF while parsing (<ipython-input-48-e9d07c4a5dff>, line 10)