#### Crime Data - Home Burglaries by Ward

In [1]:
# Load necessary packages
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler

In [2]:
# Function to save cleaned dataframe

def save_data(dataframe, dest: str, name: str):
    file = os.path.join(dest, name)  # dest/name

    # If the file is not existing...
    if not os.path.isfile(file):
        print(f"{file} not found, saving...")
        path = os.path.split(dest)  # dest1, dest2

        # Make sure the destination file exits
        if len(path) >= 1 and path[0] != "":
            os.makedirs(os.path.join(*path), exist_ok=True)

        # Save the file
        dataframe.to_csv(file, index=False)

        print("\tDone saving.")

    else:
        print(f"Found {file} locally, done!")

    return

In [4]:
# Reading in crime data

file = os.path.join('data', 'raw', 'crime_data_ward.csv')
cols = ['WardName', 'WardCode', 'MinorText', '201401', '201402', '201403', '201404',
       '201405', '201406', '201407', '201408', '201409', '201410', '201411', '201412']

df_crime = pd.read_csv(file, usecols = cols, encoding= 'unicode_escape')

In [5]:
df_crime.head()

Unnamed: 0,WardName,WardCode,MinorText,201401,201402,201403,201404,201405,201406,201407,201408,201409,201410,201411,201412
0,Abbey,E05000026,Arson,0,0,0,1,0,1,0,0,0,1,2,0
1,Abbey,E05000026,Criminal Damage,11,5,17,10,7,11,10,15,9,10,6,14
2,Abbey,E05000026,Burglary Business and Community,2,4,3,5,1,1,1,6,2,5,3,5
3,Abbey,E05000026,Domestic Burglary,5,5,9,2,6,3,4,2,2,1,6,5
4,Abbey,E05000026,Drug Trafficking,1,2,0,0,1,0,0,0,0,1,1,0


In [9]:
# Select only Domestic Burglary
df_crime = df_crime.loc[(df_crime['MinorText'] == 'Domestic Burglary')]

In [20]:
# Find the sum of Domestic Burglaries that happened in year 2014
df_crime['total2014'] = df_crime['201401'] + df_crime['201402'] + df_crime['201403'] + df_crime['201404'] + df_crime['201405'] + df_crime['201406'] + df_crime['201407'] + df_crime['201408'] + df_crime['201409'] + df_crime['201410'] + df_crime['201411'] + df_crime['201412']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [22]:
df_crime = df_crime[['WardName', 'WardCode', 'MinorText', 'total2014']]

In [25]:
df_crime.head()

Unnamed: 0,WardName,WardCode,MinorText,total2014
3,Abbey,E05000026,Domestic Burglary,50
52,Alibon,E05000027,Domestic Burglary,70
95,Becontree,E05000028,Domestic Burglary,99
140,Chadwell Heath,E05000029,Domestic Burglary,91
184,Eastbrook,E05000030,Domestic Burglary,72


In [35]:
# Read in population data of wards

file = os.path.join('data', 'raw', 'ward_atlas.csv')
cols = ['New code', 'Population - 2015']

df_wardpop = pd.read_csv(file, usecols = cols, encoding= 'unicode_escape')

In [36]:
df_wardpop.head()

Unnamed: 0,New code,Population - 2015
0,E09000001,8100
1,E05000026,14750
2,E05000027,10600
3,E05000028,12700
4,E05000029,10400


In [38]:
# do a left join - population data and crime
df_joinedcrime = pd.merge(df_crime, df_wardpop, left_on='WardCode', right_on='New code', how='left')

In [39]:
df_joinedcrime.head()

Unnamed: 0,WardName,WardCode,MinorText,total2014,New code,Population - 2015
0,Abbey,E05000026,Domestic Burglary,50,E05000026,14750.0
1,Alibon,E05000027,Domestic Burglary,70,E05000027,10600.0
2,Becontree,E05000028,Domestic Burglary,99,E05000028,12700.0
3,Chadwell Heath,E05000029,Domestic Burglary,91,E05000029,10400.0
4,Eastbrook,E05000030,Domestic Burglary,72,E05000030,10750.0


In [40]:
df_joinedcrime['burglariesper1000residents'] = df_joinedcrime['total2014']/df_joinedcrime['Population - 2015']*1000

In [41]:
df_joinedcrime.head()

Unnamed: 0,WardName,WardCode,MinorText,total2014,New code,Population - 2015,burglariesper1000residents
0,Abbey,E05000026,Domestic Burglary,50,E05000026,14750.0,3.389831
1,Alibon,E05000027,Domestic Burglary,70,E05000027,10600.0,6.603774
2,Becontree,E05000028,Domestic Burglary,99,E05000028,12700.0,7.795276
3,Chadwell Heath,E05000029,Domestic Burglary,91,E05000029,10400.0,8.75
4,Eastbrook,E05000030,Domestic Burglary,72,E05000030,10750.0,6.697674


In [46]:
df_crimeNormalised = df_joinedcrime[['WardName', 'WardCode', 'total2014', 'Population - 2015', 'burglariesper1000residents']]

In [47]:
df_crimeNormalised.head()

Unnamed: 0,WardName,WardCode,total2014,Population - 2015,burglariesper1000residents
0,Abbey,E05000026,50,14750.0,3.389831
1,Alibon,E05000027,70,10600.0,6.603774
2,Becontree,E05000028,99,12700.0,7.795276
3,Chadwell Heath,E05000029,91,10400.0,8.75
4,Eastbrook,E05000030,72,10750.0,6.697674
