In [1]:
# imports
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import geopandas as gpd
import pandas as pd
import subprocess
import os

In [2]:
# Downloads

data_dir = 'data/'
# crime data
crime_url = 'http://webarchive.nationalarchives.gov.uk/+/http://www.homeoffice.gov.uk/publications/science-research-statistics/research-statistics/crime-research/local-police-recorded-crime/reccrime-la.csv?view=Binary'
if not os.path.exists(data_dir + 'crime.csv'):
    subprocess.call(['wget',crime_url,'-O',data_dir + 'crime.csv'])

# population data
pop_url = 'https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatesforukenglandandwalesscotlandandnorthernireland/mid2001tomid2010/mid2001tomid2012ukpopulationestimatesv2.zip'
if not os.path.exists(data_dir + 'pop.zip'):
    subprocess.call(['wget',pop_url,'-O',data_dir + 'pop.zip'])
    subprocess.call(['unzip','-j',data_dir + 'pop.zip','MYE6PE3_mid-2001-mid-2012-unformatted-syoa-data-file.xls','-d','data'])
    
# geojson shape data
geo_json_url = 'https://raw.githubusercontent.com/martinjc/UK-GeoJSON/master/json/administrative/eng/lad.json'
if not os.path.exists(data_dir + 'lad.json'):
    subprocess.call(['wget',geo_json_url,'-O',data_dir + 'lad.json'])

In [3]:
# cleaning crime
crime = pd.read_csv(data_dir + 'crime.csv')
crime.columns = ['year'] + ['_'.join(x.lower().split()) for x in crime.columns[1:-1]] + ['total']
crime.year = pd.to_datetime(crime.year,format='%d/%m/%Y')
crime = crime.rename(columns={'onscode':'code'})
crime = crime[['year','code','offence','total']]

# only keep march year starts
crime['month'] = crime.year.map(lambda x : x.month)
crime = crime[crime.month == 3]
crime = crime.drop('month',axis=1)

# drop transport police data
crime = crime[crime.code != 'Delete']

In [4]:
# cleaning geo
geo = gpd.read_file(data_dir + 'lad.json')
geo = geo.rename(columns={'LAD13CDO':'code'})
geo.index = geo.code

crime = crime[crime.code.isin(geo.code)]

geo.loc['00EM','LAD13CD'] = 'E06000048'
geo.loc['26UD','LAD13CD'] = 'E07000097'
geo.loc['26UH','LAD13CD'] = 'E07000101'
geo.loc['00CH','LAD13CD'] = 'E08000020'

In [5]:
# join crime and geo
df = geo.merge(crime)
df = df.drop('LAD13NMW',axis=1)

In [6]:
# cleaning pop
def clean_pop_df(pops,i,df):
    pops.columns = pops.iloc[0,:]
    # want males + females
    pops = pops[1:456]

    # only want england
    pops['temp'] = pops.Code.map(lambda x : x[0] == 'E')
    pops = pops[pops.temp == True]
    pops = pops.drop('temp',axis=1)

    # only want all ages
    pops = pops.iloc[:,:3]
    pops = pops[pops.Code.isin(df.LAD13CD)]
    
    pops['year'] = str(2000 + i)
    
    pops = pops.rename(columns={'Code':'code'})
    pops = pops.iloc[:,[0,2,3]]
    
    return pops

pops = [pd.read_excel(data_dir + 'MYE6PE3_mid-2001-mid-2012-unformatted-syoa-data-file.xls',sheetname=i) for i in range(3,12)]
pops = [clean_pop_df(pop,i+3,geo) for i,pop in enumerate(pops)]

pops = pd.concat(pops)

pops.year = pd.to_datetime(pops.year,format='%Y')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [7]:
df = df.drop('code',axis=1)
df = df.rename(columns={'LAD13CD':'code'})
df.year = df.year.map(lambda x : x.year)
df.year = pd.to_datetime(df.year,format='%Y')

df = df.merge(pops,on=['code','year'],how='outer')

df['per1000'] = (df.total / df[' ALL AGES']) * 1000
df['per1000'] = df['per1000'].astype(float)

df[' All AGES'] = df[' ALL AGES'].astype(float)

In [8]:
def crime_increases(code):

    area = df[df.code == code]

    year0 = area.pivot_table(values='per1000',index='year',columns='offence').iloc[0,:]
    yearN = area.pivot_table(values='per1000',index='year',columns='offence').iloc[-1,:]

    change = (yearN - year0)/year0
    
    return dict(change)

In [9]:
crime_increase_data = {code:crime_increases(code) for code in df.code.unique()}
crime_increase_df = pd.DataFrame(crime_increase_data).T

geo.index = geo.LAD13CD
crime_increase_df = crime_increase_df.join(geo)
crime_increase_df = gpd.GeoDataFrame(crime_increase_df)

In [10]:
with open('data/drug_change.json','wb') as f:
    f.write(crime_increase_df[['LAD13CD','LAD13NM','geometry','Drug offences']].to_json())

- The file drug_change.json now has all the data needed to produce the viz, the current shapes are however quite detailed and so the page will load slowly. It is possible to simplify the geojson file using the following website -> http://www.mapshaper.org/
- Should really look into how to do this in python!