# Prepare Data for Workshop

Workshop: https://github.com/IBMDeveloperUK/geopandas-workshop

Data for the Metropolitan Police Service was downloaded from https://data.police.uk/data/ into the `data/crime_data` folder and unzipped, with all monthly files moved into that single folder. The combined per-year files produced below are written to `data/crime_data/by_year`.

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import geopandas as gpd
import zipfile

## Convert crime data to a more readable format

These files are uploaded to https://github.com/IBMDeveloperUK/geopandas-workshop/tree/master/data

In [None]:
# Merge the twelve monthly stop-and-search files of each year into one CSV per
# year, drop the columns listed below, and store a zipped copy next to it.
years = ['2017','2018']
months = ['01','02','03','04','05','06','07','08','09','10','11','12']

for year in years:
    # Read every month first and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and copied the accumulated frame again on every call.
    monthly = [pd.read_csv("data/crime_data/"+year+"-"+month+"-metropolitan-stop-and-search.csv")
               for month in months]
    # The index is not reset, so the written CSV matches the append-based output.
    df = pd.concat(monthly)
    df = df.drop(columns=['Gender', 'Age range','Self-defined ethnicity',
                          'Officer-defined ethnicity','Removal of more than just outer clothing'])
    csv_path = "data/crime_data/by_year/"+year+"-metropolitan-stop-and-search.csv"
    df.to_csv(csv_path)
    # The context manager closes the archive even if writing to it fails.
    with zipfile.ZipFile("data/crime_data/by_year/"+year+"-metropolitan-stop-and-search.zip", 'w') as zip_file:
        zip_file.write(csv_path, compress_type=zipfile.ZIP_DEFLATED)

!rm data/crime_data/by_year/*.csv

In [None]:
# Merge the January-June street-crime files of each year into one CSV per
# half-year (suffix "-1"), drop the columns listed below, and zip the result.
years = ['2017','2018']
months = ['01','02','03','04','05','06']

for year in years:
    # Read every month first and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and copied the accumulated frame again on every call.
    monthly = [pd.read_csv("data/crime_data/"+year+"-"+month+"-metropolitan-street.csv")
               for month in months]
    # The index is not reset, so the written CSV matches the append-based output.
    df = pd.concat(monthly)
    df = df.drop(columns=['Reported by','Falls within','LSOA name'])
    csv_path = "data/crime_data/by_year/"+year+"-1-metropolitan-street.csv"
    df.to_csv(csv_path)
    # The context manager closes the archive even if writing to it fails.
    with zipfile.ZipFile("data/crime_data/by_year/"+year+"-1-metropolitan-street.zip", 'w') as zip_file:
        zip_file.write(csv_path, compress_type=zipfile.ZIP_DEFLATED)

!rm data/crime_data/by_year/*.csv

In [None]:
# Merge the July-December street-crime files of each year into one CSV per
# half-year (suffix "-2"), drop the columns listed below, and zip the result.
years = ['2017','2018']
months = ['07','08','09','10','11','12']

for year in years:
    # Read every month first and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and copied the accumulated frame again on every call.
    monthly = [pd.read_csv("data/crime_data/"+year+"-"+month+"-metropolitan-street.csv")
               for month in months]
    # The index is not reset, so the written CSV matches the append-based output.
    df = pd.concat(monthly)
    df = df.drop(columns=['Reported by','Falls within','LSOA name'])
    csv_path = "data/crime_data/by_year/"+year+"-2-metropolitan-street.csv"
    df.to_csv(csv_path)
    # The context manager closes the archive even if writing to it fails.
    with zipfile.ZipFile("data/crime_data/by_year/"+year+"-2-metropolitan-street.zip", 'w') as zip_file:
        zip_file.write(csv_path, compress_type=zipfile.ZIP_DEFLATED)

!rm data/crime_data/by_year/*.csv

## Clean up Borough shape files

In [None]:
# https://data.london.gov.uk/dataset/2011-boundary-files
# Included are Output Area (OA), Lower Super Output Area (LSOA) and Middle-Level Super Output Area (MSOA) boundaries.
# Each geography is provided at Extent of the Realm (BFE), Coastline (BFC) and Generalised Coastline (BGC).

# Start from the City of London LSOA (BFE) shapefile; later cells add more boroughs.
Boundaries = gpd.read_file("data/2011_london_boundaries/LSOA_2011_BFE_London/LSOA_2011_BFE_City_of_London.shp")
#Boundaries[]
Boundaries.plot();

In [None]:
# Add the Westminster LSOA boundaries to the ones already held in Boundaries.
# pd.concat replaces GeoDataFrame.append, which is unavailable with pandas >= 2.0.
Boundaries = pd.concat([Boundaries, gpd.read_file("data/2011_london_boundaries/LSOA_2011_BFE_London/LSOA_2011_BFE_Westminster.shp")])
Boundaries.plot();

In [None]:
# Add the LSOA boundaries of six further boroughs to Boundaries.
# All shapefiles are read first and concatenated once, in the listed order:
# GeoDataFrame.append is unavailable with pandas >= 2.0, and chaining it
# copied the growing frame on every call.
Boundaries = pd.concat(
    [Boundaries]
    + [gpd.read_file("data/2011_london_boundaries/LSOA_2011_BFE_London/LSOA_2011_BFE_" + borough + ".shp")
       for borough in ['Camden', 'Islington', 'Hackney', 'Tower_Hamlets', 'Southwark', 'Lambeth']]
)

Boundaries.plot();

In [None]:
# Choropleth of the 'POPDEN' column (presumably population density - confirm
# against the shapefile metadata), "Reds" colormap, 'quantiles' classification scheme.
Boundaries.plot(column='POPDEN',cmap="Reds",scheme='quantiles');

In [None]:
# Preview the first rows of the combined boundaries.
Boundaries.head()

In [None]:
# Write the combined boundaries to data/boundaries.shp.
Boundaries.to_file("data/boundaries.shp")

## Convert to lat/lon - work in progress

In [None]:
#https://pypi.org/project/OSGridConverter/
#!pip install OSGridConverter

#from OSGridConverter import grid2latlong

#l=grid2latlong('TG 532151  181867')
#(l.latitude,l.longitude)

In [None]:
#from OSGridConverter import latlong2grid
#g=latlong2grid(51.993742,-0.975257, tag = 'WGS84')
#str(g)

## Boroughs in lat/lon

In [None]:
# Source: https://skgrange.github.io/www/data/london_boroughs.json

# Load the boroughs file and show its last rows.
boroughs2 = gpd.read_file("data/london_boroughs.json")
boroughs2.tail()

In [None]:
# Plot the borough geometries.
boroughs2.plot();

In [None]:
# Merge the borough geometries into one row per distinct 'inner_statistical'
# value; the remaining columns are aggregated with 'sum'.
london = boroughs2.dissolve(by='inner_statistical',aggfunc='sum')
# Only the last expression of a cell is displayed, so the bare `london`
# statement that used to precede this line had no effect and was removed.
london.head()

## Bounding box for extracting London OSM data

In [None]:
# Envelope (bounding rectangle) of each geometry in `london`.
bounding_box = london.envelope


In [None]:
# Wrap the envelopes in a GeoDataFrame with a single 'geometry' column and preview it.
bb = gpd.GeoDataFrame(gpd.GeoSeries(bounding_box), columns=['geometry'])
bb.head()

In [None]:
# Plot the bounding rectangles.
bb.plot();

In [None]:
# Drop the row whose index label is 0 and preview what remains.
# NOTE(review): the label 0 is listed twice, which looks redundant; presumably
# this removes the row for inner_statistical == 0 so only inner London is kept -
# confirm against the index of `london`.
london2 = london.drop([0, 0])
london2.head()

In [None]:
# Plot the remaining geometry.
london2.plot();

In [None]:
# Overall extent of london2, unpacked as xmin, ymin, xmax, ymax; these values
# are used below to clip the OSM layers. The second line displays them.
xmin, ymin, xmax, ymax = london2.total_bounds
xmin, ymin, xmax, ymax

## Open Street Map data

In [None]:
# Download page: http://download.geofabrik.de/europe/great-britain.html

# Load the roads layer from the england-latest-free folder and preview it.
roads_all = gpd.read_file("data/england-latest-free/gis_osm_roads_free_1.shp")
roads_all.head()


In [None]:
# Keep only the roads selected by the coordinate-based .cx slice over the london2 extent.
roads = roads_all.cx[xmin:xmax, ymin:ymax]

In [None]:
# Write the clipped roads to data/london_inner_roads.shp.
roads.to_file("data/london_inner_roads.shp")

In [None]:
# Load the points-of-interest (pois) layer from the england-latest-free folder and preview it.
pois_all = gpd.read_file("data/england-latest-free/gis_osm_pois_free_1.shp")
pois_all.head()


In [None]:
# Keep only the points of interest selected by the .cx slice over the london2 extent.
pois = pois_all.cx[xmin:xmax, ymin:ymax]

In [None]:
# Write the clipped points of interest to data/london_inner_pois.shp.
pois.to_file("data/london_inner_pois.shp")