In [None]:
# Geospatial practice
# Turn postcodes into LSOAs (Lower Layer Super Output Areas) and attach IMD data

In [None]:
# import 
import folium
import geopandas as gpd
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import urllib.request
import zipfile

In [None]:
# Make sure the working folders exist before anything is downloaded or
# written; exist_ok keeps this cell safe to re-run.
for folder in ('source', 'data'):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Synthetic records used to exercise the postcode -> LSOA -> IMD lookup.
# The postcodes are deliberately in mixed case so the normalisation step
# below has something to do.
data = dict([
    ('name', ['John Doe', 'Jane Smith', 'Alice Brown', 'Bob White', 'Nas Gupta']),
    ('address', ['123 Elm St', '456 Oak St', '789 Pine St', '101 Maple St', '202 Birch St']),
    ('Incident Postcode', ['LE15 6dt', 'LE15 6aj', 'le5 4PW', 'LE1 5WW', 'LE15 6AS']),
    ('age', [28, 35, 42, 55, 61]),
    ('ethnicity', ['White', 'Black', 'Asian', 'White', 'Indian']),
])

# Build the frame and upper-case the postcode column in one chain.
df = pd.DataFrame(data).assign(
    **{'Incident Postcode': lambda frame: frame['Incident Postcode'].str.upper()}
)

# Show the resulting frame
print(df)

In [None]:
# Global variables - download helper with an on-disk cache, for large datasets
cache_path = os.path.join(os.getcwd(), "cache")  # default folder downloads are cached in


def fetch(url, relative_path, cache_dir=None):
    """Return the local path of a cached download, downloading it if needed.

    Parameters
    ----------
    url : str
        Location to download from when the file is not cached yet.
    relative_path : str
        Path of the cached file relative to the cache folder. May contain
        sub-folders; they are created on demand.
    cache_dir : str, optional
        Cache folder to use instead of the module-level ``cache_path``.

    Returns
    -------
    str
        Path of the cached file.

    Raises
    ------
    Whatever ``urllib.request.urlretrieve`` raises when the download fails.
    In that case nothing is left at the final cache path.
    """
    base_dir = cache_path if cache_dir is None else cache_dir
    file_path = os.path.join(base_dir, relative_path)
    if os.path.exists(file_path):
        return file_path

    # Create the cache folder and any sub-folders named in relative_path.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Download to a temporary name and only move it into place once complete,
    # so an interrupted download is never mistaken for a valid cached file.
    partial_path = file_path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, file_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return file_path

In [None]:
# Download and unpack the postcode -> OA / LSOA / MSOA / LAD lookup table.
name1 = 'PCD_OA_LSOA_MSOA_LAD_FEB20_UK_LU.csv'
url1 = 'https://www.arcgis.com/sharing/rest/content/items/9a8866dd822c4f3d944b9497203ee5e5/data'
zip_path1 = f'source/{name1}.zip'
csv_path1 = f'source/{name1}'  # assumes the archive contains a member with this name - the read cell relies on it

os.makedirs('source', exist_ok=True)

# Guard on the extracted CSV, not on the archive: previously the extraction
# only ran in the same pass as the download, so a missing CSV next to an
# existing zip was never repaired.
if not os.path.exists(csv_path1):
    # (Re)download when the archive is missing or is not a valid zip, e.g.
    # after an interrupted download.
    if not os.path.exists(zip_path1) or not zipfile.is_zipfile(zip_path1):
        partial_path1 = f'{zip_path1}.part'
        try:
            # Download under a temporary name and move it into place only
            # once complete.
            urllib.request.urlretrieve(url1, partial_path1)
            os.replace(partial_path1, zip_path1)
        finally:
            if os.path.exists(partial_path1):
                os.remove(partial_path1)
    with zipfile.ZipFile(zip_path1, 'r') as zipRef:
        zipRef.extractall('source/')

In [None]:
# Names (and dtypes) given to the columns that are kept from the lookup file.
dtype = {
    'PCDS': str,      # postcode with one space between the district and the sector-unit part
    'OA11CD': str,    # Output Area code (Census 2011)
    'LSOA11CD': str,  # Lower Layer Super Output Area code (Census 2011)
    'MSOA11CD': str,  # Middle Layer Super Output Area code (Census 2011)
    'MSOA11NM': str,  # Middle Layer Super Output Area name (Census 2011)
}

# Positions of the columns to keep; they are renamed, in order, to the keys above.
cols = [2, 6, 7, 8, 11]

# Load the lookup, skipping the file's own header row because the names are
# supplied explicitly, then index the table by LSOA code.
postcode_lookup_raw = pd.read_csv(
    f'source/{name1}',
    sep=',',
    encoding='latin-1',
    skiprows=1,
    usecols=cols,
    names=list(dtype),
    dtype=dtype,
)
postcode_LSOA_df = postcode_lookup_raw.set_index('LSOA11CD')

In [None]:
# Function to run the whole code for postcodes attached to lsoas with IMD data

def produce_LSOA_IMD(df, postcode_lookup=None, imd_data=None):
    """Attach LSOA codes and IMD data to records that carry a postcode.

    Parameters
    ----------
    df : pandas.DataFrame
        Input records. Must contain an 'Incident Postcode' column. The frame
        is not modified; a copy is used.
    postcode_lookup : pandas.DataFrame, optional
        Postcode lookup with a 'PCDS' column and 'LSOA11CD' either as a
        column or as the index. Defaults to the notebook-level
        ``postcode_LSOA_df``.
    imd_data : pandas.DataFrame, optional
        IMD table with an 'LSOA code (2011)' column. When omitted it is
        downloaded (and cached) with ``fetch`` and read with ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per input row (left joins), with the remaining lookup columns
        and the IMD columns appended. Rows whose postcode is not in the
        lookup keep missing values in the appended columns.
    """
    # Work on a copy so the caller's frame is left untouched, and normalise
    # the postcode text before matching it against the lookup.
    incidents = df.copy()
    incidents['Incident Postcode'] = (
        incidents['Incident Postcode'].str.upper().str.strip()
    )

    if postcode_lookup is None:
        postcode_lookup = postcode_LSOA_df
    # The notebook's lookup is indexed by LSOA11CD; the merge below needs it
    # as an ordinary column.
    if 'LSOA11CD' not in postcode_lookup.columns:
        postcode_lookup = postcode_lookup.reset_index()

    if imd_data is None:
        # fetch the IMD data
        imd_data_path = fetch('https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/845345/File_7_-_All_IoD2019_Scores__Ranks__Deciles_and_Population_Denominators_3.csv', 'imd.csv')
        imd_data = pd.read_csv(imd_data_path)

    # Postcode -> LSOA. The lookup's postcode column duplicates
    # 'Incident Postcode' after the join, so it is dropped.
    with_lsoa = pd.merge(
        incidents, postcode_lookup,
        left_on='Incident Postcode', right_on='PCDS', how='left',
    ).drop(columns='PCDS')

    # LSOA -> IMD. 'LSOA code (2011)' from the IMD table is kept as the LSOA
    # identifier, so the lookup's copy is dropped.
    final_df = pd.merge(
        with_lsoa, imd_data,
        left_on='LSOA11CD', right_on='LSOA code (2011)', how='left',
    ).drop(columns='LSOA11CD')

    return final_df

In [None]:
# Run the function on the example dataframe `df` built earlier in the notebook.
# The call is the last expression in the cell, so the returned dataframe is
# shown as the cell output.

produce_LSOA_IMD(df)