Import libraries

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import os
import geopandas as gpd
import copy as cp

In [2]:
# Download the IMD 2010 income-score table and save it next to the notebook.

ic_url = 'http://opendatacommunities.org/downloads/cube-table?uri=http%3A%2F%2Fopendatacommunities.org%2Fdata%2Fsocietal-wellbeing%2Fdeprivation%2Fimd-income-score-2010'

# The file name is built from the last two URL-encoded path segments
# ('%2F' is an encoded '/'): '<second-to-last>_<last>'.
name = ic_url.split('%2F')[-2] + '_' + ic_url.split('%2F')[-1]

# A timeout stops the cell from hanging forever on an unresponsive server.
r = requests.get(ic_url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the CSV.
r.raise_for_status()

# The context manager closes the file even if the write raises.
with open(name + '.csv', 'wb') as fo:
    fo.write(r.content)

In [32]:
# Read the downloaded IMD income table and the raw census extracts into DataFrames.

imd_income = pd.read_csv('deprivation_imd-income-score-2010.csv')

# One census extract per theme; the targets are unpacked in the same order
# as the paths listed below.
(cen_age,
 cen_health,
 cen_heat,
 cen_depr,
 cen_hhold,
 cen_ethgrp,
 cen_cars) = [
    pd.read_csv(csv_path)
    for csv_path in (
        'raw_data/Data_AGE_UNIT.csv',
        'raw_data/Data_HEALTH_UNIT.csv',
        'raw_data/Data_HEAT_UNIT.csv',
        'raw_data/Data_DEPR_UNIT.csv',
        'raw_data/Data_HOUSE_UNIT.csv',
        'raw_data/Data_ETHGRP_UNIT.csv',
        'raw_data/Data_CARS_UNIT.csv',
    )
]

In [62]:
# make functions to organise data

def clean_census_data(data):
    """Return the census values indexed by GEO_CODE, keeping only complete columns.

    The identifier columns ``CDU_ID``, ``GEO_LABEL``, ``GEO_TYPE`` and
    ``GEO_TYP2`` are removed, the row labelled ``0`` is dropped,
    ``GEO_CODE`` becomes the index, and every remaining column that contains
    at least one missing value is discarded.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw census extract containing the columns named above.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``data`` itself is not modified.

    Raises
    ------
    KeyError
        If one of the expected columns, or the row label ``0``, is missing.
    """
    clean_data = (
        data
        .drop(['CDU_ID', 'GEO_LABEL', 'GEO_TYPE', 'GEO_TYP2'], axis=1)
        .drop([0], axis=0)
        .set_index('GEO_CODE')
        # Dropping incomplete columns directly avoids transposing the whole
        # table twice, which also forced every column to object dtype.
        .dropna(axis=1)
    )

    return clean_data

In [57]:
# Clean every raw census extract; results are unpacked in the same order
# as the inputs are listed.
(data_age,
 data_health,
 data_heat,
 data_depr,
 data_hhold,
 data_ethgrp,
 data_cars) = [
    clean_census_data(raw_table)
    for raw_table in (
        cen_age,
        cen_health,
        cen_heat,
        cen_depr,
        cen_hhold,
        cen_ethgrp,
        cen_cars,
    )
]

In [60]:
# Write each cleaned census table to the 'data' folder.
# Create the folder first so the writes also succeed on a fresh checkout
# where it does not exist yet.
os.makedirs('data', exist_ok=True)

data_age.to_csv('data/cleaned_age.csv')
data_health.to_csv('data/cleaned_health.csv')
data_heat.to_csv('data/cleaned_heat.csv')
data_depr.to_csv('data/cleaned_depr.csv')
data_hhold.to_csv('data/cleaned_hhold.csv')
data_ethgrp.to_csv('data/cleaned_ethgrp.csv')
data_cars.to_csv('data/cleaned_cars.csv')

In [70]:
def get_meta_data(data):
    """Build a metadata table from the description row of a census extract.

    Columns of ``data`` that contain any missing value are discarded. The
    first row of what remains is read as one description string per column,
    shaped like ``'<field> : <value> - <field> : <value> - ...'``. Each
    extracted ``<field>`` becomes a column of the result and each ``<value>``
    a cell, with one result row per retained column of ``data``.

    Field names are taken from the first description only. The resulting
    headers are stripped, lower-cased, have spaces replaced by ``_`` and the
    characters ``(``, ``)`` and ``;`` removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw extract whose first row holds the description strings.

    Returns
    -------
    pandas.DataFrame
        Indexed by the retained column names of ``data``.

    Raises
    ------
    IndexError
        If no column survives the missing-value filter, if a description has
        fewer `` - `` segments than the first one, or if a segment lacks
        `` : ``.
    """
    # Keep fully populated columns only, then turn the first row into a
    # single-column frame indexed by the original column names.
    meta_data = data.dropna(axis=1).iloc[:1].T

    # Read the descriptions once instead of doing a chained lookup per cell.
    desc_col = meta_data.columns[0]
    descriptions = meta_data[desc_col].tolist()

    # NOTE(review): the number of ' - ' separators is one less than the
    # number of segments, so the last segment of each description is never
    # extracted -- confirm this is intended.
    n_fields = descriptions[0].count(' - ')

    for i in range(n_fields):
        # The column name comes from the first description only.
        name = str(descriptions[0].split(' - ')[i].split(' : ')[0])
        meta_data[name] = [
            str(description.split(' - ')[i].split(' : ')[1])
            for description in descriptions
        ]

    # The raw description column is no longer needed.
    meta_data = meta_data.drop(columns=[desc_col])

    # Clean headers. regex=False makes '(' and ')' plain characters no matter
    # which default the installed pandas version uses.
    cleaned = meta_data.columns.str.strip().str.lower()
    for old, new in ((' ', '_'), (')', ''), ('(', ''), (';', '')):
        cleaned = cleaned.str.replace(old, new, regex=False)
    meta_data.columns = cleaned

    return meta_data

In [74]:
# Extract the metadata table of every raw census extract; results are
# unpacked in the same order as the inputs are listed.
(meta_age,
 meta_health,
 meta_heat,
 meta_depr,
 meta_hhold,
 meta_ethgrp,
 meta_cars) = [
    get_meta_data(raw_table)
    for raw_table in (
        cen_age,
        cen_health,
        cen_heat,
        cen_depr,
        cen_hhold,
        cen_ethgrp,
        cen_cars,
    )
]

In [80]:
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the per-theme
# metadata tables in the same order and keeps their original row labels.
meta_data = pd.concat([
    meta_age,
    meta_health,
    meta_heat,
    meta_depr,
    meta_hhold,
    meta_ethgrp,
    meta_cars,
])

In [81]:
# Write the combined metadata table to data/meta_all.csv.
meta_data.to_csv('data/meta_all.csv')

In [None]:
# Read the Lower Layer Super Output Area boundary shapefile (December 2001,
# England and Wales, going by the file name) with geopandas.
neis = gpd.read_file('lsoas/Lower_Layer_Super_Output_Areas_December_2001_Generalised_Clipped_Boundaries_in_England_and_Wales.shp')

In [None]:
# Draw the boundary geometries that were just loaded.
neis.plot()