# Analysis of several Haringey simulations

In [None]:
import os
import sys
from pathlib import Path
import math

import numpy as np
import pandas as pd
import sqlalchemy
import pytz
import requests_cache
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context("notebook", font_scale=1.25, rc={"lines.linewidth": 2.5})
%matplotlib inline

# Put the parent directory on the import path so that the project package
# imported right below can be found (presumably `ktp` lives one level above
# this notebook — confirm). The membership check avoids duplicate entries when
# the cell is re-run.
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)
import ktp.census

In [None]:
# settings
# Label of this analysis; used below as the sub-folder of ./build/ into which
# the figures are written.
NAME = "all-run-results"
# Geographical layer passed to every ktp.census reader in this notebook.
SPATIAL_RESOLUTION = ktp.census.GeographicalLayer.WARD
# Maps a run label to the SQLite result database of that simulation run.
# NOTE(review): the result files are named '...-lsoa-...' while
# SPATIAL_RESOLUTION is WARD — confirm that the region ids stored in the
# databases match the ids of this layer, otherwise the join in
# generate_derived_results cannot align.
PATHS_TO_RESULT_FILES = {'age': Path('./build/haringey-scenario-lsoa-age/haringey-scenario-lsoa-age-results.db'),
                         'economic': Path('./build/haringey-scenario-lsoa-economic/haringey-scenario-lsoa-economic-results.db')}

In [None]:
# Folder in which the web cache file is stored (see install_cache below).
CACHE_FOLDER_PATH = Path('./build/')
# Output folder for the figures of this analysis: ./build/<NAME>.
BUILD_FOLDER_PATH = Path('./build/') / NAME
# Create the output folder (and missing parents); a no-op if it already exists.
BUILD_FOLDER_PATH.mkdir(parents=True, exist_ok=True)
# NOTE(review): not referenced in the visible part of this notebook.
TUS_DATA_FOLDER_PATH = Path('./data/UKDA-4504-tab/')

In [None]:
# Install a requests_cache cache named 'web-cache' inside CACHE_FOLDER_PATH.
# Presumably the ktp.census readers download their data over HTTP, so this
# avoids repeated downloads when the notebook is re-run — confirm.
requests_cache.install_cache((CACHE_FOLDER_PATH / 'web-cache').as_posix())

## Read Data

In [None]:
# Census tables at the chosen spatial resolution. The code below sums them
# along axis=1 and assigns the result to the shape-file frame, so they are
# presumably laid out as one row per region and one column per category.
householdTypes = ktp.census.read_household_type_data(SPATIAL_RESOLUTION)
age_structure = ktp.census.read_age_structure_data(SPATIAL_RESOLUTION)
qualification_data = ktp.census.read_qualification_level_data(SPATIAL_RESOLUTION)
economic_activity_data = ktp.census.read_economic_activity_data(SPATIAL_RESOLUTION)

In [None]:
# Representative age in years for every age band; used by meanAge as the
# weight of the corresponding column. The values match the midpoint of the
# band when a band "a to b" is read as the interval [a, b + 1).
AGE_MAP = {
    ktp.types.AgeStructure.AGE_0_TO_4: 2.5,
    ktp.types.AgeStructure.AGE_5_TO_7: 6.5,
    ktp.types.AgeStructure.AGE_8_TO_9: 9,
    ktp.types.AgeStructure.AGE_10_TO_14: 12.5,
    ktp.types.AgeStructure.AGE_15: 15.5,
    ktp.types.AgeStructure.AGE_16_TO_17: 17,
    ktp.types.AgeStructure.AGE_18_TO_19: 19,
    ktp.types.AgeStructure.AGE_20_TO_24: 22.5,
    ktp.types.AgeStructure.AGE_25_TO_29: 27.5,
    ktp.types.AgeStructure.AGE_30_TO_44: 37.5,
    ktp.types.AgeStructure.AGE_45_TO_59: 52.5,
    ktp.types.AgeStructure.AGE_60_TO_64: 62.5,
    ktp.types.AgeStructure.AGE_65_TO_74: 70,
    ktp.types.AgeStructure.AGE_75_TO_84: 80,
    ktp.types.AgeStructure.AGE_85_TO_89: 87.5,
    ktp.types.AgeStructure.AGE_90_AND_OVER: 95 # FIXME: open-ended band has no midpoint; 95 is a placeholder
}

def meanAge(age_structure):
    """Return the mean age for every row of an age-structure table.

    Each column label of `age_structure` is looked up in `AGE_MAP` to get the
    representative age of that band. The column values (presumably head
    counts) are weighted with that age, summed per row and divided by the
    unweighted row total.

    Raises `KeyError` if a column label is missing from `AGE_MAP`.
    """
    weighted_ages = age_structure.copy()
    for band in age_structure.columns:
        weighted_ages[band] = age_structure[band] * AGE_MAP[band]
    summed_ages = weighted_ages.sum(axis=1)
    population = age_structure.sum(axis=1)
    return summed_ages / population
    
    
def percent_highest_qualification(qualification_data):
    """Return, per row, the share of `Qualification.LEVEL_45` in the row total.

    Despite the name, the result is a plain ratio and is not multiplied by 100.
    """
    row_totals = qualification_data.sum(axis=1)
    highest_level = qualification_data[ktp.types.Qualification.LEVEL_45]
    return highest_level / row_totals


def percent_economic_active(economic_activity_data):
    """Return, per row, the share of the "active" categories in the row total.

    The categories counted as active are part-time employees, full-time
    employees, self-employed and active full-time students. Despite the name,
    the result is a plain ratio and is not multiplied by 100.
    """
    activity = ktp.types.EconomicActivity
    active_categories = [
        activity.EMPLOYEE_PART_TIME,
        activity.EMPLOYEE_FULL_TIME,
        activity.SELF_EMPLOYED,
        activity.ACTIVE_FULL_TIME_STUDENT,
    ]
    total_active = economic_activity_data[active_categories].sum(axis=1)
    row_totals = economic_activity_data.sum(axis=1)
    return total_active / row_totals

In [None]:
def load_thermal_power(name, path_to_file):
    """Load the mean thermal power per region from one simulation result file.

    Reads the tables `thermalPower` and `dwellings` from the SQLite database
    at `path_to_file`, pivots the thermal power into one column per dwelling
    id and averages the dwellings that share the same `region` entry in the
    `dwellings` table.

    Parameters:
        name: label of the run; becomes the `run` level of the result index.
        path_to_file: path of the SQLite result database.

    Returns:
        A DataFrame indexed by (`run`, `datetime`) with one column per region.
    """
    disk_engine = sqlalchemy.create_engine('sqlite:///{}'.format(path_to_file))
    try:
        thermal_power = pd.read_sql_query('SELECT * FROM thermalPower', disk_engine, index_col='timestamp')
        dwellings = pd.read_sql_query('SELECT * FROM dwellings', disk_engine, index_col='index')
    finally:
        # Release the connection pool so the database file is not kept open
        # after the data has been read (one engine is created per run).
        disk_engine.dispose()

    # The timestamps are handled as integer milliseconds since the epoch
    # (equivalent to scaling them by 10^6 and reading them as nanoseconds).
    thermal_power.index = pd.to_datetime(thermal_power.index, unit='ms')
    thermal_power.index.name = 'datetime'
    # One column per dwelling id; the dropped outer column level is the name
    # of the value column of the table.
    thermal_power = thermal_power.pivot(columns='id')
    thermal_power.columns = thermal_power.columns.droplevel(0)

    region_of_dwelling = dwellings['region']
    mean_thermal_power = thermal_power\
        .groupby(axis=1, by=lambda dwelling_id: region_of_dwelling.loc[dwelling_id])\
        .mean()
    mean_thermal_power['run'] = name
    return mean_thermal_power.reset_index().set_index(['run', 'datetime'])
    
    
def generate_derived_results(name, thermal_power):
    """Combine the shape file with per-region statistics of one run.

    Reads the Haringey shape file for `SPATIAL_RESOLUTION` and adds the
    time-averaged thermal power of every region together with statistics
    derived from the module-level census tables (`householdTypes`,
    `age_structure`, `qualification_data`, `economic_activity_data`).

    Parameters:
        name: label of the run; stored in the `run` column of every row.
        thermal_power: thermal power of one run with one column per region.

    Returns:
        The shape-file frame extended by the derived columns.
    """
    households = householdTypes.sum(axis=1)
    citizens = age_structure.sum(axis=1)

    # (column name, values) in the order in which the columns are appended
    derived_columns = [
        ('average_power', thermal_power.mean()),
        ('number_households', households),
        ('number citizens', citizens),
        ('avg household size', citizens / households),
        ('avg age', meanAge(age_structure)),
        ('percent highest qual', percent_highest_qualification(qualification_data)),
        ('percent economic act', percent_economic_active(economic_activity_data)),
        ('run', name),
    ]

    geo_data = ktp.census.read_haringey_shape_file(SPATIAL_RESOLUTION)
    for column, values in derived_columns:
        geo_data[column] = values
    return geo_data

In [None]:
# Mean thermal power of all runs stacked into one frame; every part is indexed
# by (run, datetime), so the runs stay distinguishable after concatenation.
thermal_power_per_run = [load_thermal_power(run_name, result_path)
                         for run_name, result_path in PATHS_TO_RESULT_FILES.items()]
thermal_power = pd.concat(thermal_power_per_run)

In [None]:
# Derived per-region results of all runs stacked into one frame; the `run`
# column tells the rows of the individual runs apart.
results_per_run = [generate_derived_results(run_name, thermal_power.loc[run_name, :])
                   for run_name in PATHS_TO_RESULT_FILES]
all_results = pd.concat(results_per_run)

## Plot Data

In [None]:
# Time series of the mean thermal power: one subplot per run, one trace per
# region, all subplots sharing the time axis.
fig = plt.figure(figsize=(14, 7))
# The overall title belongs to the figure. Setting it with plt.title at this
# point would attach it to an implicit full-size axes which the subplots
# created below replace or overlap.
fig.suptitle('Average thermal power per {} for different runs'.format(SPATIAL_RESOLUTION))
# Long form: one row per (region, datetime, run) with the value in 'thermal_power'.
long_form_thermal_power = thermal_power.unstack().unstack().reset_index()\
    .rename(columns={0: 'thermal_power', 'level_0': 'region'})

number_results = len(PATHS_TO_RESULT_FILES)
first_ax = None
for i, name in enumerate(PATHS_TO_RESULT_FILES):
    # sharex=None for the first subplot; all later ones share its x axis.
    ax = plt.subplot(number_results, 1, i + 1, sharex=first_ax)
    if first_ax is None:
        first_ax = ax
    # NOTE(review): sns.tsplot is deprecated since seaborn 0.9 (superseded by
    # lineplot) and is not available in recent releases.
    sns.tsplot(long_form_thermal_power[long_form_thermal_power.run == name],
               time='datetime',
               unit='region',
               value='thermal_power',
               err_style='unit_points',
               ci=100,
               color=sns.color_palette()[i],
               ax=ax)
    plt.setp(ax.get_xticklabels(), visible=False)
    ax.set_xlabel('')
    ax.set_ylabel('thermal power [W]')
    ax.set_title(name)
fig.savefig((BUILD_FOLDER_PATH / 'thermal_power.png').as_posix())

In [None]:
# Distribution of the per-region average thermal power, one violin per run.
fig = plt.figure(figsize=(7, 7))
violin_ax = sns.violinplot(x='run', y='average_power', data=all_results, legend=True)
violin_ax.set_ylabel('average thermal power per household [W]')
violin_ax.set_title("Distribution of average thermal power per household among {}".format(SPATIAL_RESOLUTION))
fig.savefig((BUILD_FOLDER_PATH / "distributation-average-power.png").as_posix())

In [None]:
# Average thermal power plotted against each socio-demographic indicator,
# coloured by run. Note that `fig` is the grid object returned by
# sns.pairplot, not a matplotlib figure.
explanatory_variables = ['avg household size', 'avg age',
                         'percent highest qual', 'percent economic act']
fig = sns.pairplot(all_results,
                   hue='run',
                   x_vars=explanatory_variables,
                   y_vars=['average_power'])
fig.savefig((BUILD_FOLDER_PATH / 'pairwise-distributions.png').as_posix())