In [None]:
import io
import re
import tempfile
import zipfile
from datetime import datetime, timedelta
from pathlib import Path

import geopandas as gpd
import pandas as pd
import requests
import requests_cache
import sqlalchemy

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context("notebook", font_scale=1.25, rc={"lines.linewidth": 2.5})
%matplotlib inline

In [None]:
# Folder that holds the result database, the HTTP cache and every figure this
# notebook writes. It is created up front so later cells can write into it.
BUILD_FOLDER = Path('./build')
BUILD_FOLDER.mkdir(parents=True, exist_ok=True)

# SQLite file with the simulation results. Derived from BUILD_FOLDER so the
# build directory is spelled out in exactly one place.
PATH_TO_RESULT_FILE = (BUILD_FOLDER / 'haringey-scenario-lsoa-results.db').absolute()

# Zip archive with the London statistical boundary files, and the path of the
# LSOA shape file relative to the root of the extracted archive.
LONDON_BOUNDARY_FILE_URL = 'https://files.datapress.com/london/dataset/statistical-gis-boundary-files-london/2016-10-03T13:52:28/statistical-gis-boundaries-london.zip'
LSOA_SHAPE_FILE_PATH = Path('./statistical-gis-boundaries-london/ESRI/LSOA_2011_London_gen_MHW.shp')

In [None]:
# SQLAlchemy engine pointing at the SQLite result file.
database_url = 'sqlite:///{}'.format(PATH_TO_RESULT_FILE)
disk_engine = sqlalchemy.create_engine(database_url)

# Install an HTTP cache for `requests`, stored below the build folder.
http_cache_path = BUILD_FOLDER / 'cache'
requests_cache.install_cache(http_cache_path.as_posix())

In [None]:
# ISO 8601 durations built from days, hours, minutes and (optionally
# fractional) seconds, e.g. 'PT19M43S', 'PT24H' or 'P1DT2H30M'.
# `T(?=\d)` rejects a dangling 'T' that is not followed by a time component.
_ISO_8601_DURATION = re.compile(
    r'P(?:(?P<days>\d+)D)?'
    r'(?:T(?=\d)'
    r'(?:(?P<hours>\d+)H)?'
    r'(?:(?P<minutes>\d+)M)?'
    r'(?:(?P<seconds>\d+(?:\.\d+)?)S)?'
    r')?'
)


def timedelta_from_iso_string(timedelta_as_string):
    """Convert an ISO 8601 duration string into a ``datetime.timedelta``.

    Supported designators are days (``D``), hours (``H``), minutes (``M``) and
    seconds (``S``); only seconds may carry a decimal fraction. Every
    component is optional, but at least one must be present, so ``'PT43S'``,
    ``'PT24H'`` and ``'P1DT2H30M'`` are all accepted. Values are not limited
    to clock ranges (``'PT90M'`` is one and a half hours).

    Years, months, weeks and negative durations are not supported.

    Parameters
    ----------
    timedelta_as_string : str
        The duration, e.g. ``'PT19M43S'``.

    Returns
    -------
    datetime.timedelta

    Raises
    ------
    ValueError
        If the string is not a supported ISO 8601 duration.
    """
    match = _ISO_8601_DURATION.fullmatch(timedelta_as_string)
    # Group names equal timedelta's keyword names, so the parsed components
    # can be passed straight through.
    components = {} if match is None else {
        unit: float(value)
        for unit, value in match.groupdict().items()
        if value is not None
    }
    if not components:
        # Covers both "no match" and a bare 'P' without any component.
        raise ValueError(
            'not a supported ISO 8601 duration: {!r}'.format(timedelta_as_string)
        )
    return timedelta(**components)


assert timedelta_from_iso_string('PT19M43S') == timedelta(minutes=19) + timedelta(seconds=43)
assert timedelta_from_iso_string('PT24H') == timedelta(days=1)

## Read Metadata

In [None]:
# Load the simulation metadata table, indexed by its `key` column.
# No `parse_dates` argument: read_sql_query expects a list or dict of column
# names there, and the former bare `True` selected no column at all.
metadata = pd.read_sql_query(
    'SELECT * FROM metadata',
    disk_engine,
    index_col='key',
)

In [None]:
# Show the raw metadata table as loaded from the database.
metadata

In [None]:
# Collapse the table into a plain {key: value-of-first-column} mapping.
# NOTE: this rebinds `metadata` from a DataFrame to a dict, so the cell cannot
# be re-run on its own; re-run the cell that loads the table first.
metadata = {
    key: row[0]
    for key, row in zip(metadata.index, metadata.values)
}

In [None]:
# Duration of the simulation run, parsed from the metadata's duration string.
duration_as_string = metadata['durationOfSimulation']
simulation_duration = timedelta_from_iso_string(duration_as_string)

## Dwellings

In [None]:
# One row per dwelling, indexed by the table's `index` column.
dwellings = pd.read_sql_query(
    sql='SELECT * FROM dwellings',
    con=disk_engine,
    index_col='index',
)

In [None]:
# Preview the first rows of the dwellings table.
dwellings.head()

## People

In [None]:
# One row per person, indexed by the table's `index` column.
people = pd.read_sql_query(
    sql='SELECT * FROM people',
    con=disk_engine,
    index_col='index',
)
# Preview the first rows.
people.head()

In [None]:
# Number of people per dwelling. The assignment aligns on the index, so this
# assumes `dwellings` is indexed by the ids used in `people.dwellingId`
# (TODO confirm); dwellings without any person end up as NaN.
people_per_dwelling = people.groupby('dwellingId').size()
dwellings['householdSize'] = people_per_dwelling

## Thermal Power

In [None]:
# Load the thermal power readings in long format, indexed by `timestamp`.
# No `parse_dates` argument: it expects column names, and the former bare
# `True` did not parse anything; the conversion happens explicitly below.
thermal_power = pd.read_sql_query(
    'SELECT * FROM thermalPower',
    disk_engine,
    index_col='timestamp',
)
# `timestamp` holds epoch milliseconds (it used to be scaled by 1000 * 1000 to
# nanoseconds by hand); let pandas do the unit conversion.
thermal_power.index = pd.to_datetime(thermal_power.index, unit='ms')
thermal_power.index.name = 'datetime'
# Long -> wide: one column per `id`, one row per point in time.
thermal_power = thermal_power.pivot(columns='id')
# The pivot leaves the name of the value column as outer column level; drop it
# so that the columns are just the ids.
thermal_power.columns = thermal_power.columns.droplevel(0)
thermal_power.name = 'thermal power'

In [None]:
# Mean over time of every column of `thermal_power` (one column per id).
# The assignment aligns on the index, so this assumes those ids are the index
# of `dwellings` (TODO confirm).
mean_power_per_dwelling = thermal_power.mean()
dwellings['average_power'] = mean_power_per_dwelling

In [None]:
# Day shown in the figure.
plotted_day = '2005-01-01'

# Select the rows of that day with an explicit `.loc` slice: plain
# `frame['2005-01-01']` row slicing was removed in pandas 2.0, and a slice
# (unlike a single label) always yields a DataFrame.
power_on_day = thermal_power.loc[plotted_day:plotted_day]

# Region of every dwelling column, in column order. Looking the ids up via
# `.loc` fails loudly if a dwelling id is unknown.
region_of_dwelling = dwellings.loc[power_on_day.columns, 'region'].values

# Average the dwelling columns per region. Grouping the transposed frame
# replaces the deprecated `groupby(axis=1)`.
mean_power_per_lsoa = power_on_day.T.groupby(region_of_dwelling).mean().T

ax = mean_power_per_lsoa.plot(figsize=(14, 7), legend=False)
ax.set_ylabel('average thermal power per household [W]')
ax.set_title('Average of thermal power per household in different LSOA')
fig = ax.get_figure()
fig.savefig((BUILD_FOLDER / 'thermal_power_per_lsoa.png').as_posix())

In [None]:
# Download the London boundary archive (served from the installed HTTP cache
# on repeated runs).
response = requests.get(LONDON_BOUNDARY_FILE_URL)
# Fail with a clear HTTP error instead of an opaque BadZipFile further down
# when the server answers with an error page.
response.raise_for_status()

# Unpack into a throw-away directory and read the LSOA shape file from there;
# both the archive handle and the directory are released afterwards.
with zipfile.ZipFile(io.BytesIO(response.content)) as boundary_archive, \
        tempfile.TemporaryDirectory(prefix='london-boundary-files') as tmpdir:
    boundary_archive.extractall(path=tmpdir)
    lsoa_file = Path(tmpdir) / LSOA_SHAPE_FILE_PATH
    lsoa_data = gpd.read_file(lsoa_file.as_posix())

# Keep only the LSOAs whose `LAD11NM` is Haringey.
lsoa_data = lsoa_data[lsoa_data.LAD11NM == 'Haringey']

In [None]:
# Preview the first rows of the Haringey LSOA geometries.
lsoa_data.head()

In [None]:
import geopandasplotting as gpdplt

In [None]:
# Mean of the dwellings' average thermal power within each region.
average_power_per_lsoa = dwellings.groupby('region').average_power.mean()

# Attach that value to the LSOA geometries; `region` values are matched
# against the `LSOA11CD` column (presumably the LSOA code, TODO confirm).
lsoa_with_power = lsoa_data.join(average_power_per_lsoa, on='LSOA11CD')

choropleth_options = dict(
    column='average_power',
    categorical=False,
    linewidth=0.2,
    legend=True,
    figsize=(14, 7),
    cmap='viridis',
)
ax = gpdplt.plot_dataframe(lsoa_with_power, **choropleth_options)

plt.title("Average Thermal Power per Household in different LSOAs [W]")
# Remove the axis ticks from the map.
plt.xticks([])
plt.yticks([])

fig = ax.get_figure()
fig.savefig((BUILD_FOLDER / 'thermal_power_lsoa_choropleth.png').as_posix())

In [None]:
# Ratio between the highest and the lowest per-region mean of the dwellings'
# average thermal power.
average_power_per_lsoa = dwellings.groupby('region').average_power.mean()
max_power_lsoa = average_power_per_lsoa.max()
min_power_lsoa = average_power_per_lsoa.min()
print(max_power_lsoa / min_power_lsoa)

Judging by the ratio printed above, the LSOA with the highest average thermal power per household lies roughly 8% above the LSOA with the lowest.

In [None]:
# Distribution of the per-region mean thermal power across all regions.
average_power_per_lsoa = dwellings.groupby('region').average_power.mean()

fig, ax = plt.subplots(figsize=(7, 7))
# No `jitter` argument: that is a stripplot option. violinplot has no such
# parameter, and recent seaborn releases pass unknown keywords on to
# matplotlib, which rejects them.
sns.violinplot(data=average_power_per_lsoa, ax=ax)
ax.set_ylabel('average thermal power per household [W]')
ax.set_xticks([])
ax.set_title("Distribution of average thermal power per household among LSOAs")
# NOTE(review): the file name is kept exactly as it was, spelling included,
# because other material may reference the figure by this name.
fig.savefig((BUILD_FOLDER / "distributation-average-power.png").as_posix())

In [None]:
# Box plot of the dwellings' average thermal power, one box per household size.
fig, ax = plt.subplots(figsize=(14, 7))
sns.boxplot(x='householdSize', y='average_power', data=dwellings, ax=ax)
ax.set_ylabel("average thermal power per household [W]")
ax.set_xlabel("household size")
ax.set_title("Average thermal power per household for different household sizes")

output_file = BUILD_FOLDER / "thermal-power-vs-household-size.png"
fig.savefig(output_file.as_posix())