# Power Distributions across LSOAs

To understand how data can be aggregated, we look here at the distributions of thermal power across LSOAs. The question is: is thermal power distributed differently in different LSOAs? If so, we would need to consider different distributions when aggregating data.

In [None]:
import os
from pathlib import Path
from datetime import timedelta, datetime

import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import seaborn as sns
import pandas as pd
import numpy as np
import sqlalchemy

import urbanoccupants as uo

# Location of the SQLite database with the simulation results analysed below
# (relative to the directory this notebook is run from).
PATH_TO_SIMULATION_RESULTS = Path('../build/sim-output.db')

In [None]:
def read_thermal_power(path_to_simulation_results):
    """Load the thermal power records and tag each one with its region.

    Parameters:
        path_to_simulation_results: path to the SQLite database file that
            holds the ``thermalPower`` table and the dwellings/people tables.

    Returns:
        A DataFrame with one row per record of the ``thermalPower`` table.
        The former ``timestamp`` index is returned as a ``datetime`` column,
        the ``id`` column is renamed to ``dwelling_id``, and a ``region``
        column is added by looking up each dwelling's region.
    """
    connection_url = 'sqlite:///{}'.format(path_to_simulation_results)
    engine = sqlalchemy.create_engine(connection_url)

    # Lookup table: dwelling id -> region of that dwelling.
    dwellings = _read_dwellings(engine)
    region_by_dwelling = {
        dwelling_id: dwellings.loc[dwelling_id, 'region']
        for dwelling_id in dwellings.index
    }

    power = pd.read_sql_query(
        'SELECT * FROM thermalPower',
        engine,
        index_col='timestamp',
        parse_dates=True
    )
    # Scale the raw timestamps by 10^6 before conversion; presumably they are
    # stored in milliseconds and pandas expects nanoseconds -- TODO confirm.
    power.index = pd.to_datetime(power.index * 1000 * 1000)
    power.index.name = 'datetime'

    power = power.rename(columns={'id': 'dwelling_id'})
    power['region'] = power['dwelling_id'].map(region_by_dwelling)
    return power.reset_index()


def _read_dwellings(disk_engine):
    """Read the dwellings table and add each dwelling's household size.

    Parameters:
        disk_engine: SQLAlchemy engine (or connection) to read from.

    Returns:
        The dwellings table as a DataFrame indexed by its ``index`` column,
        with an extra ``householdSize`` column holding the number of rows of
        the people table per ``dwellingId`` (aligned on the dwellings index).
    """
    def _read_table(table_name):
        # Both tables are read whole and keyed by their 'index' column.
        return pd.read_sql_query(
            'SELECT * FROM {}'.format(table_name),
            disk_engine,
            index_col='index'
        )

    dwellings = _read_table(uo.DWELLINGS_TABLE_NAME)
    people = _read_table(uo.PEOPLE_TABLE_NAME)

    household_sizes = people.groupby('dwellingId').size()
    dwellings['householdSize'] = household_sizes
    return dwellings

In [None]:
def _plot_thermal_power(thermal_power, path_to_plot):
    """Plot mean and standard deviation of thermal power per region over time.

    The figure has two stacked panels sharing the x axis: the upper one shows
    the per-region mean of ``value`` at each ``datetime``, the lower one the
    per-region standard deviation. Every region is drawn as its own trace.

    Parameters:
        thermal_power: DataFrame with ``datetime``, ``region`` and ``value``
            columns.
        path_to_plot: destination passed to ``Figure.savefig``.
    """
    def _tick_as_time_of_day(tick_value, _tick_position):
        # Tick values are numeric timestamps; show only their time of day.
        return pd.to_datetime(tick_value).time()

    power_by_time_and_region = thermal_power.groupby(['datetime', 'region']).value
    regional_mean = power_by_time_and_region.mean().reset_index()
    regional_std = power_by_time_and_region.std().reset_index()

    figure = plt.figure(figsize=(8, 4), dpi=300)

    # Upper panel: average power per region.
    mean_axes = figure.add_subplot(2, 1, 1)
    sns.tsplot(
        data=regional_mean,
        time='datetime',
        unit='region',
        value='value',
        err_style='unit_traces',
        ax=mean_axes
    )
    plt.ylabel('average [W]')
    plt.xlabel('')
    mean_axes.set_ylim(bottom=0)

    # Lower panel: spread of power per region, on the same time axis.
    std_axes = figure.add_subplot(2, 1, 2, sharex=mean_axes)
    sns.tsplot(
        data=regional_std,
        time='datetime',
        unit='region',
        value='value',
        err_style='unit_traces',
        ax=std_axes
    )
    plt.ylabel('standard deviation [W]')
    plt.xlabel('time of the day')
    std_axes.set_ylim(bottom=0)

    # Place four ticks at hand-picked positions within the sorted time steps.
    time_steps = thermal_power.groupby('datetime').value.mean().index
    half_day_offset = 144 // 2
    tick_positions = [5, 5 + half_day_offset, 149, 149 + half_day_offset] # not sure why they are shifted
    # 10e8 == 1e9: converts POSIX seconds into nanoseconds.
    tick_values = [time_steps[position].timestamp() * 10e8
                   for position in tick_positions]
    std_axes.set_xticks(tick_values)
    std_axes.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(_tick_as_time_of_day))

    for axes in (mean_axes, std_axes):
        axes.label_outer()

    figure.savefig(path_to_plot, dpi=300)

In [None]:
# Load all thermal power records, each tagged with its dwelling's region.
thermal_power = read_thermal_power(PATH_TO_SIMULATION_RESULTS)

In [None]:
# Violin plots of the thermal power distribution at each time step between
# 12:00 and 14:00 on 2005-01-07, one panel per LSOA, for four LSOAs.
# The panels are drawn in a loop instead of four copies of the same code, and
# the time-window mask and the list of regions are computed only once.

# Positions within the list of distinct regions; the choice is arbitrary, but
# it requires at least 128 distinct regions in the data.
region_positions = (0, 10, 100, 127)
all_regions = thermal_power.region.unique()
in_time_window = ((thermal_power.datetime < datetime(2005, 1, 7, 14, 0)) &
                  (thermal_power.datetime > datetime(2005, 1, 7, 12, 0)))

fig = plt.figure(figsize=(14, 10))
axes = []
for row, position in enumerate(region_positions, start=1):
    ax = fig.add_subplot(len(region_positions), 1, row)
    sns.violinplot(
        data=thermal_power[in_time_window &
                           (thermal_power.region == all_regions[position])],
        x='datetime',
        y='value',
        ax=ax
    )
    if row < len(region_positions):
        # Only the bottom panel keeps its x label.
        _ = plt.xlabel('')
    axes.append(ax)

# Acts on the current axes, i.e. the bottom panel created last.
_ = plt.xticks(rotation=45)

for ax in axes:
    ax.label_outer()

# Keep the individual axes names available, as before.
ax1, ax2, ax3, ax4 = axes

_ = plt.suptitle("Thermal power distributions in 4 arbitrarily chosen LSOAs [W]")
fig.savefig('../build/power-distributions-across-lsoas-10min.png')

In [None]:
# Downsample each dwelling's series into 4-hour bins: mean power per bin,
# together with the first region value found in that bin.
four_hourly = (
    thermal_power
    .set_index('datetime')
    .groupby('dwelling_id')
    .resample('4H')
    .agg({'value': 'mean', 'region': 'first'})
    .reset_index()
)

In [None]:
# Violin plots of the 4-hourly averaged thermal power distribution at each
# time bin, one panel per LSOA, for four LSOAs sharing one x axis.
# The panels are drawn in a loop instead of four copies of the same code, and
# the list of regions is computed only once.

# Positions within the list of distinct regions; the choice is arbitrary, but
# it requires at least 128 distinct regions in the data.
region_positions = (0, 10, 100, 127)
all_regions = four_hourly.region.unique()

fig = plt.figure(figsize=(14, 10))
axes = []
for row, position in enumerate(region_positions, start=1):
    # Every panel after the first shares its x axis with the first one.
    ax = fig.add_subplot(
        len(region_positions), 1, row,
        sharex=axes[0] if axes else None
    )
    sns.violinplot(
        data=four_hourly[four_hourly.region == all_regions[position]],
        x='datetime',
        y='value',
        ax=ax
    )
    if row < len(region_positions):
        # Only the bottom panel keeps its x label.
        _ = plt.xlabel('')
    axes.append(ax)

# Acts on the current axes, i.e. the bottom panel created last.
_ = plt.xticks(rotation=45)

for ax in axes:
    ax.label_outer()

# Keep the individual axes names available, as before.
ax1, ax2, ax3, ax4 = axes

_ = plt.suptitle("Thermal power distributions in 4 arbitrarily chosen LSOAs [W]")
fig.savefig('../build/power-distributions-across-lsoas-4h.png')

Unsurprisingly, the distributions at a given point in time differ between LSOAs. These differences, however, seem to be explainable through different parameterisations of the same distribution. More important seems to be the difference in distributions across the time of day, where the distributions seem to be qualitatively different.