# Check smif outputs correspond to energy demand model outputs

The energy demand model reports high-level totals in its log output. Check that these match the outputs
as processed by smif and passed on to other models (energy supply) and used for analysis.

- read smif outputs from an energy demand model run
- read text file outputs produced by energy demand directly from the same run
- aggregate, compare/contrast, check totals, check any source of mismatch

In [None]:
import glob
import os
import re

import numpy as np
import pandas

In [None]:
# Collect every CSV file found under the smif results directory for this run.
outputs = glob.glob(
    "../results/energy_demand_constrained/energy_demand_constrained/decision_0/*.csv"
)
# Show how many files were found, together with one sample path.
(len(outputs), outputs[0])

In [None]:
# Derive the set of distinct end-use names encoded in the smif output file names.
# NOTE(review): assumes file names of the form
# "output_<sector>_<fuel>_<enduse>_timestep_<year>.csv" - confirm against smif.
enduses = set()

# Matches a single trailing underscore; used to clean up the matched fuel name.
r = re.compile('_$')
for fname in outputs:
    slug, _ = os.path.splitext(os.path.basename(fname))
    sector = re.search('industry|service|residential', slug)[0]
    # Drop the trailing 4-character year, the fixed markers and the sector prefix.
    rest = slug[:-4].replace("output_", "") \
        .replace("_timestep_", "") \
        .replace(f"{sector}_", "")
    # The fuel alternatives include a trailing underscore so that only whole
    # name components are matched; strip it again afterwards.
    fuel = re.search('oil_|biomass_|electricity_|gas_|hydrogen_|solid_fuel_', rest)[0]
    fuel = r.sub("", fuel)
    enduse = rest.replace(f"{fuel}_", "")
    enduses.add(enduse)


for e in sorted(enduses):
    print(e)

In [None]:
# Preview the layout of one output file. Only the first two rows are parsed,
# so the whole file is not loaded just to be truncated.
pandas.read_csv(outputs[0], nrows=2)

In [None]:
# Load every smif output file and reduce it to one total per region.
dfs = []
for fname in outputs:
    slug, _ = os.path.splitext(os.path.basename(fname))
    # Strip the 4-character timestep suffix and the fixed "output_" /
    # "_timestep_" markers. The remainder labels the rows and is also the key
    # used to rename the data column to 'value'.
    # NOTE(review): assumes the CSV data column carries exactly this name.
    # rename() silently ignores a missing key - confirm against a sample file.
    enduse = slug[:-4].replace("output_", "").replace("_timestep_", "")
    df = pandas.read_csv(fname) \
        .drop(columns='hourly') \
        .groupby('lad_uk_2016') \
        .sum() \
        .rename(columns={enduse: 'value'})
    # Timestep and sector are not kept as columns, so any later grouping by
    # 'enduse' adds together the files of all timesteps.
    df['enduse'] = enduse
    print(enduse)
    dfs.append(df)

smif_output = pandas.concat(dfs)

In [None]:
# Collapse the per-region rows into a single total per end use.
smif_output_annual_national = (
    smif_output
    .reset_index()
    .drop(columns='lad_uk_2016')
    .groupby('enduse')
    .sum()
    .reset_index()
)

def extract_sector(e):
    """Return the first sector name found in the end-use string *e*.

    The recognised sectors are 'industry', 'service' and 'residential'.
    Raises TypeError when *e* contains none of them, because the failed
    search result is ``None`` and cannot be indexed.
    """
    found = re.search('industry|service|residential', e)
    return found[0]

# Label every end use with the sector named inside it.
smif_output_annual_national['sector'] = (
    smif_output_annual_national['enduse'].apply(extract_sector)
)

def extract_fuel(e):
    """Return the first fuel name found in the end-use string *e*.

    A fuel only counts when it is delimited by underscores on both sides,
    e.g. ``"residential_gas_boiler_gas"`` yields ``"gas"``. Raises TypeError
    when no fuel is found, because the failed search result is ``None``.
    """
    match_fuel = '_oil_|_biomass_|_electricity_|_gas_|_hydrogen_|_solid_fuel_'
    delimited = re.search(match_fuel, e)[0]
    # Only the surrounding underscores are removed, so the inner underscore
    # of "solid_fuel" survives.
    return delimited.strip("_")

# Label every end use with the fuel named inside it. The source series must
# be the same frame that receives the new column, so the values stay aligned.
smif_output_annual_national['fuel'] = smif_output_annual_national.enduse \
    .apply(extract_fuel)

smif_output_annual_national

In [None]:
# Total per fuel. Only the numeric 'value' column is aggregated: the frame
# also carries string columns ('enduse', 'sector'), and summing those is
# meaningless and depends on the pandas version.
smif_output_by_fuel = smif_output_annual_national \
    .groupby('fuel')[['value']] \
    .sum()
smif_output_by_fuel

In [None]:
# Column-wise total over all fuels of the smif outputs.
smif_output_by_fuel.sum()

In [None]:
# Residential rows only, ordered by fuel, sector and end use for reading.
res = (
    smif_output_annual_national
    .sort_values(by=['fuel', 'sector', 'enduse'])
    [['fuel', 'sector', 'enduse', 'value']]
)
res = res.loc[res['sector'] == 'residential']
res

## Demand npy results

These are written out directly by the demand model, not via smif.

- Regional/hourly output per fuel type
- National/hourly output per enduse and fuel

In [None]:
# Load the 2015 and 2020 result arrays written directly by the demand model.
regs_yh15 = np.load(
    '../data/energy_demand/results/model_run_pop/ed_fueltype_regs_yh/result_tot_submodels_fueltypes__2015__.npy'
)
regs_yh20 = np.load(
    '../data/energy_demand/results/model_run_pop/ed_fueltype_regs_yh/result_tot_submodels_fueltypes__2020__.npy'
)
# Element-wise sum, so both files are combined into one array of the same shape.
regs_yh = np.add(regs_yh15, regs_yh20)

In [None]:
# Inspect the dimensions of the combined array.
regs_yh.shape # 7 fuels, 391 regions, 8760 hours

In [None]:
# Sum over every element of the combined array.
regs_yh.sum()

In [None]:
# Fuel labels, paired positionally with the first axis of the .npy arrays.
# NOTE(review): the order is assumed to match the demand model's fuel type
# ordering - confirm against the model's lookup table.
fuels = [
    'solid_fuel',
    'gas',
    'electricity',
    'oil',
    'biomass',
    'hydrogen',
    'heat',
]

In [None]:
# Reduce the two trailing axes in turn, leaving one total per entry of the
# first axis, then pair each total with its fuel label.
summed = regs_yh.sum(axis=1).sum(axis=1)
data = [
    {'fuel': fuel, 'value': value}
    for fuel, value in zip(fuels, summed)
]

# One row per fuel, indexed and ordered alphabetically by fuel name.
ed_output_by_fuel = pandas.DataFrame(data).sort_values(by='fuel').set_index('fuel')
ed_output_by_fuel

In [None]:
# Collect every .npy file found under the end-use specific results directory.
demand_outputs = glob.glob(
    '../data/energy_demand/results/model_run_results_txt/enduse_specific_results/*.npy'
)
# Show how many files were found, together with one sample path.
(len(demand_outputs), demand_outputs[0])

In [None]:
# Load one end-use result file to inspect its dimensions.
m = np.load(demand_outputs[0])
print(m.shape) # 7 fuels, 8760 hours

In [None]:
# Build a long-format table with one row per (end use, timestep, fuel) from
# the .npy files written directly by the demand model.
data = []

for fname in demand_outputs:
    # Parse the base name only, so a "__" in a directory name cannot shift
    # the fields.
    # NOTE(review): assumes names of the form
    # "<prefix>__<enduse>__<timestep>__.npy" - confirm against the writer.
    s = os.path.basename(fname).split("__")
    enduse, timestep = s[1], s[2]

    m = np.load(fname)
    # Collapse axis 1 so that one total remains per row of the array; the
    # rows are then paired positionally with the fuel labels.
    by_fuel = m.sum(axis=1)
    data.extend(
        {
            'enduse': enduse,
            'timestep': timestep,
            'fuel': fuel,
            'value': value,
        }
        for fuel, value in zip(fuels, by_fuel)
    )

ed_output_by_fuel_enduse = pandas.DataFrame(data)
ed_output_by_fuel_enduse

In [None]:
# Total per fuel over all end uses and timesteps. Only 'value' is aggregated:
# 'enduse' and 'timestep' are strings, and summing them is meaningless and
# depends on the pandas version.
# NOTE: this rebinds `ed_output_by_fuel`, replacing any earlier definition.
ed_output_by_fuel = ed_output_by_fuel_enduse.groupby(['fuel'])[['value']].sum()
ed_output_by_fuel

### Further into residential electricity use

`ed_output` gives the breakdown by end use (lighting, heating...), whereas `smif_output` gives a further breakdown into heating technologies (boiler, heat pump...) but lumps all other electricity use into "non-heating".

In [None]:
# Residential ('rs' prefix) electricity end uses with non-zero demand.
# Each comparison is parenthesised because `&` binds more tightly than `>`
# and `==`; without the parentheses the mask is computed incorrectly.
ed_output_by_fuel_enduse[
    (ed_output_by_fuel_enduse.fuel == 'electricity')
    & ed_output_by_fuel_enduse.enduse.str.startswith('rs')
    & (ed_output_by_fuel_enduse.value > 0)
].groupby('enduse')[['value']].sum()

In [None]:
# Rows of the smif totals whose end use starts with 'res' and whose fuel is
# electricity.
smif_output_annual_national.loc[
    smif_output_annual_national['enduse'].str.startswith('res')
    & (smif_output_annual_national['fuel'] == 'electricity')
]

## Comparisons

Compare total output

In [None]:
# True when the smif total and the directly written total agree within
# np.isclose's default tolerances.
np.isclose(smif_output.value.sum(), regs_yh.sum())

In [None]:
# Total of the combined arrays written directly by the demand model.
regs_yh.sum()

In [None]:
# Total of the values read back from the smif output files.
smif_output.value.sum()

Compare breakdown by fuel

In [None]:
# Line up the two per-fuel totals side by side. Both frames are indexed by
# fuel, so a plain index-on-index join is enough. Only the numeric 'value'
# column is taken from each side, so stray non-numeric columns cannot leak
# into the comparison table.
a = ed_output_by_fuel[['value']].rename(columns={'value': 'ed'})
b = smif_output_by_fuel[['value']].rename(columns={'value': 'smif'})
df = a.join(b)
df['gap'] = df.ed - df.smif
# NOTE(review): this is an absolute tolerance, stricter than the relative
# tolerance np.isclose applies to the overall totals. A fuel missing on one
# side produces NaN and is therefore reported as not small.
df['gap_is_small'] = np.abs(df.gap) < 1e-9
df