In [None]:
import glob
import os

import pandas

In [None]:
# Scenario folder names: every entry under eeh/ starting with "0", in sorted path order.
scenarios = list(map(os.path.basename, sorted(glob.glob('eeh/0*'))))
scenarios

In [None]:
# Timestep folder names, read from the 01-BaU scenario directory in sorted path order.
timesteps = list(map(os.path.basename, sorted(glob.glob('eeh/01-BaU/*'))))
timesteps

In [None]:
# Names of the entries found under eeh/01-BaU/2050.
# glob.glob makes no ordering guarantee, so sort the paths (as the scenario and
# timestep listings do) to keep the displayed list stable between runs.
outputs = [os.path.basename(d) for d in sorted(glob.glob('eeh/01-BaU/2050/*'))]
outputs

In [None]:
# Read zonalRailDemand.csv for every (scenario, timestep) pair, tag each table
# with its scenario, then stack them into one frame and write it to eeh/zonal.csv.
dfs = []
for scenario in scenarios:
    for timestep in timesteps:
        fname = f"eeh/{scenario}/{timestep}/zonalRailDemand.csv"
        try:
            df = pandas.read_csv(fname)
        except FileNotFoundError:
            # Report and skip this pair. Falling through here would re-label and
            # re-append the frame left over from the previous iteration (or raise
            # NameError if the very first file is the missing one).
            print(fname, "not found")
            continue
        df['scenario'] = scenario
        dfs.append(df)

# NOTE: pandas.concat raises ValueError if no file at all was found.
zonal = pandas.concat(dfs) \
    .sort_values(by=['scenario', 'year', 'LADcode'])
zonal.to_csv('eeh/zonal.csv', index=False)

In [None]:
# The lad19cd column of the lookup file; used below to filter rows by LADcode.
zone_codes = pandas.read_csv('lads-codes-eeh.csv')['lad19cd']

In [None]:
# Peek at the first row to check the combined table's columns.
zonal.head(n=1)

In [None]:
# Keep only the rows whose LADcode appears in zone_codes, index them by
# (scenario, year, LADcode), and save the subset to eeh/eeh_zonal.csv.
eeh_zonal = (
    zonal[zonal['LADcode'].isin(zone_codes)]
    .copy()
    .set_index(['scenario', 'year', 'LADcode'])
)
eeh_zonal.to_csv('eeh/eeh_zonal.csv')
eeh_zonal.head(n=1)

In [None]:
# Totals per (scenario, year) over the zone-filtered table, written to
# eeh/eeh_zonal_summary.csv. The summed stationsNo column is dropped from the result.
#
# numeric_only=True restricts the sum to numeric columns. reset_index() turns
# LADcode back into a regular column, and with the pandas >= 2.0 default
# (numeric_only=False) non-numeric columns are string-concatenated or raise.
# NOTE(review): presumably LADcode (and any name columns) are text - confirm.
eeh_summary = (
    eeh_zonal.reset_index()
    .groupby(['scenario', 'year'])
    .sum(numeric_only=True)
    .drop(columns='stationsNo')
)
eeh_summary.to_csv('eeh/eeh_zonal_summary.csv')
eeh_summary

In [None]:
# Totals per (scenario, year) over the full zonal table, written to
# eeh/zonal_summary.csv. The summed stationsNo column is dropped from the result.
#
# numeric_only=True restricts the sum to numeric columns; with the pandas >= 2.0
# default (numeric_only=False) non-numeric columns such as LADcode would be
# string-concatenated or raise.
# NOTE(review): presumably LADcode (and any name columns) are text - confirm.
summary = (
    zonal.groupby(['scenario', 'year'])
    .sum(numeric_only=True)
    .drop(columns='stationsNo')
)
summary.to_csv('eeh/zonal_summary.csv')
summary

In [None]:
# Read predictedRailDemand.csv for every scenario and every timestep except the
# first, tag each table with its scenario, stack them, and write eeh/station.csv.
# NOTE(review): timesteps[1:] skips the first timestep - presumably the base
# year has no predictedRailDemand.csv; confirm.
dfs = []
for scenario in scenarios:
    for timestep in timesteps[1:]:
        fname = f"eeh/{scenario}/{timestep}/predictedRailDemand.csv"
        df = pandas.read_csv(fname)
        df['scenario'] = scenario
        dfs += [df]

pred = (
    pandas.concat(dfs)
    .sort_values(by=['scenario', 'year', 'LADcode', 'NLC'])
)
pred.to_csv('eeh/station.csv', index=False)

In [None]:
# Keep only the station rows whose LADcode appears in zone_codes, index them by
# (scenario, year, NLC), and save the subset to eeh/eeh_station.csv.
eeh_pred = (
    pred[pred['LADcode'].isin(zone_codes)]
    .copy()
    .set_index(['scenario', 'year', 'NLC'])
)
eeh_pred.to_csv('eeh/eeh_station.csv')
eeh_pred.head(n=2)

In [None]:
# YearUsage / DayUsage totals per (scenario, year) for the zone-filtered stations.
# Select the two columns before aggregating so unrelated (possibly non-numeric)
# columns are never summed; the displayed result is the same.
eeh_pred.groupby(['scenario', 'year'])[['YearUsage', 'DayUsage']].sum()

In [None]:
# YearUsage / DayUsage totals per (scenario, year) across all stations.
# Select the two columns before aggregating so unrelated (possibly non-numeric)
# columns are never summed; the displayed result is the same.
pred.groupby(['scenario', 'year'])[['YearUsage', 'DayUsage']].sum()