# Summary table

This notebook tabulates key aggregates for each country and country-sector, namely: 

- `x`: total output
- `zuse`: intermediate use
- `va`: value added
- `zsales`: intermediate sales
- `y`: final sales
- `ez`: exports of intermediates
- `ey`: exports of final goods
- `e`: total exports

Select the MRIO version to load from `data/mrio/` as well as the corresponding output filename to be saved in `data/`.

## Set up

In [1]:
import numpy as np
import pandas as pd
import duckdb
from functions import zeroout

### Select MRIO version

In [2]:
# Active MRIO version: `input` is the file read from `../data/mrio/`,
# `output` is the summary file written to `../data/`.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the cells below reference it.
input = 'adb-mrio.parquet'
output = 'summary.parquet'

# Alternative versions (swap in one pair to use it instead):
# input, output = 'adb-mrio62.parquet', 'summary62.parquet'
# input, output = 'adb-mrio62-const.parquet', 'summary62-const.parquet'

### Parameters

In [3]:
# Sector lookup table: keep the first row for each distinct `ind` code
sectors = (
    pd.read_excel('../data/raw/sectors.xlsx')
    .drop_duplicates(subset='ind', ignore_index=True)
)

# Distinct values of `t` found in the selected MRIO file, in ascending order
years = duckdb.sql(f"SELECT DISTINCT t FROM read_parquet('../data/mrio/{input}') ORDER BY t").df()['t']

# Total number of rows in the MRIO file across all years (returned as a 1x1 DataFrame)
rows = duckdb.sql(f"SELECT COUNT(*) FROM read_parquet('../data/mrio/{input}')").df()

N = 35      # Number of sectors
f = 5       # Number of final demand components

# Back out the number of country blocks from the row count: each year is taken
# to hold G*N country-sector rows plus 7 trailing rows (the compile step below
# reads rows [-7:-1] as value added and row [-1] as total output).
rows_per_year = rows.iloc[0, 0] / len(years)
G = int((rows_per_year - 7) / N)    # Number of countries + 1

# Silence NumPy warnings for division by zero and invalid operations.
# The call returns the previous settings, which is what the cell displays.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

## Compile table

In [4]:
# Build the summary table: one row per (year, country, sector) holding the
# aggregates listed at the top of the notebook, then save it as parquet.

# The aggregation matrices do not depend on the year, so build them once.
# Right-multiplying by kron(I_G, ones(k, 1)) sums every consecutive group of
# k columns into a single column, giving one column per country block.
sum_over_fd = np.kron(np.eye(G), np.ones((f, 1)))       # shape (G*f, G)
sum_over_sectors = np.kron(np.eye(G), np.ones((N, 1)))  # shape (G*N, G)

# Collect the per-year frames and concatenate once after the loop; growing a
# DataFrame with pd.concat inside the loop recopies all earlier rows each time.
frames = []

for year in years:

    # Load one year of the MRIO table as a plain array, dropping the `t` and `si` columns
    mrio = duckdb.sql(f"SELECT * EXCLUDE(t, si) FROM read_parquet('../data/mrio/{input}') WHERE t={year}").df()
    mrio = mrio.values

    # Total output: last row, first G*N columns
    x = mrio[-1][:(G*N)]

    # Intermediate flows: top-left (G*N) x (G*N) block
    Z = mrio[:(G*N)][:, :(G*N)]
    # Column sums give intermediate use, row sums give intermediate sales
    zuse, zsales = np.sum(Z, axis=0), np.sum(Z, axis=1)

    # Value added: sum of the six rows immediately above the last row
    va = np.sum(mrio[-7:-1][:, :(G*N)], axis=0)

    # Final demand block: columns from G*N up to (but excluding) the last column
    Y_big = mrio[:(G*N)][:, (G*N):-1]
    # Collapse the f final demand components of each country into one column
    Y = Y_big @ sum_over_fd

    # Intermediate sales aggregated by destination country, then passed through zeroout.
    # NOTE(review): zeroout(..., inverse=True) is defined in `functions`; presumably it
    # zeroes the own-country (domestic) entries so only cross-border flows remain — confirm.
    Zd = zeroout(Z @ sum_over_sectors, inverse=True)
    Yd = zeroout(Y, inverse=True)

    # Final sales, exports of intermediates, exports of final goods
    y, ez, ey = np.sum(Y, axis=1), np.sum(Zd, axis=1), np.sum(Yd, axis=1)

    df_t = pd.DataFrame({
        't': int(year),
        's': np.arange(1, G+1).repeat(N),       # country index 1..G, each repeated for N sectors
        'i': np.tile(sectors['ind'], G),        # sector codes repeated for every country
        'i5': np.tile(sectors['ind5'], G),
        'i15': np.tile(sectors['ind15'], G),
        'x': x,
        'zuse': zuse,
        'va': va,
        'zsales': zsales,
        'y': y,
        'e': ez + ey,
        'ez': ez,
        'ey': ey
    })
    frames.append(df_t)

    print(f'{year} done')

# pd.concat raises on an empty list, so fall back to an empty frame when there are no years
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

df.to_parquet(f'../data/{output}', index=False)

2017 done
2018 done
2019 done
2020 done
2021 done
2022 done


### View results

In [5]:
# Read the saved summary table back from its parquet file and display it
summary_query = f"SELECT * FROM read_parquet('../data/{output}')"
duckdb.sql(summary_query).df()

Unnamed: 0,t,s,i,i5,i15,x,zuse,va,zsales,y,e,ez,ey
0,2017,1,1,1,1,76590.551093,39230.167281,37360.383812,55280.401502,21310.149591,63476.853495,44961.052609,18515.800886
1,2017,1,2,1,2,219904.607558,103343.713942,116560.893616,210815.252994,9089.354564,52708.145739,44595.761206,8112.384533
2,2017,1,3,2,3,68876.130878,47391.068728,21485.062150,26554.426352,42321.704527,51708.037310,21684.304876,30023.732434
3,2017,1,4,2,3,3577.508549,1693.892772,1883.615778,2038.006691,1539.501858,2166.250503,1073.980567,1092.269937
4,2017,1,5,2,3,830.406474,524.261535,306.144939,455.295008,375.111466,425.758720,149.794737,275.963982
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15325,2022,73,31,5,13,768686.727805,413054.290581,355632.437224,116428.950607,652257.777198,750472.820805,103670.456683,646802.364122
15326,2022,73,32,5,14,560981.345278,305323.724709,255657.620568,54315.563443,506665.781835,547811.017989,48073.302218,499737.715770
15327,2022,73,33,5,14,500587.276573,337864.812953,162722.463620,87486.347829,413100.928744,490716.937291,81084.654083,409632.283208
15328,2022,73,34,5,15,424229.194155,270108.080452,154121.113702,213124.271225,211104.922930,412932.530760,206200.185406,206732.345354
