# Summary table

This notebook tabulates key aggregates for each country-sector in each year, namely:

- `x`: total output
- `zuse`: intermediate use
- `va`: value added
- `zsales`: intermediate sales
- `y`: final sales
- `ez`: exports of intermediates
- `ey`: exports of final goods
- `e`: total exports

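Results are saved in `data/final/` as `{outputfile}.csv`, or `{outputfile}_{version}.csv` when `version` is set (`summary62.csv` under the current settings).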

In [10]:
import numpy as np
import pandas as pd
import os
import re
import duckdb
from functions import zeroout

## Set up

In [11]:
# Input folder (under ../data/interim/) and base name of the output CSV.
# Alternative settings kept for reference:
#   inputfolder = 'ADB-MRIO'
#   outputfile = 'summary'
#
#   inputfolder = 'ADB-MRIO_jun2023'
inputfolder = 'ADB-MRIO62'
outputfile = 'summary62'

# Optional suffix appended to the output file name (e.g. 'jun2023').
# When None, the output file name carries no suffix.
version = None

# Names of the input files in sorted order, skipping entries that start with '.'.
filelist = sorted(
    name
    for name in os.listdir(f'../data/interim/{inputfolder}')
    if not name.startswith('.')
)

In [12]:
# Sector lookup table, reduced to one row per distinct `ind` value.
sectors = (
    pd.read_excel('../data/interim/sectors.xlsx')
    .drop_duplicates(subset='ind', ignore_index=True)
)

# Table dimensions.
# G = 73 for the table with 73 countries + ROW
G = 63      # Number of countries + ROW
N = 35      # Number of sectors
f = 5       # Number of final demand components

# Silence numpy's divide-by-zero and invalid-operation warnings.
# np.seterr returns the previous settings, which the cell echoes as its output.
np.seterr(invalid='ignore', divide='ignore')

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

## Compile table

In [13]:
# Build one table of aggregates per input file (one file per year), stack them
# into `DF`, and write the result to ../data/final/.

# Loop-invariant summing matrices that collapse columns to one per country.
sum_final_demand = np.kron(np.eye(G), np.ones((f, 1)))   # shape (G*f, G)
sum_sectors = np.kron(np.eye(G), np.ones((N, 1)))        # shape (G*N, G)

# Per-year frames are collected here and concatenated once after the loop;
# concatenating onto a growing DataFrame inside the loop recopies all earlier rows.
frames = []

for file in filelist:

    # The year is taken from the first run of four digits in the file name.
    year_match = re.search('[0-9]{4}', file)
    if year_match is None:
        raise ValueError(f'No four-digit year found in file name {file!r}')
    year = year_match.group()

    # Load the table as a plain array, dropping the column named C0.
    mrio = duckdb.sql(
        f"""
        SELECT * EXCLUDE(C0)
        FROM read_parquet('../data/interim/{inputfolder}/{file}')
        """
    ).df().to_numpy()

    # Last row, first G*N columns: total output by country-sector.
    x = mrio[-1][:(G*N)]

    # Top-left (G*N x G*N) block: intermediate transactions.
    Z = mrio[:(G*N)][:, :(G*N)]
    zuse = np.sum(Z, axis=0)        # column sums: intermediate use
    zsales = np.sum(Z, axis=1)      # row sums: intermediate sales

    # The six rows just above the last row are summed into value added.
    va = np.sum(mrio[-7:-1][:, :(G*N)], axis=0)

    # Final demand block (all columns after the first G*N except the last),
    # with the f components of each country summed into a single column.
    Y_big = mrio[:(G*N)][:, (G*N):-1]
    Y = Y_big @ sum_final_demand
    y = np.sum(Y, axis=1)

    # NOTE(review): `zeroout` comes from the project's `functions` module and its
    # semantics are not visible here. `ez`/`ey` are meant to be exports, so confirm
    # that `inverse=True` removes the own-country blocks rather than keeping only
    # them; the `d` suffix and the saved 2021 rows for s=63 (e close to x) are
    # worth checking.
    Zd = zeroout(Z @ sum_sectors, inverse=True)
    Yd = zeroout(Y, inverse=True)
    ez = np.sum(Zd, axis=1)
    ey = np.sum(Yd, axis=1)

    frames.append(pd.DataFrame({
        't': year,
        's': np.arange(1, G+1).repeat(N),
        'i': np.tile(sectors['ind'], G),
        'i5': np.tile(sectors['ind5'], G),
        'i15': np.tile(sectors['ind15'], G),
        'x': x,
        'zuse': zuse,
        'va': va,
        'zsales': zsales,
        'y': y,
        'e': ez + ey,
        'ez': ez,
        'ey': ey
    }))

    print(f'{year} done')

# An empty file list yields an empty table, as before, instead of a concat error.
DF = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

if version is None:
    outputfilename = f'{outputfile}.csv'
else:
    outputfilename = f'{outputfile}_{version}.csv'

DF.to_csv(f'../data/final/{outputfilename}', index=False)

2000 done
2007 done
2008 done
2009 done
2010 done
2011 done
2012 done
2013 done
2014 done
2015 done
2016 done
2017 done
2018 done
2019 done
2020 done
2021 done


In [14]:
# Display the compiled table; the notebook renders a truncated preview of it.
DF

Unnamed: 0,t,s,i,i5,i15,x,zuse,va,zsales,y,e,ez,ey
0,2000,1,1,1,1,3.193114e+04,17552.069348,14379.073025,26201.419947,5729.722426,2.484790e+04,20321.508799,4526.392603
1,2000,1,2,1,2,3.401253e+04,13149.310624,20863.216969,32445.331035,1567.196559,1.437000e+04,13038.688683,1331.306334
2,2000,1,3,2,3,3.420367e+04,24735.352284,9468.317206,17106.492506,17097.176984,2.495872e+04,14063.061725,10895.662622
3,2000,1,4,2,3,3.055290e+03,1888.371583,1166.918433,1891.897568,1163.392448,1.210870e+03,603.869736,607.000059
4,2000,1,5,2,3,6.788278e+02,486.097892,192.729927,500.416684,178.411134,1.837121e+02,104.557562,79.154574
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35275,2021,63,31,5,13,1.086080e+06,509844.654261,576235.047524,125219.095513,960860.606272,1.066780e+06,112430.749017,954349.201263
35276,2021,63,32,5,14,7.422721e+05,340910.510734,401361.589117,63244.277797,679027.822053,7.265596e+05,55781.689619,670777.929502
35277,2021,63,33,5,14,6.890740e+05,409881.670752,279192.379055,105797.415959,583276.633848,6.756922e+05,99409.360517,576282.828612
35278,2021,63,34,5,15,5.219002e+05,314235.052171,207665.098239,242656.725838,279243.424572,5.025646e+05,231429.087715,271135.495389
