# Value added flows

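This notebook computes the value added originating from one country or country-sector that is absorbed by another country or country-sector. These flows are obtained from the VBY matrix, i.e. the product $\hat{v} B Y$, where $\hat{v}$ is the diagonal matrix of value added per unit of output, $B$ is the Leontief inverse $(I - A)^{-1}$, and $Y$ is final demand summed over its components for each country.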

Before running this notebook, select the MRIO version to load from `data/mrio/` as well as the corresponding output filename to be saved in `data/`.

## Set up

In [1]:
import numpy as np
import pandas as pd
import duckdb
from functions import asvector

### Select MRIO version

In [2]:
# Active MRIO version: (file read from data/mrio/, file written to data/).
# NOTE: `input` shadows the Python builtin of the same name; the other cells
# read it under this name, so it is kept as is.
input, output = (
    'adb-mrio.parquet',
    'flows.parquet',
)
# Alternative versions: uncomment exactly one pair to switch.
# input, output = ('adb-mrio62.parquet', 'flows62.parquet')
# input, output = ('adb-mrio62-const.parquet', 'flows62-const.parquet')

### Parameters

In [3]:
# Sector dictionary, reduced to one row per distinct `ind` code
sectors = (
    pd.read_excel('../dicts/sectors.xlsx')
    .drop_duplicates(subset='ind', ignore_index=True)
)

# Distinct years in the selected MRIO file (ascending) and its total row count
years = duckdb.sql(
    f"SELECT DISTINCT t FROM read_parquet('../data/mrio/{input}') ORDER BY t"
).df()['t']
rows = duckdb.sql(
    f"SELECT COUNT(*) FROM read_parquet('../data/mrio/{input}')"
).df()

# Average number of rows per year. The flows loop treats the last 7 rows of
# each year's table as non-sector rows (six value-added rows plus the output
# row), hence the `- 7` below.
rows_per_year = rows.iloc[0, 0] / len(years)

N = 35                                  # Number of sectors
G = int((rows_per_year - 7) / N)        # Number of countries + 1
f = 5                                   # Number of final demand components

# Silence divide-by-zero / invalid-value warnings raised by later element-wise divisions
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

## Compute flows

In [4]:
# Collect one DataFrame per year and concatenate once after the loop.
# Growing `flows` with pd.concat inside the loop re-copies every previously
# processed year on each iteration.
yearly_flows = []

for year in years:

    # Load this year's MRIO table (without the t and si columns) as an array
    mrio = duckdb.sql(f"SELECT * EXCLUDE(t, si) FROM read_parquet('../data/mrio/{input}') WHERE t={year}").df()
    mrio = mrio.values

    x = mrio[-1][:(G*N)]                            # Last row, first G*N columns: used as output
    Z = mrio[:(G*N)][:, :(G*N)]                     # Top-left (G*N x G*N) block
    va = np.sum(mrio[-7:-1][:, :(G*N)], axis=0)     # Column sums of the six rows above the last row
    v = np.where(x != 0, va/x, 0)                   # Value added per unit of output; 0 where x is 0

    # Scale column j of Z by 1/x[j] (0 where x is 0). Same result as
    # Z @ np.diag(...), without building a dense diagonal matrix.
    A = Z * np.where(x != 0, 1/x, 0)
    B = np.linalg.inv(np.eye(G*N) - A)

    # Columns after the first G*N, excluding the last column
    Y_big = mrio[:(G*N)][:, (G*N):-1]
    # Sum each consecutive group of f columns into one column per country
    Y = Y_big @ np.kron(np.eye(G), np.ones((f, 1)))

    # Scale row i of B by v[i], then multiply by Y. Same evaluation order as
    # np.diag(v) @ B @ Y.
    VBY = (v[:, np.newaxis] * B) @ Y

    # Label every entry of the flattened VBY.
    # NOTE(review): the s/r/i labels assume asvector stacks VBY column by
    # column; confirm against functions.asvector.
    flows_t = pd.DataFrame({
        't': int(year),
        's': np.tile(np.arange(1, G+1).repeat(N), G),
        'r': np.arange(1, G+1).repeat(G*N),
        'i': np.tile(sectors['ind'], G*G),
        'i5': np.tile(sectors['ind5'], G*G),
        'i15': np.tile(sectors['ind15'], G*G),
        'flows': asvector(VBY)
    })
    yearly_flows.append(flows_t)

    print(f'{year} done')

# Single concatenation, then sort and save
flows = pd.concat(yearly_flows, ignore_index=True)
flows = flows.sort_values(['t', 's', 'r', 'i'])
flows.to_parquet(f'../data/{output}', index=False)

2017 done
2018 done
2019 done
2020 done
2021 done
2022 done


### View results

In [5]:
# Read the saved file back from disk and display it
saved_flows_query = f"SELECT * FROM read_parquet('../data/{output}')"
duckdb.sql(saved_flows_query).df()

Unnamed: 0,t,s,r,i,i5,i15,flows
0,2017,1,1,1,1,1,24580.928164
1,2017,1,1,2,1,2,17033.117412
2,2017,1,1,3,2,3,14724.957952
3,2017,1,1,4,2,3,1059.935367
4,2017,1,1,5,2,3,142.991659
...,...,...,...,...,...,...,...
1119085,2022,73,73,31,5,13,336582.229471
1119086,2022,73,73,32,5,14,245745.094921
1119087,2022,73,73,33,5,14,154924.793812
1119088,2022,73,73,34,5,15,129306.912080
