# WWYZ production lengths

## Setup

In [1]:
import numpy as np
import pandas as pd
import duckdb
from functions import zeroout, get_fatdiag

### Select MRIO version

In [2]:
# Pick the MRIO table to read (`input`) and the file the results are written to (`output`).
# Other available pairings, kept here for quick switching:
#   'adb-mrio.parquet'          ->  'lengths.parquet'
#   'adb-mrio62-const.parquet'  ->  'lengths62-const.parquet'
# NOTE: `input` shadows the Python builtin; the name is kept because later cells reference it.
input = 'adb-mrio62.parquet'
output = 'lengths62.parquet'

### Parameters

In [3]:
# Sector table, one row per distinct `ind` code
sectors = pd.read_excel('../data/raw/sectors.xlsx').drop_duplicates(subset='ind', ignore_index=True)
# Years present in the selected MRIO file, in ascending order
years = duckdb.sql(f"SELECT DISTINCT t FROM read_parquet('../data/mrio/{input}') ORDER BY t").df()['t']
# Total number of rows in the MRIO file (all years stacked)
rows = duckdb.sql(f"SELECT COUNT(*) FROM read_parquet('../data/mrio/{input}')").df()

N = 35                                              # Number of sectors
f = 5                                               # Number of final demand components

# Every year is expected to hold G*N sector rows plus 7 further rows, so G is
# recovered from the row count. Integer arithmetic is used and the layout is
# checked so that a malformed file fails here instead of silently truncating G.
if len(years) == 0:
    raise ValueError(f"No years found in '../data/mrio/{input}'")
total_rows = int(rows.iloc[0, 0])
rows_per_year, leftover = divmod(total_rows, len(years))
if leftover or rows_per_year < 7 or (rows_per_year - 7) % N:
    raise ValueError(
        f"Unexpected MRIO layout: {total_rows} rows over {len(years)} years "
        f"is not G*{N} + 7 rows per year"
    )
G = (rows_per_year - 7) // N                        # Number of countries + 1

# Later cells divide by entries that may be zero; silence the resulting numpy
# warnings globally. Kept as the last expression so the previous settings are displayed.
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

## Decompositions

In [4]:
def _aggregate(frame, level, key=None):
    """Sum every remaining column of `frame` within each (t, s, sector group).

    `level` is stored in the `agg` column. `key` names the column of `frame`
    that holds the sector grouping ('i5' or 'i15'); with `key=None` all
    sectors are pooled under the code 0, giving one row per (t, s).
    The grouping is returned in a column named `i`.
    """
    grouped = frame.drop(['i', 'i5', 'i15'], axis=1)
    grouped.insert(2, 'agg', level)
    grouped.insert(3, 'i', 0 if key is None else frame[key])
    return grouped.groupby(['t', 's', 'agg', 'i']).sum().reset_index()


# Per-year result blocks. They are concatenated once after the loop: growing
# DF with pd.concat inside the loop re-copies all earlier rows on every pass.
frames = []

for year in years:

    mrio = duckdb.sql(f"SELECT * EXCLUDE(t, si) FROM read_parquet('../data/mrio/{input}') WHERE t={year}").df()
    mrio = mrio.values

    # Table layout used below: the first G*N rows/columns are the intermediate
    # block, the last row is total output, the six rows before it are summed
    # into `va`, and the columns after the intermediate block (except the last
    # one) are final demand with f components per country.
    x = mrio[-1][:(G*N)]
    Z = mrio[:(G*N)][:, :(G*N)]
    va = np.sum(mrio[-7:-1][:, :(G*N)], axis=0)
    Y_big = mrio[:(G*N)][:, (G*N):-1]
    Y = Y_big @ np.kron(np.eye(G), np.ones((f, 1)))     # Collapse the f components per country
    y = np.sum(Y, axis=1)
    yd = get_fatdiag(Y)     # assumed to be a length-G*N vector (the original passed it to np.diag) - TODO confirm
    yf = y - yd
    v = np.where(x != 0, va / x, 0)                     # Zero-output sectors get a coefficient of 0
    A = Z * np.where(x != 0, 1 / x, 0)                  # Same as Z @ diag(1/x): scale column j by 1/x[j]
    # presumably Ad keeps the within-country blocks and Af the cross-country
    # blocks; this depends on `zeroout` in functions.py - TODO confirm
    Ad, Af = zeroout(A, inverse=True), zeroout(A)
    B = np.linalg.inv(np.eye(G*N) - A)                  # Leontief inverse
    Bd = np.linalg.inv(np.eye(G*N) - Ad)

    # diag(v) @ M scales row i of M by v[i]; M @ diag(w) scales column j by
    # w[j]. Broadcasting gives the same numbers for finite inputs without a
    # dense matrix product. Shared left factors are computed once, keeping the
    # original left-to-right order of the products.
    VB = v[:, None] * B
    VBd = v[:, None] * Bd
    VBdBd = VBd @ Bd
    VBdAfB = VBd @ Af @ B

    X = (VB @ B) * y
    X_D = VBdBd * yd
    X_RT = VBdBd * yf
    Xd_GVC = (VBdBd @ Af @ B) * y
    E_GVC = (VB @ Af @ B) * y
    Xf_GVC = (VBdAfB @ Ad @ B) * y
    VY_D = VBd * yd
    VY_RT = VBd * yf
    VY_GVC = VBdAfB * y

    # One row per (country s, sector i). Columns ending in v.../V_... are row
    # sums (axis=1) and columns ending in y.../Y_... are column sums (axis=0)
    # of the matrices above.
    DFt = pd.DataFrame({
        't': int(year), 's': np.arange(1, G+1).repeat(N),
        'i': np.tile(sectors['ind'], G),
        'i5': np.tile(sectors['ind5'], G),
        'i15': np.tile(sectors['ind15'], G),
        'va': va, 'y': y,
        'Xv': np.sum(X, axis=1),
        'Xv_D': np.sum(X_D, axis=1),
        'Xv_RT': np.sum(X_RT, axis=1),
        'Xvd_GVC': np.sum(Xd_GVC, axis=1),
        'Ev_GVC': np.sum(E_GVC, axis=1),
        'Xvf_GVC': np.sum(Xf_GVC, axis=1),
        'V_D': np.sum(VY_D, axis=1),
        'V_RT': np.sum(VY_RT, axis=1),
        'V_GVC': np.sum(VY_GVC, axis=1),
        'Xy': np.sum(X, axis=0),
        'Xy_D': np.sum(X_D, axis=0),
        'Xy_RT': np.sum(X_RT, axis=0),
        'Xyd_GVC': np.sum(Xd_GVC, axis=0),
        'Ey_GVC': np.sum(E_GVC, axis=0),
        'Xyf_GVC': np.sum(Xf_GVC, axis=0),
        'Y_D': np.sum(VY_D, axis=0),
        'Y_RT': np.sum(VY_RT, axis=0),
        'Y_GVC': np.sum(VY_GVC, axis=0)
    })

    # Aggregation levels: 0 = whole economy, 5 and 15 = coarser sector
    # groupings, 35 = the sector-level rows as they are.
    DFagg = _aggregate(DFt, 0)
    DF5 = _aggregate(DFt, 5, 'i5')
    DF15 = _aggregate(DFt, 15, 'i15')

    DF35 = DFt.drop(['i5', 'i15'], axis=1)
    DF35.insert(2, 'agg', 35)

    frames.extend([DFagg, DF5, DF15, DF35])

    print(f'{year} done')

# With no years there is nothing to concatenate; fall back to an empty frame.
DF = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

2000 done
2007 done
2008 done
2009 done
2010 done
2011 done
2012 done
2013 done
2014 done
2015 done
2016 done
2017 done
2018 done
2019 done
2020 done
2021 done
2022 done


In [5]:
# Output column -> (numerator column, denominator column) of DF.
# The *v* entries use the row-sum columns of DF, the *y* entries the column-sum columns.
ratio_specs = {
    'PLv': ('Xv', 'va'),
    'PLv_D': ('Xv_D', 'V_D'),
    'PLv_RT': ('Xv_RT', 'V_RT'),
    'PLvd_GVC': ('Xvd_GVC', 'V_GVC'),
    'CBv_GVC': ('Ev_GVC', 'V_GVC'),
    'PLvf_GVC': ('Xvf_GVC', 'V_GVC'),
    'PLy': ('Xy', 'y'),
    'PLy_D': ('Xy_D', 'Y_D'),
    'PLy_RT': ('Xy_RT', 'Y_RT'),
    'PLyd_GVC': ('Xyd_GVC', 'Y_GVC'),
    'CBy_GVC': ('Ey_GVC', 'Y_GVC'),
    'PLyf_GVC': ('Xyf_GVC', 'Y_GVC'),
}

# Identifier columns first, then one ratio per spec, in the order listed above.
APL = pd.DataFrame({
    **{key: DF[key] for key in ('t', 's', 'agg', 'i')},
    **{name: DF[numer] / DF[denom] for name, (numer, denom) in ratio_specs.items()},
})

# Each GVC length is the sum of its three parts; GVC_POS is the ratio of the
# v-side total to the y-side total. Plain `+` is used so that a NaN in any
# part carries through to the total.
APL = APL.assign(
    PLv_GVC=lambda d: d['PLvd_GVC'] + d['CBv_GVC'] + d['PLvf_GVC'],
    PLy_GVC=lambda d: d['PLyd_GVC'] + d['CBy_GVC'] + d['PLyf_GVC'],
    GVC_POS=lambda d: d['PLv_GVC'] / d['PLy_GVC'],
)

APL.to_parquet(f'../data/{output}', index=False)

### View results

In [6]:
# Read the saved results back from disk to confirm what was written.
results_query = f"SELECT * FROM read_parquet('../data/{output}')"
duckdb.sql(results_query).df()

Unnamed: 0,t,s,agg,i,PLv,PLv_D,PLv_RT,PLvd_GVC,CBv_GVC,PLvf_GVC,PLy,PLy_D,PLy_RT,PLyd_GVC,CBy_GVC,PLyf_GVC,PLv_GVC,PLy_GVC,GVC_POS
0,2000,1,0,0,2.185302,1.789062,2.065579,1.901837,1.299816,1.060320,2.059911,1.789062,2.065579,1.818362,1.299824,1.160252,4.261972,4.278437,0.996152
1,2000,2,0,0,1.896282,1.475281,1.590280,1.609824,1.290855,0.830214,1.851203,1.475281,1.590280,1.780288,1.304399,0.689627,3.730894,3.774314,0.988496
2,2000,3,0,0,2.114788,1.496717,1.741962,1.665996,1.326253,0.848430,2.060697,1.496717,1.741962,1.779077,1.292125,0.704879,3.840678,3.776081,1.017107
3,2000,4,0,0,1.842858,1.685525,1.871966,1.861704,1.325301,0.877695,2.150396,1.685525,1.871966,1.715011,1.266384,0.928858,4.064700,3.910253,1.039498
4,2000,5,0,0,1.808543,1.656748,1.992624,1.919679,1.246337,0.868261,1.834858,1.656748,1.992624,1.769559,1.288589,0.984621,4.034277,4.042768,0.997900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59971,2022,63,35,31,1.280789,1.166572,2.120194,2.441689,1.322993,1.086392,2.270799,1.893669,1.893669,1.846901,1.344290,1.532105,4.851074,4.723296,1.027053
59972,2022,63,35,32,1.187886,1.113726,1.517855,2.346552,1.274259,1.016927,2.224730,1.876453,1.876453,1.831841,1.317312,1.656646,4.637738,4.805799,0.965029
59973,2022,63,35,33,1.330771,1.224342,1.935540,2.606647,1.234263,0.841027,2.653038,2.205873,2.205873,1.857713,1.333418,1.460655,4.681937,4.651787,1.006481
59974,2022,63,35,34,2.321055,1.886300,2.678924,3.140249,1.319705,1.124313,2.661841,2.180653,2.180653,1.937300,1.313691,1.289227,5.584267,4.540218,1.229956
