# WWYZ production lengths

In [40]:
import numpy as np
import pandas as pd
import os
import re
import duckdb
from functions import zeroout, get_fatdiag

In [41]:
# Dataset selection. The 'ADB-MRIO62' folder is active; the alternative pairing is
#   inputfolder = 'ADB-MRIO', outputfilename = 'lengths'
inputfolder = 'ADB-MRIO62'
outputfilename = 'lengths62'

# Optional tag used when naming the exported file; None means no tag.
version = None

# Input file names in sorted order, skipping hidden entries (names starting with '.')
filelist = sorted(
    name
    for name in os.listdir(f'../data/interim/{inputfolder}')
    if not name.startswith('.')
)

## Setup

In [42]:
# Sector lookup table: keep only the first row for each 'ind' code and renumber the index
sectors = (
    pd.read_excel('../data/interim/sectors.xlsx')
    .drop_duplicates(subset='ind', ignore_index=True)
)

# MRIO dimensions:
#   G = number of countries + ROW
#   N = number of sectors
#   f = number of final demand components
G, N, f = 73, 35, 5

# Silence divide-by-zero / invalid-value warnings; the previous settings are the cell output
np.seterr(divide='ignore', invalid='ignore')

{'divide': 'ignore', 'over': 'warn', 'under': 'ignore', 'invalid': 'ignore'}

## Decompositions

Note that the country index `s` used henceforth corresponds to the MRIO country indices, which start at 1 and not 0.

In [12]:
# The year is the first run of four digits in the first file's name
year = re.search('[0-9]{4}', filelist[0]).group()

# Read the first table (all columns except C0) straight into a NumPy array
mrio = duckdb.sql(
    f"""
    SELECT * EXCLUDE(C0)
    FROM read_parquet('../data/interim/{inputfolder}/{filelist[0]}')
    """
).df().values

# Carve the table into its blocks; the first G*N rows/columns are the country-sector cells
x = mrio[-1, :G*N]                          # last row (presumably gross output)
Z = mrio[:G*N, :G*N]                        # top-left square block (presumably intermediate flows)
va = mrio[-7:-1, :G*N].sum(axis=0)          # column sums of the six rows just above the last row
Y_big = mrio[:G*N, G*N:-1]                  # columns after the square block, excluding the last column

# Collapse every run of f consecutive columns into a single column per country
Y = Y_big @ np.kron(np.eye(G), np.ones((f, 1)))
y = Y.sum(axis=1)
yd = get_fatdiag(Y)                         # presumably the own-country part of Y; see functions.get_fatdiag
yf = y - yd

# Ratios to x, set to 0 wherever x is 0
has_output = x != 0
v = np.where(has_output, va / x, 0)
Dx = np.diag(np.where(has_output, 1 / x, 0))

# Coefficient matrix and its two zeroout() variants (presumably domestic / foreign blocks)
A = Z @ Dx
Ad = zeroout(A, inverse=True)
Af = zeroout(A)

# Inverses of (I - A) and (I - Ad)
identity = np.eye(G*N)
B = np.linalg.inv(identity - A)
Bd = np.linalg.inv(identity - Ad)

In [17]:
# Diagonal matrices built once and reused in every product below
v_hat = np.diag(v)
y_hat = np.diag(y)
yd_hat = np.diag(yd)
yf_hat = np.diag(yf)

# Terms with two or more inverse factors (numerators of the length ratios computed later)
X = v_hat @ B @ B @ y_hat
X_D = v_hat @ Bd @ Bd @ yd_hat
X_RT = v_hat @ Bd @ Bd @ yf_hat
Xd_GVC = v_hat @ Bd @ Bd @ Af @ B @ y_hat
E_GVC = v_hat @ B @ Af @ B @ y_hat
Xf_GVC = v_hat @ Bd @ Af @ B @ Ad @ B @ y_hat

# Matching terms with one inverse factor fewer (denominators of the same ratios)
VY_D = v_hat @ Bd @ yd_hat
VY_RT = v_hat @ Bd @ yf_hat
VY_GVC = v_hat @ Bd @ Af @ B @ y_hat

In [23]:
# Scratch check: shape of the sector codes tiled G-1 times.
# NOTE(review): the frame built below tiles the codes G times, not G-1 — confirm which is intended.
np.shape(np.tile(sectors['ind'], G - 1))

(2520,)

In [24]:
# One entry per decomposition term: (name of its row-sum column, name of its column-sum column, matrix)
terms = [
    ('Xv', 'Xy', X),
    ('Xv_D', 'Xy_D', X_D),
    ('Xv_RT', 'Xy_RT', X_RT),
    ('Xvd_GVC', 'Xyd_GVC', Xd_GVC),
    ('Ev_GVC', 'Ey_GVC', E_GVC),
    ('Xvf_GVC', 'Xyf_GVC', Xf_GVC),
    ('V_D', 'Y_D', VY_D),
    ('V_RT', 'Y_RT', VY_RT),
    ('V_GVC', 'Y_GVC', VY_GVC),
]

# One row per country-sector: identifiers first, then all row sums, then all column sums
DF = pd.DataFrame({
    't': year,
    's': np.repeat(np.arange(1, G+1), N),       # 1-based country index, each repeated N times
    'i': np.tile(sectors['ind'], G),
    'i5': np.tile(sectors['ind5'], G),
    'i15': np.tile(sectors['ind15'], G),
    'va': va,
    'y': y,
    **{row_name: matrix.sum(axis=1) for row_name, col_name, matrix in terms},
    **{col_name: matrix.sum(axis=0) for row_name, col_name, matrix in terms},
})

In [25]:
# Display the sector-level frame built from the first input file
DF

Unnamed: 0,t,s,i,i5,i15,va,y,Xv,Xv_D,Xv_RT,...,V_GVC,Xy,Xy_D,Xy_RT,Xyd_GVC,Ey_GVC,Xyf_GVC,Y_D,Y_RT,Y_GVC
0,2017,1,1,1,1,37360.383812,21310.149591,92642.856993,51491.278372,9029.213612,...,8279.356390,45739.015435,30085.539300,4540.418656,4789.267776,3562.291419,2761.498283,16350.230575,2467.527379,2492.391637
1,2017,1,2,1,2,116560.893616,9089.354564,495987.966145,42282.680828,2095.572946,...,99560.403134,18827.299134,12089.537730,1455.936414,2375.860164,1759.972188,1145.992638,7011.325670,844.370114,1233.658779
2,2017,1,3,2,3,21485.062150,42321.704527,36837.960461,22743.705526,5285.151416,...,2323.761003,107251.921995,58165.095685,23824.910015,10451.577567,7444.568019,7365.770709,26209.565383,10735.657346,5376.481797
3,2017,1,4,2,3,1883.615778,1539.501858,4219.162470,1897.481273,282.637314,...,577.889069,3196.562375,1472.140700,602.770699,541.900395,399.947245,179.803337,891.157907,364.886233,283.457717
4,2017,1,5,2,3,306.144939,375.111466,673.126043,214.310384,45.678176,...,124.029392,956.664394,419.089545,150.569192,195.775310,127.199601,64.030747,207.292366,74.475358,93.343741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2550,2017,73,31,5,13,292360.900966,461487.047908,332017.742599,286959.649396,6164.052621,...,10709.798339,884810.855029,437906.058607,6957.630842,212682.251224,159468.344474,67796.569883,331908.869478,5273.504080,124304.674350
2551,2017,73,32,5,14,218056.135019,345008.360999,244673.884510,215870.402196,5410.192225,...,6753.095467,658987.280347,347499.287897,7140.473635,144990.166860,105940.439089,53416.912866,254650.561814,5232.602443,85125.196743
2552,2017,73,33,5,14,122586.394124,244490.608268,154297.784074,124887.389966,4927.655532,...,7291.080717,579189.803804,276459.447463,8060.403651,131224.385346,95708.048473,67737.518871,163875.699108,4777.931431,75836.977729
2553,2017,73,34,5,15,145953.349879,183415.282880,292425.379775,164642.012115,16670.439307,...,25005.676899,417734.766621,193670.431736,16628.391345,93960.746760,67372.003351,46103.193429,120408.636395,10338.191067,52668.455417


In [39]:
# Level 0: one total per (t, s); the sector column 'i' is a constant 0
DF0 = DF.drop(columns=['i', 'i5', 'i15'])
DF0.insert(2, 'agg', 0)
DF0.insert(3, 'i', 0)
DF0 = DF0.groupby(['t', 's', 'agg', 'i']).sum().reset_index()

# Level 5: sums within each 'i5' group, which then takes over the 'i' column name
DF5 = DF.drop(columns=['i', 'i15'])
DF5.insert(2, 'agg', 5)
DF5 = DF5.groupby(['t', 's', 'agg', 'i5']).sum().reset_index().rename(columns={'i5': 'i'})

# Level 15: same treatment using the 'i15' grouping
DF15 = DF.drop(columns=['i', 'i5'])
DF15.insert(2, 'agg', 15)
DF15 = DF15.groupby(['t', 's', 'agg', 'i15']).sum().reset_index().rename(columns={'i15': 'i'})

# Level 35: the original rows, only tagged with their level
DF35 = DF.drop(columns=['i5', 'i15'])
DF35.insert(2, 'agg', 35)

# Stack all four levels (each piece keeps its own index) and display the result
DFagg = pd.concat([DF0, DF5, DF15, DF35])
DFagg

Unnamed: 0,t,s,agg,i,va,y,Xv,Xv_D,Xv_RT,Xvd_GVC,...,V_GVC,Xy,Xy_D,Xy_RT,Xyd_GVC,Ey_GVC,Xyf_GVC,Y_D,Y_RT,Y_GVC
0,2017,1,0,0,1.333367e+06,1.238970e+06,3.054873e+06,1.888145e+06,72806.500005,399976.047072,...,226376.419796,2.557799e+06,1.888145e+06,72806.500005,258608.628595,187423.908168,150815.034430,1.068926e+06,38064.715576,131978.784737
1,2017,2,0,0,3.811415e+05,3.882512e+05,7.522859e+05,3.503484e+05,75162.516138,133241.443453,...,83955.732939,7.843621e+05,3.503484e+05,75162.516138,159620.343151,132131.019368,67099.846938,2.500748e+05,47110.961255,91065.467523
2,2017,3,0,0,4.550668e+05,4.838257e+05,9.766157e+05,3.546711e+05,106455.959456,201103.826212,...,132095.410861,1.082028e+06,3.546711e+05,106455.959456,285228.123785,229515.128405,106157.597336,2.547989e+05,68172.496630,160854.288671
3,2017,4,0,0,5.326033e+04,5.064278e+04,1.199754e+05,4.376497e+04,12049.117403,25626.863203,...,16386.390843,1.108861e+05,4.376497e+04,12049.117403,25039.158166,19546.859607,10486.019371,2.923669e+04,7637.244193,13768.849493
4,2017,5,0,0,1.922583e+06,1.921689e+06,3.546528e+06,2.742653e+06,142308.782427,291692.883634,...,155391.127964,3.561090e+06,2.742653e+06,142308.782427,296125.800042,214208.114672,165794.282909,1.699572e+06,67620.138171,154496.544867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2550,2017,73,35,31,2.923609e+05,4.614870e+05,3.320177e+05,2.869596e+05,6164.052621,14778.730158,...,10709.798339,8.848109e+05,4.379061e+05,6957.630842,212682.251224,159468.344474,67796.569883,3.319089e+05,5273.504080,124304.674350
2551,2017,73,35,32,2.180561e+05,3.450084e+05,2.446739e+05,2.158704e+05,5410.192225,9723.629657,...,6753.095467,6.589873e+05,3.474993e+05,7140.473635,144990.166860,105940.439089,53416.912866,2.546506e+05,5232.602443,85125.196743
2552,2017,73,35,33,1.225864e+05,2.444906e+05,1.542978e+05,1.248874e+05,4927.655532,11320.886067,...,7291.080717,5.791898e+05,2.764594e+05,8060.403651,131224.385346,95708.048473,67737.518871,1.638757e+05,4777.931431,75836.977729
2553,2017,73,35,34,1.459533e+05,1.834153e+05,2.924254e+05,1.646420e+05,16670.439307,51248.588625,...,25005.676899,4.177348e+05,1.936704e+05,16628.391345,93960.746760,67372.003351,46103.193429,1.204086e+05,10338.191067,52668.455417


In [44]:
def _read_mrio(path):
    """Load one MRIO parquet table into a 2-D NumPy array, dropping its `C0` column."""
    return duckdb.sql(
        f"""
        SELECT * EXCLUDE(C0)
        FROM read_parquet('{path}')
        """
    ).df().values


def _sector_frame(mrio, year):
    """Build the country-sector frame of decomposition sums for one table.

    Parameters
    ----------
    mrio : 2-D array as returned by `_read_mrio`.
    year : value stored in the 't' column of every row.

    Returns
    -------
    pandas.DataFrame with G*N rows: identifiers (t, s, i, i5, i15), `va`, `y`,
    the row sums of each decomposition matrix, then the column sums of each.

    Uses the notebook-level G, N, f and `sectors`.
    """
    GN = G * N

    # Blocks of the table; the first G*N rows/columns are the country-sector cells
    x = mrio[-1][:GN]                               # last row (presumably gross output)
    Z = mrio[:GN][:, :GN]
    va = np.sum(mrio[-7:-1][:, :GN], axis=0)        # six rows just above the last row
    Y_big = mrio[:GN][:, GN:-1]
    # Collapse every run of f consecutive columns into one column per country
    Y = Y_big @ np.kron(np.eye(G), np.ones((f, 1)))
    y = np.sum(Y, axis=1)
    yd = get_fatdiag(Y)                             # presumably the own-country part of Y
    yf = y - yd

    # Ratios to x, set to 0 wherever x is 0
    v = np.where(x != 0, va / x, 0)
    inv_x = np.where(x != 0, 1 / x, 0)

    # Scaling the columns of Z by inv_x equals Z @ diag(inv_x) without the dense product
    A = Z * inv_x
    Ad = zeroout(A, inverse=True)
    Af = zeroout(A)
    eye = np.eye(GN)
    B = np.linalg.inv(eye - A)
    Bd = np.linalg.inv(eye - Ad)

    # diag(v) @ M is a row scaling and M @ diag(y) a column scaling, so both are done by
    # broadcasting. Shared left-hand products are computed once; every chain keeps the
    # original left-to-right association.
    vB = v[:, None] * B
    vBd = v[:, None] * Bd
    vBB = vB @ B
    vBdBd = vBd @ Bd
    vBdAfB = vBd @ Af @ B

    X = vBB * y
    X_D = vBdBd * yd
    X_RT = vBdBd * yf
    Xd_GVC = (vBdBd @ Af @ B) * y
    E_GVC = (vB @ Af @ B) * y
    Xf_GVC = (vBdAfB @ Ad @ B) * y
    VY_D = vBd * yd
    VY_RT = vBd * yf
    VY_GVC = vBdAfB * y

    return pd.DataFrame({
        't': year,
        's': np.arange(1, G+1).repeat(N),
        'i': np.tile(sectors['ind'], G),
        'i5': np.tile(sectors['ind5'], G),
        'i15': np.tile(sectors['ind15'], G),
        'va': va,
        'y': y,
        'Xv': np.sum(X, axis=1),
        'Xv_D': np.sum(X_D, axis=1),
        'Xv_RT': np.sum(X_RT, axis=1),
        'Xvd_GVC': np.sum(Xd_GVC, axis=1),
        'Ev_GVC': np.sum(E_GVC, axis=1),
        'Xvf_GVC': np.sum(Xf_GVC, axis=1),
        'V_D': np.sum(VY_D, axis=1),
        'V_RT': np.sum(VY_RT, axis=1),
        'V_GVC': np.sum(VY_GVC, axis=1),
        'Xy': np.sum(X, axis=0),
        'Xy_D': np.sum(X_D, axis=0),
        'Xy_RT': np.sum(X_RT, axis=0),
        'Xyd_GVC': np.sum(Xd_GVC, axis=0),
        'Ey_GVC': np.sum(E_GVC, axis=0),
        'Xyf_GVC': np.sum(Xf_GVC, axis=0),
        'Y_D': np.sum(VY_D, axis=0),
        'Y_RT': np.sum(VY_RT, axis=0),
        'Y_GVC': np.sum(VY_GVC, axis=0)
    })


def _aggregation_levels(DFt):
    """Return [level 0, level 5, level 15, level 35] frames for one year's sector frame.

    Every frame has the columns t, s, agg, i followed by the value columns; 'agg' holds the
    level and 'i' the group code at that level (0 for the per-country total).
    """
    total = DFt.drop(['i', 'i5', 'i15'], axis=1)
    total.insert(2, 'agg', 0)
    total.insert(3, 'i', 0)
    levels = [total.groupby(['t', 's', 'agg', 'i']).sum().reset_index()]

    for group_col, level in (('i5', 5), ('i15', 15)):
        unused = [col for col in ('i', 'i5', 'i15') if col != group_col]
        grouped = DFt.drop(unused, axis=1)
        grouped.insert(2, 'agg', level)
        grouped = grouped.groupby(['t', 's', 'agg', group_col]).sum().reset_index()
        levels.append(grouped.rename(columns={group_col: 'i'}))

    detailed = DFt.drop(['i5', 'i15'], axis=1)
    detailed.insert(2, 'agg', 35)
    levels.append(detailed)
    return levels


# Collect every year's pieces and concatenate once at the end: growing DF with pd.concat
# inside the loop re-copies all earlier rows on every iteration.
frames = []

for file in filelist:

    year = re.search('[0-9]{4}', file).group()
    mrio = _read_mrio(f'../data/interim/{inputfolder}/{file}')
    frames.extend(_aggregation_levels(_sector_frame(mrio, year)))

    print(f'{year} done')

# Same row order as before: per year, levels 0, 5, 15, 35. Empty input still yields an empty frame.
DF = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

2017 done
2018 done
2019 done
2020 done
2021 done


In [50]:
# Length measures: each column is the ratio of one sum in DF to its matching denominator.
# The '...v...' columns use the row-sum columns of DF, the '...y...' columns the column-sum ones.
APL = pd.DataFrame({
    't': DF['t'],
    's': DF['s'],
    'agg': DF['agg'],
    'i': DF['i'],
    'PLv': DF['Xv'] / DF['va'],
    'PLv_D': DF['Xv_D'] / DF['V_D'],
    'PLv_RT': DF['Xv_RT'] / DF['V_RT'],
    'PLvd_GVC': DF['Xvd_GVC'] / DF['V_GVC'],
    'CBv_GVC': DF['Ev_GVC'] / DF['V_GVC'],
    'PLvf_GVC': DF['Xvf_GVC'] / DF['V_GVC'],
    'PLy': DF['Xy'] / DF['y'],
    'PLy_D': DF['Xy_D'] / DF['Y_D'],
    'PLy_RT': DF['Xy_RT'] / DF['Y_RT'],
    'PLyd_GVC': DF['Xyd_GVC'] / DF['Y_GVC'],
    'CBy_GVC': DF['Ey_GVC'] / DF['Y_GVC'],
    'PLyf_GVC': DF['Xyf_GVC'] / DF['Y_GVC']
})

# GVC totals are the sum of their three components; GVC_POS is the ratio of the two totals
APL['PLv_GVC'] = APL['PLvd_GVC'] + APL['CBv_GVC'] + APL['PLvf_GVC']
APL['PLy_GVC'] = APL['PLyd_GVC'] + APL['CBy_GVC'] + APL['PLyf_GVC']
APL['GVC_POS'] = APL['PLv_GVC'] / APL['PLy_GVC']

# Build the output name locally instead of overwriting `outputfilename`: the old code
# referenced an undefined name when version was None, appended '.csv' twice, and stacked
# suffixes on every re-run of the cell.
version_suffix = '' if version is None else f'_{version}'
APL.to_csv(f'../data/final/{outputfilename}{version_suffix}.csv', index=False)

In [51]:
# Display the APL table built in the previous cell
APL

Unnamed: 0,t,s,agg,i,PLv,PLv_D,PLv_RT,PLvd_GVC,CBv_GVC,PLvf_GVC,PLy,PLy_D,PLy_RT,PLyd_GVC,CBy_GVC,PLyf_GVC,PLv_GVC,PLy_GVC,GVC_POS
0,2017,1,0,0,2.291097,1.766394,1.912703,1.766863,1.329694,1.735759,2.064456,1.766394,1.912703,1.959471,1.420106,1.142722,4.832315,4.522299,1.068553
1,2017,2,0,0,1.973771,1.400974,1.595436,1.587044,1.428414,0.876772,2.020244,1.400974,1.595436,1.752809,1.450945,0.736831,3.892230,3.940585,0.987729
2,2017,3,0,0,2.146093,1.391965,1.561568,1.522413,1.455991,0.923992,2.236400,1.391965,1.561568,1.773208,1.426851,0.659961,3.902396,3.860020,1.010978
3,2017,4,0,0,2.252622,1.496919,1.577679,1.563911,1.396315,0.955298,2.189574,1.496919,1.577679,1.818537,1.419644,0.761576,3.915524,3.999756,0.978941
4,2017,5,0,0,1.844668,1.613731,2.104533,1.877153,1.262197,1.118077,1.853104,1.613731,2.104533,1.916715,1.386491,1.073126,4.257427,4.376332,0.972830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20435,2021,73,35,31,1.382581,1.220901,2.168863,2.641193,1.336201,1.114430,2.586496,2.058027,2.058027,1.775386,1.284028,1.658177,5.091823,4.717590,1.079327
20436,2021,73,35,32,1.215246,1.126935,1.501255,2.356163,1.281070,0.992992,2.611999,2.118644,2.118644,1.758542,1.258896,2.004315,4.630225,5.021753,0.922034
20437,2021,73,35,33,1.379254,1.248587,1.882396,2.925765,1.290489,0.933647,3.070384,2.468910,2.468910,1.784668,1.288259,1.646783,5.149901,4.719709,1.091148
20438,2021,73,35,34,2.515351,1.986580,2.698076,3.481650,1.328324,1.207610,2.918492,2.342176,2.342176,1.791091,1.272696,1.667040,6.017584,4.730827,1.271994
