# Produce IO Tables from OECD data

This notebook generalises the code from the notebook "Produce OECD IO tables - one country only" so that it handles multiple countries for a single year. Its aim is to produce Input-Output (IO) tables for a number of countries using the OECD data published [here](https://www.oecd.org/sti/ind/inter-country-input-output-tables.htm).

## 1. Wide format

In [5]:
# Imports and path
!pip install pymrio
import pymrio
import pandas as pd
from pathlib import Path
oecd_storage = Path('/project_data/data_asset/')



In [6]:
# Select the year to parse - 2015 in this case.
# Kept as a named constant so another year can be processed by editing one value.
OECD_YEAR = 2015

# Parse the OECD tables stored under `oecd_storage` for the selected year.
oecd_path_year = pymrio.parse_oecd(path=oecd_storage, year=OECD_YEAR)

In [7]:
# Show countries
# Query the regions once and print them sorted: the iteration order of a set of
# strings changes between interpreter runs, which made this output irreproducible.
all_regions = oecd_path_year.get_regions()
print('There are', len(all_regions), 'countries in the dataset:')
print(sorted(set(all_regions)))

There are 65 countries in the dataset:
{'SGP', 'TUN', 'IDN', 'HKG', 'MLT', 'TUR', 'NOR', 'GRC', 'BGR', 'CHN', 'KHM', 'SVN', 'MYS', 'BEL', 'CZE', 'CHL', 'EST', 'KOR', 'NLD', 'DNK', 'LUX', 'KAZ', 'HRV', 'POL', 'PRT', 'BRN', 'PHL', 'THA', 'VNM', 'CRI', 'ARG', 'MEX', 'SWE', 'ROW', 'ZAF', 'SAU', 'HUN', 'NZL', 'FIN', 'IRL', 'DEU', 'CHE', 'ISL', 'MAR', 'GBR', 'CAN', 'FRA', 'ROU', 'BRA', 'COL', 'ISR', 'AUS', 'ESP', 'ITA', 'PER', 'RUS', 'USA', 'CYP', 'LVA', 'AUT', 'JPN', 'TWN', 'SVK', 'LTU', 'IND'}


In [8]:
# Regions to process: every region present in the parsed dataset.
countries = oecd_path_year.get_regions()

# Sector codes that the following cells merge into a single
# 'All non-essential' sector per country.
non_essential_sectors = [
    '07T08',
    '24',
    '25',
    '29',
    '30',
    '68',
]

In [9]:
# Construct Input-Output tables for selected countries
# Work on a copy of the parsed Z matrix so the parsed object itself is left untouched.
IO_EXT = oecd_path_year.Z.copy()

# TAXES AND VALUE ADDED
# Split the factor-input rows on the 'inputtype' index level: the 'VALU' rows go
# to VA, every other row goes to TAXSUB (presumably taxes less subsidies - confirm).
factor_inputs = oecd_path_year.factor_inputs.F
is_value_added = factor_inputs.index.get_level_values('inputtype') == 'VALU'
VA = factor_inputs.loc[is_value_added]
TAXSUB = factor_inputs.loc[~is_value_added]

# Stack VA on top of the sign-flipped TAXSUB rows. pd.concat is used because
# DataFrame.append was deprecated and then removed in pandas 2.0.
VA_TAXES = pd.concat([VA, TAXSUB * (-1)])

# Add one extra row holding the column-wise total of VA_TAXES. The values are
# written positionally, so the columns of F are assumed to be in the same order
# as the columns of Z.
IO_EXT.loc[('ALL', 'VALUE ADDED'), :] = VA_TAXES.sum().to_numpy()


In [10]:
# Collapse non-essential columns
# For every country, replace its non-essential sector columns with a single
# (country, 'All non-essential') column holding their row-wise sum.
# NOTE: this mutates IO_EXT; once the sector columns are dropped the cell cannot
# be re-run without first rebuilding IO_EXT.
for country in countries:

    # Column labels of this country's non-essential sectors.
    ne_columns = [(country, sector) for sector in non_essential_sectors]

    # Select and sum the columns in one step instead of growing a frame with
    # repeated pd.concat calls that start from an empty DataFrame.
    country_NE = IO_EXT.loc[:, ne_columns].sum(axis=1)

    # One drop per country rather than one per sector.
    IO_EXT.drop(columns=ne_columns, inplace=True)

    # The aggregated column is appended after all existing columns.
    IO_EXT[(country, 'All non-essential')] = country_NE

In [11]:
# Collapse non-essential rows
# For every country, replace its non-essential sector rows with a single
# (country, 'All non-essential') row holding their column-wise sum.
# NOTE: this mutates IO_EXT; once the sector rows are dropped the cell cannot
# be re-run without first rebuilding IO_EXT.
for country in countries:

    # Row labels of this country's non-essential sectors.
    ne_rows = [(country, sector) for sector in non_essential_sectors]

    # Select and sum the rows in one step instead of growing a frame with
    # repeated pd.concat calls that start from an empty DataFrame.
    country_NE = IO_EXT.loc[ne_rows, :].sum(axis=0)

    # One drop per country rather than one per sector.
    IO_EXT.drop(index=ne_rows, inplace=True)

    # The aggregated row is appended after all existing rows; the Series is
    # indexed by IO_EXT's columns, so the assignment aligns label by label.
    IO_EXT.loc[(country, 'All non-essential'), :] = country_NE

In [12]:
# TOTAL PRODUCTION
# Append an ('ALL', 'TOTAL PRODUCTION') row containing, for each column, the sum
# over every row currently in IO_EXT. Values are written positionally.
column_totals = IO_EXT.sum(axis=0)
IO_EXT.loc[('ALL', 'TOTAL PRODUCTION'), :] = column_totals.to_numpy()

In [13]:
# Show the resulting wide-format table as the cell output.
IO_EXT

Unnamed: 0_level_0,region,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,...,SVK,SVN,SWE,THA,TUN,TUR,TWN,USA,VNM,ZAF
Unnamed: 0_level_1,sector,01T03,05T06,09,10T12,13T15,16,17T18,19,20T21,22,...,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential,All non-essential
region,sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
ARG,01T03,13014.595854,8.049052,4.143001,34292.771836,964.672923,545.773454,240.085264,16.713493,324.245670,215.505180,...,0.001767,0.001402,0.007053,0.025203,0.003495,0.014174,0.003004,1.406894e-01,0.054142,0.286578
ARG,05T06,199.321049,2276.678829,275.113858,254.403159,34.275764,7.246904,81.646943,16381.766599,757.582323,101.862709,...,0.000014,0.000168,0.001535,0.000625,0.000015,0.002654,0.000787,2.057142e+00,0.000189,0.000889
ARG,09,313.959066,1894.970278,246.435218,39.008104,0.000000,2.405210,12.716208,14.148393,6.509650,22.030458,...,0.000000,0.000068,0.008444,0.003079,0.000049,0.000747,0.004724,1.541062e+00,0.000314,0.020269
ARG,10T12,1960.104890,31.815860,5.132780,4438.490860,243.163011,22.569292,88.041559,21.605191,173.685120,50.899412,...,0.004491,0.006811,0.059601,0.184813,0.020985,0.084819,0.028495,1.536981e+00,0.237270,0.762123
ARG,13T15,257.570464,39.774788,6.260411,435.065366,6610.432902,26.232974,76.357137,13.831488,136.254071,166.691463,...,0.007941,2.121081,0.046811,0.910942,0.000358,0.127955,2.083819,1.762954e+00,0.027347,0.402780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TWN,All non-essential,0.687945,1.416402,0.288667,1.680883,0.348774,0.223092,0.182624,0.147612,0.304447,0.683925,...,20.721855,6.550438,54.306799,456.011589,2.052466,88.611618,27964.922129,2.549758e+03,364.295553,49.705607
USA,All non-essential,8.382023,8.336595,1.382335,13.529736,4.146243,1.715818,2.447097,1.464879,7.432188,5.586056,...,92.617364,14.391025,228.008866,746.747414,5.831914,449.390503,840.346450,6.652419e+05,241.909068,212.045494
VNM,All non-essential,0.131674,0.124189,0.040799,0.130207,0.032578,0.015670,0.042467,0.020378,0.302031,0.054106,...,2.665338,2.312445,7.942385,322.065092,0.281732,15.000640,122.673513,6.109197e+02,9514.321979,4.406049
ZAF,All non-essential,0.614518,0.725694,0.140749,0.981533,0.247062,0.139073,0.224580,1.151624,0.459515,0.440182,...,5.229450,5.924965,25.716588,70.909994,0.848013,84.670033,142.160580,7.719053e+02,3.255237,20863.262335


In [14]:
# EXPORT TO CSV
#IO_EXT.to_csv('/project_data/data_asset/IO_WORLD.csv')

## 2. Long format

In [15]:
# Build a melt-ready frame from IO_EXT without modifying IO_EXT itself:
# move the row index levels into ordinary columns, then relabel
# 'region'/'sector' as the input side of each flow.
IO_MELT = IO_EXT.reset_index().rename(
    columns={
        'region': 'input_country',
        'sector': 'input_sector',
    }
)

In [16]:
# Melt dataframe - from wide to long format.
# The two input columns stay as identifiers; the two column-header levels are
# unpivoted into 'output_country' and 'output_sector', one row per cell value.
IO_MELT = IO_MELT.melt(
    id_vars=['input_country', 'input_sector'],
    var_name=['output_country', 'output_sector'],
)
IO_MELT

Unnamed: 0,input_country,input_sector,output_country,output_sector,value
0,ARG,01T03,ARG,01T03,13014.595854
1,ARG,05T06,ARG,01T03,199.321049
2,ARG,09,ARG,01T03,313.959066
3,ARG,10T12,ARG,01T03,1960.104890
4,ARG,13T15,ARG,01T03,257.570464
...,...,...,...,...,...
4064250,TWN,All non-essential,ZAF,All non-essential,49.705607
4064251,USA,All non-essential,ZAF,All non-essential,212.045494
4064252,VNM,All non-essential,ZAF,All non-essential,4.406049
4064253,ZAF,All non-essential,ZAF,All non-essential,20863.262335


In [17]:
# EXPORT TO CSV
#IO_MELT.to_csv('/project_data/data_asset/IO_WORLD_LONG.csv', index = False)