# Read in IO tables from the OECD using the pymrio package

In [5]:
#!pip install pymrio

In [2]:
# Imports and path
import pymrio
import pandas as pd
from pathlib import Path
# Folder that holds the downloaded OECD files; it is handed to pymrio.parse_oecd below.
oecd_storage = Path('/project_data', 'data_asset')

In [3]:
# Select the year to parse - 2015 in this case.
# The files are looked up under `oecd_storage`. Despite its name, `oecd_path_year`
# is not a path: it is the parsed object whose `.Z`, `.Y` and `.factor_inputs`
# attributes are read by the cells below.
oecd_path_year = pymrio.parse_oecd(path=oecd_storage, year=2015)

To start, we extract the symmetric input-output matrix of intermediate consumption by sector for one country (we choose Australia for no particular reason). The matrix is called _Z_ in pymrio.

In [4]:
# Extract Z. Both its rows and its columns are indexed by [region, sector], so we
# build one boolean mask per axis that is True wherever the region level is 'AUS'
# and use the two masks to cut out the Australia-to-Australia block.
z_matrix = oecd_path_year.Z
z_aus_rows = z_matrix.index.get_level_values('region') == 'AUS'
z_aus_cols = z_matrix.columns.get_level_values('region') == 'AUS'
IO_AUS = z_matrix.iloc[z_aus_rows, z_aus_cols]
IO_AUS

Unnamed: 0_level_0,region,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS
Unnamed: 0_level_1,sector,01T03,05T06,07T08,09,10T12,13T15,16,17T18,19,20T21,...,61,62T63,64T66,68,69T82,84,85,86T88,90T96,97T98
region,sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
AUS,01T03,12764.521581,335.621861,134.544149,5.36722,22300.672875,135.980122,1323.988633,49.924049,4.598057,309.388619,...,35.016162,35.167907,49.24045,186.470176,782.246057,452.811382,326.715572,224.315559,256.608388,0
AUS,05T06,138.811567,4195.535273,1014.16671,664.997136,82.613866,2.951015,10.235265,55.148532,3696.839741,254.988513,...,35.363722,12.105347,14.835873,469.460153,204.522624,125.915558,108.696443,88.679568,139.706962,0
AUS,07T08,87.176441,538.886717,5535.038857,75.254832,88.410335,5.014642,15.515045,24.653482,16.050274,192.790035,...,33.055192,30.5436,31.131445,208.910359,186.551804,89.187144,81.333047,54.703897,69.717536,0
AUS,09,19.237516,5273.441198,2086.251593,1682.828181,27.565778,1.581559,5.497529,7.674105,5.186643,8.312155,...,10.641247,13.946927,9.397067,42.627355,118.950068,26.275031,13.753929,11.581111,14.841447,0
AUS,10T12,5420.514236,46.981487,56.080848,5.898351,6937.844451,40.889192,32.784869,65.106437,24.917292,323.651126,...,23.39668,27.459606,41.981435,83.631506,411.810282,691.456999,535.150278,779.659391,978.797545,0
AUS,13T15,59.266539,40.950816,45.701526,2.277478,51.250799,252.121236,33.466274,27.782605,6.154291,44.489286,...,7.819893,6.707509,5.003766,10.529053,75.462884,89.18227,48.078483,74.105308,181.38726,0
AUS,16,72.445542,188.352301,134.924692,8.241974,43.545735,3.08445,1349.815006,101.276054,5.991086,22.558744,...,4.496303,4.838831,3.646868,146.716691,154.111558,38.53391,55.338365,19.414393,121.28093,0
AUS,17T18,130.383801,89.022727,97.083161,11.355822,422.261827,26.192141,71.741215,1773.541284,18.895188,145.431717,...,113.284263,177.477701,301.157952,93.918922,832.309814,329.365167,466.133569,200.172843,276.365614,0
AUS,19,530.652825,313.953723,516.543989,64.25367,121.805725,8.512166,35.120236,38.677779,296.013959,233.702849,...,51.725652,51.488008,109.912268,148.471849,407.34512,283.402781,148.109304,109.385686,162.45275,0
AUS,20T21,662.144018,220.207993,248.013542,22.264433,185.723622,49.921674,84.515034,160.159208,110.318348,1051.495217,...,30.270339,32.868051,30.255436,152.834179,354.398116,133.868031,124.007555,983.658357,216.583839,0


In [67]:
# Check the table is symmetric in the input-output sense: not only does it need to be
# n x n, but its rows and columns must also carry the same labels in the same order.
# This compares the axis labels position by position (one boolean per position);
# it does not compare the numeric values stored in the matrix.
IO_AUS.index == IO_AUS.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

Intermediate consumption is not the whole story. To represent the whole economy we also need to know the breakdown of taxes and subsidies by sector. Similarly, companies take inputs and produce an output whose value is greater than the sum of its components - the difference is the value added. These values are extra rows to the _Z_ matrix and are stored in the _F_ matrix.

In [68]:
# Breakdown of taxes and subsidies and value added by sector.
# F has one row per input type (the index is named 'inputtype') and one column
# per [region, sector] pair, covering every region in the dataset.
oecd_path_year.factor_inputs.F

region,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,ARG,...,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF
sector,01T03,05T06,07T08,09,10T12,13T15,16,17T18,19,20T21,...,61,62T63,64T66,68,69T82,84,85,86T88,90T96,97T98
inputtype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AUS_TAXSUB,0.132996,0.017576,0.016490,0.008126,0.006253,0.082651,0.008937,0.036073,0.061325,0.199232,...,0.132380,0.040280,0.035792,0.068539,0.219342,0.269904,0.079234,0.135192,0.077326,0.000000
AUT_TAXSUB,0.052867,0.026761,0.015243,0.004556,0.053804,0.037633,0.010132,0.029677,0.009379,0.060642,...,0.096526,0.047824,0.008710,0.060626,0.161540,0.371547,0.048500,0.513344,0.073937,0.000000
BEL_TAXSUB,0.225642,0.048434,0.036728,0.009054,0.094257,0.106845,0.012598,0.047739,0.083867,0.245115,...,0.085472,0.133854,0.050437,0.143190,0.395236,0.579650,0.117868,0.475845,0.100144,0.000000
CAN_TAXSUB,0.122170,0.033575,0.054038,0.007309,0.064599,0.060188,0.008411,0.030119,0.042757,0.151584,...,0.058905,0.015413,0.003724,0.017423,0.051784,0.080184,0.016149,0.049856,0.025746,0.000000
CHL_TAXSUB,0.315159,0.110207,0.086002,0.015152,0.364894,0.839357,0.047639,0.190712,0.100263,0.360592,...,0.004691,0.012607,0.003957,0.035575,0.054481,0.062479,0.042086,0.054369,0.017233,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THA_TAXSUB,0.032990,0.027859,0.014391,0.004627,0.032936,0.035228,0.006278,0.011148,0.007591,0.028013,...,0.083390,0.035060,0.008409,0.041220,0.115086,0.292701,0.079553,0.061259,0.043825,0.000000
TUN_TAXSUB,0.000728,0.000132,0.000095,0.000018,0.000055,0.000588,0.000056,0.000209,0.000168,0.001169,...,0.000486,0.000279,0.000065,0.000523,0.001237,0.001362,0.000581,0.001009,0.000497,0.000000
VNM_TAXSUB,-0.006366,0.005721,0.004746,0.000962,-0.033038,0.685171,0.003647,0.004392,0.001354,0.009083,...,0.110833,0.010020,0.004147,0.012975,0.066512,0.185915,0.080701,0.076021,0.106134,0.000000
ROW_TAXSUB,0.239255,5.829891,0.118004,0.704568,0.393384,0.634194,0.030984,0.275608,40.399715,2.015715,...,-0.091858,0.293590,0.003026,0.285604,0.706826,2.824502,0.354555,0.554319,0.379176,0.000000


In [69]:
# Breakdown of taxes and subsidies by sector for Australia only: keep the
# 'AUS_TAXSUB' row of F and the columns whose region level is 'AUS'.
taxsub_source = oecd_path_year.factor_inputs.F
taxsub_rows = taxsub_source.index.get_level_values('inputtype') == 'AUS_TAXSUB'
taxsub_aus_cols = taxsub_source.columns.get_level_values('region') == 'AUS'
AUS_TAXSUB = taxsub_source.iloc[taxsub_rows, taxsub_aus_cols]
AUS_TAXSUB

region,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS
sector,01T03,05T06,07T08,09,10T12,13T15,16,17T18,19,20T21,...,61,62T63,64T66,68,69T82,84,85,86T88,90T96,97T98
inputtype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AUS_TAXSUB,952.03943,1072.901373,1309.449877,197.938503,2826.686253,153.634766,126.634534,204.805311,628.397424,474.32364,...,266.508189,234.868465,497.677048,918.23948,1413.587964,1264.248504,580.238743,657.600697,962.311856,0


In [72]:
# Value added of Australian sectors: keep the 'VALU' row of F and the columns
# whose region level is 'AUS'.
va_source = oecd_path_year.factor_inputs.F
va_rows = va_source.index.get_level_values('inputtype') == 'VALU'
va_aus_cols = va_source.columns.get_level_values('region') == 'AUS'
AUS_VA = va_source.iloc[va_rows, va_aus_cols]
AUS_VA

region,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS,AUS
sector,01T03,05T06,07T08,09,10T12,13T15,16,17T18,19,20T21,...,61,62T63,64T66,68,69T82,84,85,86T88,90T96,97T98
inputtype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
VALU,29967.551571,30822.642351,36063.880794,5662.469088,18863.258018,1807.502925,3184.117359,4069.667497,3541.457641,7165.378425,...,16688.014055,24171.358469,106210.29824,139120.521033,105995.596906,67700.753809,60082.199558,83994.282022,31935.523313,0


A similar reasoning applies to the rows of our IO table. Not everything that companies produce is an intermediate input to other companies; they also produce final goods for consumers. This is represented as extra columns to our _Z_ matrix in another matrix, called _Y_, which characterises the final demand for these goods.

In [70]:
# Final demand (i.e. not intermediate demand from sectors).
# Rows are restricted to Australian sectors and columns to Australian
# final-demand categories, using one boolean mask per axis.
final_demand = oecd_path_year.Y
fd_aus_rows = final_demand.index.get_level_values('region') == 'AUS'
fd_aus_cols = final_demand.columns.get_level_values('region') == 'AUS'
FD_AUS = final_demand.iloc[fd_aus_rows, fd_aus_cols]
FD_AUS

Unnamed: 0_level_0,region,AUS,AUS,AUS,AUS,AUS,AUS
Unnamed: 0_level_1,category,HFCE,NPISH,GGFC,GFCF,INVNT,P33
region,sector,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
AUS,01T03,10592.09246,0.564608,1142.333194,3231.372225,143.772743,0.0
AUS,05T06,1792.289763,1.241248,397.233133,3264.412895,203.96918,0.0
AUS,07T08,590.074399,1.582092,205.06528,871.809275,-221.686017,0.0
AUS,09,109.644524,3.611901,858.219419,251.02585,-0.683895,0.0
AUS,10T12,31201.118707,0.0,52.52288,233.095197,49.598443,0.0
AUS,13T15,1517.92989,0.0,8.42749,57.611528,6.300115,0.0
AUS,16,213.697348,0.0,5.719265,168.058707,-8.175093,0.0
AUS,17T18,2397.832878,0.0,48.012623,223.591526,2.932238,0.0
AUS,19,3436.110311,0.0,9.709974,49.71792,8.536721,0.0
AUS,20T21,4798.520206,0.0,2393.696051,179.607082,14.381166,0.0


In [71]:
# Intersection of final demands and taxes and subsidies plus value added.
# All rows of F_Y are kept; only the columns whose region level is 'AUS' are shown.
fy_matrix = oecd_path_year.factor_inputs.F_Y
fy_aus_cols = fy_matrix.columns.get_level_values('region') == 'AUS'
fy_matrix.iloc[:, fy_aus_cols]

region,AUS,AUS,AUS,AUS,AUS,AUS
category,HFCE,NPISH,GGFC,GFCF,INVNT,P33
inputtype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AUS_TAXSUB,30063.371105,1.614733e+01,141.046364,9267.203724,655.624831,0.000000
AUT_TAXSUB,4.942800,6.133104e-04,0.793730,3.689605,0.154411,1.663501
BEL_TAXSUB,6.664880,1.714508e-03,1.688755,1.637015,0.109310,1.300523
CAN_TAXSUB,3.185289,1.543583e-03,0.140567,2.656326,0.117786,7.538582
CHL_TAXSUB,0.852469,4.924649e-04,0.011328,0.113299,0.004010,1.960401
...,...,...,...,...,...,...
THA_TAXSUB,30.246133,1.342254e-03,0.046298,24.437567,1.899604,61.917633
TUN_TAXSUB,0.024292,2.526443e-07,-0.000129,0.002460,0.000117,0.000620
VNM_TAXSUB,8.025249,1.765390e-04,0.000187,2.146633,0.078336,10.285949
ROW_TAXSUB,43.020083,-1.803853e-02,0.060364,1.244136,0.101639,26.852780
