# Process ADB MRIO Excel files

This notebook converts the ADB MRIO Excel workbooks in `data/raw/` into a machine-readable format and saves them as space-efficient Parquet files in `data/interim/`.

In [1]:
import pandas as pd
import os
import re

In [2]:
def process_table(df):
    """Tidy one raw MRIO sheet (read with a two-row header) into a flat table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are two-element tuples. The last row and the
        first two columns are discarded; the next two columns hold the two
        parts of each row label; the final column is renamed to 'ToT'.

    Returns
    -------
    pandas.DataFrame
        A new frame whose first column (named '') holds the combined row
        labels, followed by the value columns named '<level_1>_<level_2>' and
        the 'ToT' column. Rows labelled 'r60' are removed and cells equal to a
        single space are replaced by 0. Surviving rows keep their index labels.
    """
    # Trim the trailing row and the two leading columns.
    table = df.drop(index=df.index[-1]).iloc[:, 2:]

    # Flatten each two-level header tuple into one '<upper>_<lower>' name.
    table.columns = [f'{upper}_{lower}' for upper, lower in table.columns]

    # The final column gets the fixed name 'ToT'.
    table = table.rename(columns={table.columns[-1]: 'ToT'})

    # Build one label per row from the two leading columns. When the first
    # part is missing or is the 'ToT' marker, the second part stands alone.
    labels = []
    for prefix, code in zip(table.iloc[:, 0], table.iloc[:, 1]):
        if pd.isna(prefix) or prefix == 'ToT':
            labels.append(code)
        else:
            labels.append(f"{prefix}_{code}")

    # Place the combined label right after its two source columns, then
    # discard those two columns so the label becomes the first column.
    table.insert(2, '', labels)
    table = table.iloc[:, 2:]

    # Remove the intermediate-totals rows (label 'r60').
    r60_rows = table[table[''] == 'r60'].index
    table = table.drop(index=r60_rows)

    # Blank cells arrive as a single space; store them as zero.
    return table.replace(' ', 0)

In [3]:
def load_and_save(inputfolder, outputfolder, outputfile, outputsuffix=None):
    """Convert every Excel workbook in a raw-data folder to a parquet file.

    Each non-hidden file in ``../data/raw/<inputfolder>`` is read with
    ``pd.read_excel`` (skipping the first 5 rows, two header rows), cleaned
    with ``process_table`` and written to
    ``../data/interim/<outputfolder>/<outputfile>-<year><outputsuffix>.parquet``,
    where ``<year>`` is the first run of four digits in the file name.

    Parameters
    ----------
    inputfolder : str
        Name of the folder under ``../data/raw/`` that holds the workbooks.
    outputfolder : str
        Name of the folder under ``../data/interim/`` to write to. It is
        created if it does not exist yet.
    outputfile : str
        Stem of each output file name.
    outputsuffix : str, optional
        Text appended after the year in each output file name. ``None``
        (the default) appends nothing.

    Raises
    ------
    ValueError
        If a file name contains no four-digit year.
    """
    source_dir = f'../data/raw/{inputfolder}'
    target_dir = f'../data/interim/{outputfolder}'

    # Skip hidden entries (names starting with '.').
    filelist = [file for file in os.listdir(source_dir) if not file.startswith('.')]

    # Make sure the destination exists so a fresh checkout can run this.
    os.makedirs(target_dir, exist_ok=True)
    suffix = '' if outputsuffix is None else outputsuffix

    for file in filelist:
        # Check the file name before the expensive Excel read.
        year_match = re.search('[0-9]{4}', file)
        if year_match is None:
            raise ValueError(f'No four-digit year found in file name: {file!r}')
        year = year_match.group()

        # Read from the folder passed as `inputfolder`, never from a
        # same-named global left over in the notebook namespace.
        mrio = pd.read_excel(
            f'{source_dir}/{file}',
            skiprows=5,
            header=[0, 1]
        )
        mrio = process_table(mrio)
        mrio.to_parquet(f'{target_dir}/{outputfile}-{year}{suffix}.parquet', index=False)

        print(f'{year} done')

## ADB MRIO 72 economies

In [53]:
# Raw workbooks for this release are read from ../data/raw/<folder>/.
folder = 'ADB MRIO, 72 economies as of Dec 2022'
# Writes one file per workbook: ../data/interim/ADB-MRIO/ADB-MRIO-<year>.parquet
load_and_save(inputfolder=folder, outputfolder='ADB-MRIO', outputfile='ADB-MRIO')

2019 done
2018 done
2021 done
2020 done
2017 done


In [6]:
# List the raw workbooks of the Dec 2022 release, skipping hidden entries
# (names beginning with '.'). os.listdir returns names in arbitrary order,
# so a position in `filelist` does not identify a fixed file.
folder = 'ADB MRIO, 72 economies as of Dec 2022'
filelist = list(
    filter(
        lambda name: not name.startswith('.'),
        os.listdir(f'../data/raw/{folder}'),
    )
)
filelist

['ADB-MRIO-2019_Dec2022.xlsx',
 'ADB-MRIO-2018_Dec2022.xlsx',
 'ADB-MRIO-2021_Dec2022-1 (1).xlsx',
 'ADB-MRIO-2020_Dec2022.xlsx',
 'ADB-MRIO-2017_Dec2022-2.xlsx']

In [7]:
# Spot-check: read one raw workbook from `filelist`, clean it with
# process_table and display the result.
mrio = process_table(
    pd.read_excel(
        f'../data/raw/{folder}/{filelist[2]}',
        skiprows=5,      # skip the first five rows of the sheet
        header=[0, 1],   # two header rows -> two-level column labels
    )
)
mrio

Unnamed: 0,Unnamed: 1,AUS_c1,AUS_c2,AUS_c3,AUS_c4,AUS_c5,AUS_c6,AUS_c7,AUS_c8,AUS_c9,...,NZL_F2,NZL_F3,NZL_F4,NZL_F5,RoW_F1,RoW_F2,RoW_F3,RoW_F4,RoW_F5,ToT
0,AUS_c1,16660.069001,230.836646,33881.350535,1216.955406,77.260536,2811.408777,8.542855,17.981032,394.837217,...,0.00000,0.005777,0.054069,-0.020146,5.161392e-01,0.000032,0.006373,4.158747e-02,0.000448,9.013482e+04
1,AUS_c2,435.916807,20070.056400,895.070806,22.422609,3.237481,117.955734,362.726066,1456.024006,3109.083858,...,0.00000,0.000000,0.000000,2.734331,2.544863e+00,0.000122,0.861179,0.000000e+00,-0.067652,2.613343e+05
2,AUS_c3,2036.204502,328.289786,9580.528633,9.950032,247.808778,22.275561,22.114697,63.582005,254.873984,...,0.00000,0.000000,0.000000,1.715724,4.304292e+01,0.004463,0.192437,0.000000e+00,0.077131,8.975084e+04
3,AUS_c4,30.709958,46.173854,35.095445,72.693088,4.763257,10.342989,15.568088,2.628250,34.530262,...,0.00000,0.000000,1.223987,-0.147545,1.294181e+02,0.013558,2.572366,2.877397e+00,1.827261,4.128086e+03
4,AUS_c5,1.269802,3.272219,4.522744,25.995031,12.103381,0.426392,4.121031,0.184442,0.598499,...,0.00000,0.000000,0.000000,-0.193275,3.447461e+02,0.034445,1.694869,0.000000e+00,2.316535,1.027787e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2558,r62,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,1.209317e+05
2559,r63,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,-1.045070e+05
2560,r64,35220.088763,167212.015115,21170.875924,1470.325992,328.878962,3663.079663,8359.335641,3029.470045,8467.888443,...,0.00000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,9.238446e+07
2561,trs,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00


## ADB MRIO 62 economies

In [54]:
# Raw workbooks for the 62-economy tables are read from ../data/raw/<folder>/.
folder = 'ADB MRIO, 62 economies'
# Writes one file per workbook: ../data/interim/ADB-MRIO62/ADB-MRIO62-<year>.parquet
load_and_save(inputfolder=folder, outputfolder='ADB-MRIO62', outputfile='ADB-MRIO62')

2008 done
2009 done
2011 done
2018 done
2016 done
2019 done
2010 done
2012 done
2015 done
2014 done
2013 done
2017 done
2007 done
2021 done
2020 done
2000 done


## ADB MRIO 72 economies (version Jun 2023)

In [55]:
# Raw workbooks for the Jun 2023 version are read from ../data/raw/<folder>/.
folder = '9 MRIO 2020-2022 for upload (ao Jun 2023)'
# Writes one file per workbook:
# ../data/interim/ADB-MRIO_jun2023/ADB-MRIO-<year>_jun2023.parquet
load_and_save(inputfolder=folder, outputfolder='ADB-MRIO_jun2023', outputfile='ADB-MRIO', outputsuffix='_jun2023')

2021 done
2022 done
2020 done
