# Basic checks

This notebook flags any anomaly in the raw MRIO tables that can potentially cause problems in the mathematics of input-output analysis. Anomalies to detect include:

1. **Negative value added.** Having negative value added means that a country-sector's total input use exceeds its output. The sum of of its technical coefficients will therefore sum to greater than 1, which can cause the matrix $I - A$ to be singular.

1. **Zero output, nonzero inputs and value added.** If country-sectors have zero output, it must follow that both input use and value added are zero.

In [2]:
import pandas as pd
import os

## Helper functions

In [81]:
def process_table(df):
    
    # Remove the last row and the first 2 columns
    df = df.drop(df.index[-1])
    df = df.iloc[:, 2:]

    # Collapse MultiIndex headers into one
    df.columns = [f'{level_1}_{level_2}' for level_1, level_2 in df.columns]

    # Rename the ToT column
    colnames = df.columns.tolist()
    mapping = {colnames[-1]: 'ToT'}
    df = df.rename(columns=mapping)

    # Fix row labels
    rowlabels = [f"{c}_{d}" if not (pd.isna(c) or c == 'ToT') else d for c, d in zip(df.iloc[:, 0], df.iloc[:, 1])]
    df.insert(2, '', rowlabels)
    df = df.iloc[:, 2:]
    
    # Drop intermediates totals
    df = df.drop(df[df[''] == 'r60'].index)

    # Replace blank cells with zero
    df = df.replace(' ', 0)

    return df

In [82]:
def check_negative_va(df):
    va = df.iloc[-7:-1].filter(regex='[A-Z]{3}_c\d')
    va = va.sum(axis=0)
    return va[va < 0].index.tolist()

In [83]:
def check_z_x_mismatch(df):
    Z = df.iloc[:-7].filter(regex='[A-Z]{3}_c\d')
    zuse = Z.sum(axis=0)
    x = df.iloc[-1].filter(regex='[A-Z]{3}_c\d')
    return 

In [84]:
def flag_anomalies(filelist):
    negative_va = {}
    z_x_mismatch = {}

    for file in filelist:
        mrio = pd.read_excel(
            '../data/raw/' + foldername + '/' + file,
            skiprows=5,
            header=[0,1]
        )
        mrio = process_table(mrio)
        negative_va[file] = check_negative_va(mrio)
        z_x_mismatch[file] = check_z_x_mismatch(mrio)
    
    return negative_va, z_x_mismatch

## Checks

### MRIO 2020-2022 for upload (ao Jun 2023)

In [None]:
foldername = '9 MRIO 2020-2022 for upload (ao Jun 2023)'
filelist = [file for file in os.listdir('../data/raw/' + foldername) if not file.startswith('.')]

negative_va, z_x_mismatch = flag_anomalies(filelist)

print('Negative VA')
for file, list in negative_va.items():
    print(f'{file}: {list}')

print('\nInputs/output mismatch')
for file, list in z_x_mismatch.items():
    print(f'{file}: {list}')

### ADB MRIO, 72 economies as of Dec 2022

In [64]:
foldername = 'ADB MRIO, 72 economies as of Dec 2022'
filelist = [file for file in os.listdir('../data/raw/' + foldername) if not file.startswith('.')]

negative_va, z_x_mismatch = flag_anomalies(filelist)

print('Negative VA')
for file, list in negative_va.items():
    print(f'{file}: {list}')

print('\nInputs/output mismatch')
for file, list in z_x_mismatch.items():
    print(f'{file}: {list}')

['ADB-MRIO-2019_Dec2022.xlsx',
 'ADB-MRIO-2018_Dec2022.xlsx',
 'ADB-MRIO-2021_Dec2022-1 (1).xlsx',
 'ADB-MRIO-2020_Dec2022.xlsx',
 'ADB-MRIO-2017_Dec2022-2.xlsx']

### ADB MRIO, 62 economies

In [96]:
foldername = 'ADB MRIO, 62 economies'
filelist = [file for file in os.listdir('../data/raw/' + foldername) if not file.startswith('.')]

negative_va, z_x_mismatch = flag_anomalies(filelist)

print('Negative VA')
for file, list in negative_va.items():
    print(f'{file}: {list}')

print('\nInputs/output mismatch')
for file, list in z_x_mismatch.items():
    print(f'{file}: {list}')

Negative VA
ADB-MRIO-2008_Mar2022.xlsx: []
ADB-MRIO-2009_Mar2022.xlsx: []
ADB-MRIO-2011_Mar2022.xlsx: ['LVA_c25', 'MLT_c2']
ADB-MRIO62-2018_Dec2022.xlsx: []
ADB-MRIO-2016_Mar2022.xlsx: []
ADB-MRIO62-2019_Dec2022.xlsx: ['CYP_c25']
ADB-MRIO-2010_Mar2022.xlsx: []
ADB-MRIO-2012_Mar2022.xlsx: ['MLT_c2']
ADB-MRIO-2015_Mar2022.xlsx: []
ADB-MRIO-2014_Mar2022.xlsx: ['MLT_c2']
ADB-MRIO-2013_Mar2022.xlsx: ['LUX_c24']
ADB-MRIO62-2017_Dec2022.xlsx: []
ADB-MRIO-2007.xlsx: []
ADB-MRIO62-2021_Dec2022.xlsx: ['CYP_c25', 'CZE_c25', 'FIN_c25', 'FIJ_c25']
ADB-MRIO62-2020_Dec2022.xlsx: ['BGR_c8', 'CYP_c25', 'CZE_c25', 'FIN_c25', 'MLT_c25', 'FIJ_c25']
ADB-MRIO-2000_Mar2022-3.xlsx: ['MLT_c2']

Inputs/output mismatch
ADB-MRIO-2008_Mar2022.xlsx: None
ADB-MRIO-2009_Mar2022.xlsx: None
ADB-MRIO-2011_Mar2022.xlsx: None
ADB-MRIO62-2018_Dec2022.xlsx: None
ADB-MRIO-2016_Mar2022.xlsx: None
ADB-MRIO62-2019_Dec2022.xlsx: None
ADB-MRIO-2010_Mar2022.xlsx: None
ADB-MRIO-2012_Mar2022.xlsx: None
ADB-MRIO-2015_Mar2022.xlsx: No