# Preprocess deflators Excel file

This notebook converts the deflators Excel file into a machine-readable (Parquet) format and calculates the implied inflation rates. It also uses sector value-added shares to aggregate the sector-level deflators into country-level deflators, from which country-level inflation rates are calculated.

## Set up

In [1]:
import pandas as pd
import duckdb

In [2]:
# File names only; the processing cell prepends the data directories.
# NOTE: `input` shadows Python's built-in input() for the rest of the notebook.
input = "ADB-MRIO-Deflators (for 2016-2021).xlsx"  # raw deflators workbook
summary = "summary62.parquet"                      # source of value added (va)
output = "deflators62.parquet"                     # processed deflators + inflation

## Processing steps

In [3]:
# Load and clean
# Sheet rows 0, 1, 3 and 4 are skipped; the two remaining header rows are read
# as MultiIndex columns (level 0 becomes t, level 1 becomes i after melting).
deflators = pd.read_excel(
    f'../data/raw/{input}',
    sheet_name='Production',
    skiprows=[0, 1, 3, 4],
    header=[0, 1]
)
# s is the 1-based row position in the sheet (one row per country code).
deflators.insert(0, 's', deflators.index + 1)
deflators = pd.melt(deflators, id_vars=['s', ('Year', 'Country Code')])
deflators.columns = ['s', 'code', 't', 'i', 'deflator']
# Strip the literal 'c' from the sector labels. regex=False makes the literal
# match explicit instead of relying on the pandas-version-dependent default.
deflators['i'] = deflators['i'].str.replace('c', '', regex=False).astype(int)
deflators['t'] = deflators['t'].astype(int)

# Load country-sector value added and compute each sector's share of its
# country-year total.
va = duckdb.sql(f"SELECT t, s, i, va FROM read_parquet('../data/{summary}')").df()
va_sum = (
    va.groupby(['s', 't'], as_index=False)['va'].sum()
    .rename(columns={'va': 'va_sum'})
)
va = pd.merge(va, va_sum, on=['s', 't'])
va['va_sh'] = va['va'] / va['va_sum']

# Explicit keys (previously implied by the shared column names). This is an
# inner join, so deflator rows without a value-added record are dropped.
deflators = pd.merge(deflators, va, on=['s', 't', 'i'], how='inner')

# Compute aggregate deflator: value-added-share-weighted sum over sectors.
deflators['def_va_sh'] = deflators['deflator'] * deflators['va_sh']
deflators_agg = (
    deflators.groupby(['t', 's'], as_index=False)['def_va_sh'].sum()
    .rename(columns={'def_va_sh': 'deflator'})
    .assign(agg=0, i=0)
)

# Consolidate: agg=35 tags sector-level rows, agg=0 (with i=0) tags the
# country-level aggregate. Selecting the output columns discards the helpers.
deflators['agg'] = 35
deflators = (
    pd.concat([deflators, deflators_agg], ignore_index=True)
    [['s', 'agg', 'i', 't', 'deflator']]
    .sort_values(by=['agg', 's', 'i', 't'])
    .reset_index(drop=True)
)

# Compute inflation.
# The years are not evenly spaced (the stored output jumps from 2000 to 2007),
# so the period-on-period change is divided by the number of years elapsed.
# This is a simple (arithmetic) average per year, not a compound rate. Deriving
# the gap from t replaces the hard-coded "t == 2007 -> divide by 7" rule and
# gives the same numbers for a 2000, 2007, 2008, ... layout.
by_series = deflators.groupby(['agg', 's', 'i'])
years_elapsed = by_series['t'].diff()
deflators['inflation'] = by_series['deflator'].pct_change() / years_elapsed

deflators.to_parquet(f'../data/reer/{output}', index=False)

### View results

In [4]:
# Read the saved file back from disk so the display reflects what was written.
results_query = f"SELECT * FROM read_parquet('../data/reer/{output}')"
duckdb.sql(results_query).df()

Unnamed: 0,s,agg,i,t,deflator,inflation
0,1,0,0,2000,43.888733,
1,1,0,0,2007,81.567056,0.122642
2,1,0,0,2008,85.653027,0.050093
3,1,0,0,2009,80.901563,-0.055473
4,1,0,0,2010,100.000000,0.236070
...,...,...,...,...,...,...
36283,63,35,35,2017,114.018535,0.048766
36284,63,35,35,2018,112.125085,-0.016607
36285,63,35,35,2019,112.375223,0.002231
36286,63,35,35,2020,111.003705,-0.012205
