## Hsieh Weighted TFPG estimation 



**Note**: for 2009-2019 annual special income in monthly 

## Init

In [62]:
import numpy as np
import pandas as pd
from functools import reduce
import plotly.express as px
from plotnine import *

## Import Data

In [63]:
#Capital
gfcf = pd.read_csv('./data/KRGFCF_breakdown.csv', index_col=[0]).dropna()
gfcf_real = pd.read_csv('./data/KRGFCF_breakdown_real.csv', index_col=[0]).dropna()

In [64]:
gdp_deflator = pd.read_csv('./data/KRGDPDeflator.csv', skiprows=26)
gdp_deflator.columns = ['year', 'GDPDeflator']


In [65]:
discount_rate = pd.read_csv('./data/KRDiscountRate.csv')
discount_rate.columns =['date', 'DiscountRate']

In [66]:
ls_oecd = pd.read_csv('./data/KRLS_OECD.csv', skiprows=25)
ls_oecd.columns = ['year', 'LS']

In [67]:
#Labor
wwc = pd.read_csv('./data/KRWdlnw.csv')
wwc.columns = ['year', 'WeightedWageChange']
wwc.set_index('year', inplace=True)

## Tidy Data
tidy and transform to a harmonised format

### Capital

In [68]:
def unflatten_cols(df: pd.DataFrame, delim: str = ""):
    """Unflatten a single column level into multiple column levels.

    Args:
        delim: the delimiter to split on to identify the multiple column values.

    Returns:
        A copy of the dataframe with the new column levels.

    """
    new_cols = pd.MultiIndex.from_tuples([tuple(col.split(delim)) for col in df.columns])
    ndf = df.copy()
    ndf.columns = new_cols

    return ndf


In [69]:
gfcf =\
(gfcf
 .rename(columns=lambda x: x.replace("GDP: sa: GFCF: ", "").replace(" ", ""))
 .drop(['Construction(CS)', 'CS:Buildings', 'FacilitiesInvestment(FI)', 'IntellectualPropertyProducts(IPP)'], axis=1)
 .pipe(unflatten_cols, ":")
 .rename_axis(['Category', 'SubCategory1','SubCategory2'], axis=1)
 #filter
 .loc[:,'CS':'FI']
)

In [70]:
gfcf_real =\
(gfcf_real
 .rename(columns=lambda x: x.replace("GDP: 2015p: sa: GFCF: ", "").replace(" ", ""))
 .drop(['Construction(CS)', 'CS:Buildings', 'FacilitiesInvestment(FI)', 'IntellectualPropertyProducts(IPP)'], axis=1)
 .pipe(unflatten_cols, ":")
 .rename_axis(['Category', 'SubCategory1','SubCategory2'], axis=1)
 #filter
 .loc[:,'CS':'FI']
)

## Transform Data

### Capital

Nominal

In [71]:
#share
gfcf_sum =\
(gfcf
 .sum(axis=1)
)

gfcf_share = gfcf.div(gfcf_sum, axis=0)

Real

In [72]:
gfcf_deflator = gfcf/gfcf_real


def set100(df, year):

    df = df.query(f'year >= {str(year)}').copy()
    df.loc[1980,:] = 100
    
    return df

    
gfcf_deflator =\
(gfcf_deflator
 .apply(np.log)
 .diff()
 .apply(lambda x: x+1)
 .pipe(set100, 1980)
 .cumprod()
)

In [73]:
gdp_deflator =\
(gdp_deflator
 .set_index('year')
 .apply(np.log)
 .diff()
 .apply(lambda x: x+1)
 .pipe(set100, 1980)
 .cumprod()
 ['GDPDeflator']
)

In [74]:
gfcf_relative_price = gfcf_deflator.div(gdp_deflator, axis=0)

**Merge**

In [75]:
gfcf

Category,CS,CS,CS,FI,FI
SubCategory1,Buildings,Buildings,CivilEngineering,TransportEquipment,Machinery&Equipment
SubCategory2,ResidentialBuilding,NonResidentialBuilding,NaN,NaN,NaN
year,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
1970,165.9,145.4,180.9,89.6,124.4
1971,166.0,132.7,199.6,120.6,164.0
1972,155.5,148.7,231.6,159.7,196.1
1973,236.7,250.8,276.8,225.3,324.8
1974,511.3,320.0,367.7,461.7,447.4
1975,627.9,399.0,553.9,579.7,645.8
1976,705.9,539.9,727.0,734.4,1019.9
1977,967.4,693.3,1107.9,822.1,1718.7
1978,1568.4,1123.7,1458.3,1173.6,2900.7
1979,1812.7,1554.6,2119.6,1372.2,4031.4


In [49]:
# unstack and create tidy format
gfcf =\
(gfcf
 .unstack()
 .reorder_levels([3,0,1,2])
 .sort_index(0)
 .to_frame()
 .rename(columns={0:'Value'})
)

gfcf_relative_price =\
(gfcf_relative_price
 .unstack()
 .reorder_levels([3,0,1,2])
 .sort_index(0)
 .to_frame()
 .rename(columns={0:'RelativePrice'})
)

gfcf_share =\
(gfcf_share
 .unstack()
 .reorder_levels([3,0,1,2])
 .sort_index(0)
 .to_frame()
 .rename(columns={0:'Share'})
)

In [50]:
discount_rate =\
(discount_rate
 .assign(date=lambda x: pd.to_datetime(x.date))
 .assign(year=lambda x: x.date.dt.year,
         month=lambda x: x.date.dt.month)
 .query('month==12')
 .set_index('year')
 .rename_axis('Year', axis=0)
 ['DiscountRate']
 .apply(lambda x: x/100)
)

In [51]:
pi =\
(gdp_deflator
 .apply(np.log)
 .diff()
)

In [52]:
ls_oecd =\
ls_oecd.set_index('year')['LS']

In [53]:
# Merge

def add_delta(df):
    df.loc[lambda x: x.index.get_level_values(3) == 'NonResidentialBuilding','Depreciation'] = 0.013
    df.loc[lambda x: x.index.get_level_values(3) == 'ResidentialBuilding','Depreciation'] = 0.029
    df.loc[lambda x: x.index.get_level_values(2) == 'CivilEngineering','Depreciation'] = 0.021
    df.loc[lambda x: x.index.get_level_values(2) == 'Machinery&Equipment','Depreciation'] = 0.138
    df.loc[lambda x: x.index.get_level_values(2) == 'TransportEquipment','Depreciation'] = 0.182
    return df

idx = pd.IndexSlice


gfcf_info =\
(gfcf
 .join(gfcf_relative_price, how='right')
 .join(gfcf_share, how='left')
 .join(discount_rate)
 .join(pi)
 .rename(columns={'GDPDeflator':'Pi'})
 .pipe(add_delta)
 .join(ls_oecd)
)

ValueError: cannot join with no overlapping index names

In [None]:
#add period
gfcf_info =\
(gfcf_info
 .reset_index('year')
 .assign(period=lambda x: x.year.apply(lambda x: 1 if (x>=1980) & (x<1990) else 
                                       (2 if (x>=1990) & (x<2000) else 
                                       (3 if (x>=2000) & (x<2010) else 
                                        4))))
 .set_index(['year', 'period'], append=True)
 .reorder_levels([4,3,0,1,2])
 .sort_index(0)
)

## Process Data

### Capital

In [None]:
wrc =\
(gfcf_info
 .assign(RealInterestRate=lambda x: x.DiscountRate - x.Pi,
         rent=lambda x: x.RealInterestRate + x.Depreciation,
         WeightedRentChange=lambda x: x.Share*x.RelativePrice*x.rent
        )
 .groupby('year')
 ['WeightedRentChange']
 .sum()
 .to_frame()
)

In [54]:
# average LS
avgls =\
(gfcf_info
 .groupby('period')
 .agg({'LS':'mean'})
 .rename(columns={'LS':'avgLS'})
 .iloc[:,0]
)

In [55]:
dual =\
(wwc
 .join(wrc)
 .join(ls_oecd)
 .fillna(method='ffill')
 .assign(LS=lambda x: x.LS/100,
         KS=lambda x: 1-x.LS,
         Dual=lambda x: x.LS*x.WeightedWageChange + x.KS*x.WeightedRentChange)
)

In [56]:
dual.to_csv('./output/KRDual.csv')

## Visualize

In [57]:
wdlnh = pd.read_csv('./data/KRWdlnH.csv')
wdlnh.columns = ['year', 'WeightedManHourChange']

In [58]:
plot_df =\
(dual.reset_index().merge(wdlnh, on='year').drop(columns=['LS', 'KS'])
 .melt(id_vars=['year'])
)

In [59]:
plot_df.head()

Unnamed: 0,year,variable,value
0,1994,WeightedWageChange,0.002666
1,1995,WeightedWageChange,0.031186
2,1996,WeightedWageChange,0.100431
3,1997,WeightedWageChange,0.026909
4,1998,WeightedWageChange,-0.054065


In [60]:
px.line(plot_df, x='year', y='value', color='variable')

In [61]:
dual.query('year >= 2008').mean()

WeightedWageChange   -0.048927
WeightedRentChange    0.043121
LS                    0.688493
KS                    0.311507
Dual                 -0.019751
dtype: float64