In [1]:
import pandas as pd 
import numpy as np
from scipy.optimize import minimize
from functools import partial
from statsmodels.formula.api import ols

In [2]:
# Load the ECHP income panel into the module-level name `df`.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
df = pd.read_pickle('../data_sources/echp/echp_incomes.pkl')
# Preview the first rows as the cell output.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wave,year,age,co,Y,logY
country,hid,pid,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,7601,76101,1.0,1994.0,42.0,DK,0.829935,-0.186408
2,8401,84101,1.0,1994.0,70.0,DK,1.082765,0.079518
2,10701,107101,1.0,1994.0,22.0,DK,0.770043,-0.26131
2,16501,165101,1.0,1994.0,46.0,DK,1.336161,0.2898
2,19901,199101,1.0,1994.0,23.0,DK,0.821379,-0.196771


In [3]:
# Keep only observations aged 25 through 85 (both endpoints included);
# rows with a missing age are dropped as well.
df = df[df['age'].between(25, 85)]
# Summary statistics of the restricted sample.
df.describe()

Unnamed: 0,wave,year,age,Y,logY
count,48395.0,48395.0,48395.0,48395.0,48395.0
mean,4.340469,1999.639054,57.623473,1.01424,-0.118761
std,2.304796,7.92622,18.380789,0.528584,0.53888
min,1.0,1994.0,25.0,0.057038,-2.864029
25%,2.0,1995.0,40.0,0.644961,-0.438565
50%,4.0,1997.0,62.0,0.906191,-0.098505
75%,6.0,1999.0,73.0,1.258058,0.229569
max,8.0,2021.0,85.0,3.519351,1.258277


In [4]:
def vech(cov):
	"""Stack the lower triangle of a square matrix column by column.

	For each column ``col`` the entries ``cov[row, col]`` with ``row >= col``
	are collected from the diagonal downwards, so a ``T x T`` input gives a
	1-D array with ``T * (T + 1) / 2`` elements.
	"""
	size = cov.shape[0]
	lower = [cov[row,col] for col in range(size) for row in range(col,size)]
	return np.array(lower)
def mdfunc(theta,cov,weight):
	"""Minimum-distance criterion for a persistent-plus-transitory covariance model.

	Parameters
	----------
	theta : indexable whose first three entries are read as (rho, sige, sigv).
	cov : square ``T x T`` empirical covariance matrix.
	weight : weighting matrix conformable with ``vech(cov)``.

	Returns
	-------
	One-element array holding ``d' W d``, where ``d`` is ``vech(cov)`` minus
	``vech`` of the model-implied matrix. The implied covariance between
	positions ``i`` and ``j`` is
	``(i == j) * sigv + rho**|i - j| * sige / (1 - rho**2)``.
	"""
	rho, sige, sigv = theta[0], theta[1], theta[2]
	periods = cov.shape[0]
	implied = np.zeros((periods,periods))
	# Fill the implied matrix cell by cell; the arithmetic order is kept as
	# (rho**lag * sige) / (1 - rho**2).
	for row, col in np.ndindex(periods,periods):
		lag = np.abs(row-col)
		implied[row,col] = (row==col)*sigv + (rho**lag)*sige/(1.0-rho**2)
	gap = vech(cov) - vech(implied)
	# Row vector @ weight @ column vector gives a 1 x 1 matrix; its first row
	# (a length-1 array) is what gets returned.
	quad = gap[np.newaxis,:] @ weight @ gap[:,np.newaxis]
	return quad[0]

In [5]:
# Seed numpy's global RNG once; DataFrame.sample below draws from it when no
# random_state is passed, so bootstrap draws are reproducible on a fresh run.
np.random.seed(seed=1234)
def get_cov(data,reps):
	"""Empirical covariance matrix plus a bootstrap-based weighting matrix.

	Parameters
	----------
	data : DataFrame whose columns are the series to take covariances of.
	reps : number of bootstrap replications (must be at least 1).

	Returns
	-------
	cov : ndarray, ``data.cov()`` as a plain array.
	weight : ndarray, inverse of the bootstrap covariance matrix of
		``vech(cov)``.

	Raises
	------
	ValueError
		If ``reps`` is smaller than 1.
	numpy.linalg.LinAlgError
		If the bootstrap covariance matrix is singular, which is guaranteed
		when ``reps`` does not exceed the number of distinct moments.

	Notes
	-----
	The bootstrap moments are centred on their mean before the outer products
	are averaged. Averaging the raw outer products instead would estimate the
	second-moment matrix E[m m'], not the sampling covariance of the moments
	that a minimum-distance weight matrix calls for.
	"""
	if reps < 1:
		raise ValueError('reps must be a positive integer')
	cov = data.cov().to_numpy()
	nobs = len(data)
	# One row per replication: vech of the covariance of a resample of rows
	# drawn with replacement.
	draws = np.array([
		vech(data.sample(n=nobs,replace=True).cov().to_numpy())
		for _ in range(reps)
	])
	deviations = draws - draws.mean(axis=0)
	omega = deviations.T @ deviations / reps
	weight = np.linalg.inv(omega)
	return cov, weight

In [6]:
def get_pars(co, source=None):
	"""Estimate (rho, sige, sigv) for one country by minimum distance.

	Parameters
	----------
	co : value matched against the ``co`` column to select the country.
	source : optional DataFrame to estimate from. When omitted, the
		module-level ``df`` is used, so the result depends on whichever
		dataset ``df`` is bound to at call time.

	Returns
	-------
	ndarray of length 3 with the minimiser ``[rho, sige, sigv]``.

	Steps: keep rows of the country with non-missing ``logY``, regress
	``logY`` on age dummies, reshape the residuals to one row per ``pid`` and
	one column per ``wave``, and fit the covariance of those columns with
	``mdfunc`` under identity weights.

	The optimisation is unconstrained, so nothing prevents ``|rho| >= 1`` or
	negative variances. A RuntimeWarning is issued when the optimiser reports
	that it did not converge; the last iterate is still returned.
	"""
	import warnings

	frame = df if source is None else source
	sample = frame.loc[(frame['co']==co) & frame['logY'].notna(),:]
	sample = sample.reset_index()
	sample = sample.loc[:,['pid','wave','age','logY']]
	model = ols('logY ~ C(age)',data=sample).fit()
	# Residuals are attached positionally, matching the row order of `sample`.
	resid = sample.loc[:,['pid','wave']].assign(e=model.resid.to_numpy())
	wide = pd.pivot(resid,index='pid',columns='wave')
	cov = wide.cov().to_numpy()
	# Identity weighting: every distinct covariance element counts equally.
	weight = np.eye(vech(cov).shape[0])
	f = partial(mdfunc,cov=cov,weight=weight)
	itheta = np.array([0.9,0.02,0.02])
	opt = minimize(f,x0=itheta)
	if not opt.success:
		warnings.warn(
			'minimum-distance fit for {!r} did not converge: {}'.format(co, opt.message),
			RuntimeWarning,
		)
	return opt.x
	

In [7]:
# Country codes passed one by one to get_pars.
countries = ['DE','DK','FR','NL','IT','SP']
# One column per country; the rows follow the order of the vector returned by
# get_pars (rho, sige, sigv).
table = pd.DataFrame(index=['rho','sige','sigv'],columns=countries)
# Whole-column assignment replaces each placeholder column with the estimates.
for co in countries:
	table[co] = get_pars(co)
# The 'SE' column is not estimated: it is a straight copy of the 'DK' column.
# NOTE(review): presumably SE lacks usable data in this source — confirm.
table['SE'] = table.loc[:,'DK']
table

Unnamed: 0,DE,DK,FR,NL,IT,SP,SE
rho,0.928252,0.934175,0.966967,0.939743,0.912178,0.97228,0.934175
sige,0.029244,0.011039,0.012231,0.017194,0.032484,0.010824,0.011039
sigv,0.055352,0.073201,0.081796,0.086212,0.096826,0.085114,0.073201


In [8]:
# Re-select the columns so the countries appear in alphabetical order.
table = table[['DE','DK','FR','IT','NL','SE','SP']]
table

Unnamed: 0,DE,DK,FR,IT,NL,SE,SP
rho,0.928252,0.934175,0.966967,0.912178,0.939743,0.934175,0.97228
sige,0.029244,0.011039,0.012231,0.032484,0.017194,0.011039,0.010824
sigv,0.055352,0.073201,0.081796,0.096826,0.086212,0.073201,0.085114


In [9]:
# Rebind the module-level `df` to the PSID income panel. Any code that reads
# the global `df` after this cell runs sees this dataset, not the one that was
# loaded before.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
df = pd.read_pickle('../data_sources/psid/psid_incomes.pkl')
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,co,pid,year,age,Y,logY,wave
0,US,4001,1989,67.0,0.720443,-0.327888,1
2,US,4003,1989,38.0,0.900614,-0.104679,1
6,US,4008,1989,24.0,0.684175,-0.379542,1
15,US,4170,1989,34.0,0.78962,-0.236204,1
16,US,4172,1989,40.0,1.564385,0.447493,1


In [10]:
# Keep only observations aged 25 through 85 (both endpoints included);
# rows with a missing age are dropped as well.
df = df[df['age'].between(25, 85)]
# Summary statistics of the restricted sample.
df.describe()

Unnamed: 0,pid,year,age,Y,logY,wave
count,63267.0,63267.0,63267.0,63267.0,63267.0,63267.0
mean,3499166.0,1993.040448,45.861792,0.985992,-0.279632,5.040448
std,2304756.0,2.519332,15.30052,0.679334,0.794749,2.519332
min,4001.0,1989.0,25.0,0.032046,-3.44059,1.0
25%,1398003.0,1991.0,34.0,0.473577,-0.747441,3.0
50%,2665006.0,1993.0,42.0,0.844432,-0.169092,5.0
75%,5814034.0,1995.0,56.0,1.344353,0.295913,7.0
max,6872177.0,1997.0,85.0,4.048423,1.398328,9.0


In [11]:
# Estimate the parameters for the 'US' code. get_pars is called without a
# data argument, so it works from whatever `df` is bound to when this runs.
pars = get_pars('US')
# Store the estimates in the 'US' column of the results table.
table.loc[:,'US'] = pars

In [12]:
# Add a 'sigs' row computed column by column as sige / (1 - rho**2).
# The value is only finite and positive when |rho| < 1 and sige > 0.
table.loc['sigs',:] = table.loc['sige',:]/(1.0-table.loc['rho',:]**2)
table

Unnamed: 0,DE,DK,FR,IT,NL,SE,SP,US
rho,0.928252,0.934175,0.966967,0.912178,0.939743,0.934175,0.97228,0.958145
sige,0.029244,0.011039,0.012231,0.032484,0.017194,0.011039,0.010824,0.037235
sigv,0.055352,0.073201,0.081796,0.096826,0.086212,0.073201,0.085114,0.118764
sigs,0.211379,0.086705,0.188239,0.193439,0.1471,0.086705,0.197983,0.45432


In [13]:
# Write the table, rounded to three decimals, as a LaTeX file.
table.round(3).to_latex('../tables/table_5_income_shocks.tex')

In [14]:
# Keep only the 'rho' and 'sige' rows (all columns) and pickle them.
params = table.loc[['rho','sige']]
params.to_pickle('../model/params/income_shocks.pkl')