In [1]:
import pandas as pd 
import numpy as np
from scipy.optimize import minimize
from functools import partial
from statsmodels.formula.api import ols

In [2]:
# Load the ECHP income panel into the module-level `df`; get_pars() reads
# this name, so whatever `df` is bound to decides which sample it estimates on.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('../data_sources/echp/echp_incomes.pkl')
df.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wave,year,age,co,Y,logY
country,hid,pid,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,7601,76101,1.0,1994.0,42.0,DK,0.829935,-0.186408
2,8401,84101,1.0,1994.0,70.0,DK,1.082765,0.079518
2,10701,107101,1.0,1994.0,22.0,DK,0.770043,-0.26131
2,16501,165101,1.0,1994.0,46.0,DK,1.336161,0.2898
2,19901,199101,1.0,1994.0,23.0,DK,0.821379,-0.196771


In [3]:
# Restrict the sample to ages 25-85 (both bounds inclusive).
# NOTE(review): the summary displayed below reports a maximum `year` of 2021
# while `wave` tops out at 8 -- verify the `year` column upstream.
df = df[df['age'].between(25,85)]
df.describe()

Unnamed: 0,wave,year,age,Y,logY
count,48797.0,48797.0,48797.0,48797.0,48797.0
mean,4.333689,1999.628092,57.594832,1.006108,-0.151534
std,2.306214,7.922599,18.381527,0.533914,0.655234
min,1.0,1994.0,25.0,0.000315,-8.064229
25%,2.0,1995.0,40.0,0.63911,-0.447679
50%,4.0,1997.0,62.0,0.900252,-0.105081
75%,6.0,1999.0,73.0,1.254588,0.226807
max,8.0,2021.0,85.0,3.519351,1.258277


In [4]:
def vech(cov):
	"""Stack the lower triangle of a square matrix into a 1-D array.

	Elements are taken column by column: for each column `c`, rows `c`
	through the last row are appended in order (diagonal included).

	Parameters
	----------
	cov : 2-D array indexable as ``cov[row, col]``; only ``cov.shape[0]``
		is used to size the traversal.

	Returns
	-------
	numpy.ndarray of length ``T*(T+1)/2`` where ``T = cov.shape[0]``.
	"""
	size = cov.shape[0]
	stacked = [cov[row,col] for col in range(size) for row in range(col,size)]
	return np.array(stacked)
def mdfunc(theta,cov,weight):
	"""Minimum-distance objective for the income-shock covariance model.

	The model-implied covariance between periods r and c is
	``(r == c)*sigv + rho**|r - c| * sige / (1 - rho**2)``.
	The objective is the quadratic form ``d' W d`` where ``d`` is the
	difference between the vech of `cov` and the vech of the implied matrix.

	Parameters
	----------
	theta : sequence ``(rho, sige, sigv)``.
	cov : square empirical covariance matrix (T x T).
	weight : weighting matrix conformable with ``vech(cov)``.

	Returns
	-------
	One-element array holding the value of the quadratic form.
	"""
	rho, sige, sigv = theta[0], theta[1], theta[2]
	n_periods = cov.shape[0]
	# Common denominator of the persistent term, shared by every cell.
	denom = 1.0-rho**2
	implied = np.zeros((n_periods,n_periods))
	for r, c in np.ndindex(n_periods,n_periods):
		implied[r,c] = (r==c)*sigv + (rho**(np.abs(r-c)))*sige/denom
	gap = vech(cov) - vech(implied)
	# Row vector @ W @ column vector gives a (1, 1) array; return its first row.
	quad = gap[np.newaxis,:] @ weight @ gap[:,np.newaxis]
	return quad[0]

In [5]:
# Seed NumPy's global generator so the bootstrap draws below are reproducible.
np.random.seed(seed=1234)
def get_cov(data,reps):
	"""Empirical covariance matrix and a bootstrap-based weighting matrix.

	Rows of `data` are resampled with replacement `reps` times; for each
	replicate the covariance matrix is recomputed and stacked with vech().
	The weight is the inverse of the covariance of those stacked moments
	across replicates (deviations from the bootstrap mean, divided by
	`reps`).

	Fix relative to the previous version: the accumulated matrix used to be
	the *uncentred* second moment ``mean(m m')``, which is the variance of
	the moments plus the outer product of their mean, not their variance.
	An unused outer product and an unused local were also dropped.

	Parameters
	----------
	data : pandas.DataFrame whose columns are the periods and whose rows
		are the resampling units.
	reps : int, number of bootstrap replications (must be >= 1). The
		centred covariance has rank at most ``reps - 1``, so `reps` must
		exceed the number of stacked moments for the inverse to exist.

	Returns
	-------
	(cov, weight) : the covariance matrix of `data` as an ndarray, and the
		inverse bootstrap covariance of its vech.

	Raises
	------
	ValueError if `reps` is smaller than 1.
	numpy.linalg.LinAlgError if the bootstrap covariance is singular.

	Notes
	-----
	`DataFrame.sample` is called without `random_state`, so the draws come
	from NumPy's global random state (seeded just above).
	"""
	if reps < 1:
		raise ValueError('reps must be a positive integer')
	cov = data.cov().to_numpy()
	n_obs = len(data)
	# One row per bootstrap replicate, one column per stacked moment.
	draws = np.array([
		vech(data.sample(n=n_obs,replace=True).cov().to_numpy())
		for _ in range(reps)
	])
	dev = draws - draws.mean(axis=0)
	moment_cov = dev.T @ dev / reps
	weight = np.linalg.inv(moment_cov)
	return cov, weight

In [6]:
def get_pars(co, frame=None):
	"""Estimate (rho, sige, sigv) for one country by minimum distance.

	Steps: keep rows for `co` with non-missing logY, regress logY on a full
	set of age dummies, reshape the residuals to a pid x wave panel, take
	the covariance matrix across waves, and fit mdfunc() to it with an
	identity weighting matrix.

	Parameters
	----------
	co : country code matched against the 'co' column.
	frame : optional pandas.DataFrame to estimate on. It needs 'co', 'wave',
		'age' and 'logY' columns, and 'pid' either as a column or as an
		index level. When omitted, the module-level `df` is used, which is
		the previous behaviour; pass it explicitly to avoid depending on
		whichever dataset `df` currently points at.

	Returns
	-------
	numpy.ndarray ``[rho, sige, sigv]`` (the optimiser's solution vector).

	Raises
	------
	ValueError if no usable rows exist for `co`.

	Warns
	-----
	RuntimeWarning if the optimiser does not report success; the last
	iterate is still returned.
	"""
	import warnings

	source = df if frame is None else frame
	keep = (source['co']==co) & source['logY'].notna()
	data = source.loc[keep,:].reset_index()
	data = data.loc[:,['pid','wave','age','logY']]
	if data.empty:
		raise ValueError('no observations with non-missing logY for country %r' % (co,))
	# Residualise log income on age dummies.
	model = ols('logY ~ C(age)',data=data).fit()
	resid = data.loc[:,['pid','wave']].assign(e=model.resid.to_numpy())
	# Wide panel: one row per person, one column per wave.
	panel = pd.pivot(resid,index='pid',columns='wave')
	cov = panel.cov().to_numpy()
	weight = np.eye(vech(cov).shape[0])
	f = partial(mdfunc,cov=cov,weight=weight)
	itheta = np.array([0.9,0.02,0.02])
	opt = minimize(f,x0=itheta)
	if not opt.success:
		warnings.warn(
			'minimum-distance fit for %r did not converge: %s' % (co, opt.message),
			RuntimeWarning,
		)
	return opt.x
	

In [7]:
# Estimate the shock parameters country by country; rows are the parameters,
# columns the country codes.
countries = ['DE','DK','FR','NL','IT','SP']
table = pd.DataFrame(columns=countries,index=['rho','sige','sigv'])
for co in countries:
	table[co] = get_pars(co)
# SE is not estimated here: its column is a copy of the DK estimates.
# NOTE(review): presumably a deliberate imputation -- confirm and document why.
table['SE'] = table['DK']
table

Unnamed: 0,DE,DK,FR,NL,IT,SP,SE
rho,0.904457,0.908792,0.958514,0.945239,0.91913,0.968671,0.908792
sige,0.048186,0.016419,0.017261,0.017505,0.03379,0.015916,0.016419
sigv,0.124999,0.103823,0.187817,0.159557,0.169408,0.393699,0.103823


In [8]:
# Put the country columns in alphabetical order for the final table.
table = table[['DE','DK','FR','IT','NL','SE','SP']]
table

Unnamed: 0,DE,DK,FR,IT,NL,SE,SP
rho,0.904457,0.908792,0.958514,0.91913,0.945239,0.908792,0.968671
sige,0.048186,0.016419,0.017261,0.03379,0.017505,0.016419,0.015916
sigv,0.124999,0.103823,0.187817,0.169408,0.159557,0.103823,0.393699


In [9]:
# Rebind the module-level `df` to the PSID panel. get_pars() reads `df` by
# name, so from here on it estimates on PSID rather than ECHP.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('../data_sources/psid/psid_incomes.pkl')
df.head(n=5)

Unnamed: 0,co,pid,year,age,Y,logY,wave
0,US,4001,1989,67.0,0.763534,-0.269798,1
2,US,4003,1989,38.0,0.95448,-0.046588,1
6,US,4008,1989,24.0,0.725096,-0.321451,1
15,US,4170,1989,34.0,0.836848,-0.178113,1
16,US,4172,1989,40.0,1.657952,0.505583,1


In [10]:
# Same age window as for the ECHP sample: 25-85, both bounds inclusive.
df = df[df['age'].between(25,85)]
df.describe()

Unnamed: 0,pid,year,age,Y,logY,wave
count,63863.0,63863.0,63863.0,63863.0,63863.0,63863.0
mean,3513763.0,1993.044298,45.825658,1.035352,-0.262842,5.044298
std,2305610.0,2.517794,15.301322,0.723412,0.906397,2.517794
min,4001.0,1989.0,25.0,3e-05,-10.427394,1.0
25%,1401003.0,1991.0,34.0,0.491355,-0.710588,3.0
50%,2679003.0,1993.0,42.0,0.886871,-0.120055,5.0
75%,5821001.0,1995.0,56.0,1.418418,0.349542,7.0
max,6872177.0,1997.0,85.0,4.290564,1.456418,9.0


In [11]:
# `df` now holds the PSID panel, so this estimates the US parameters and
# stores them as a new 'US' column.
pars = get_pars(co='US')
table.loc[:,'US'] = pars

In [12]:
# Add a 'sigs' row: sige / (1 - rho^2), computed column by column.
table.loc['sigs',:] = table.loc['sige',:].div(1.0-table.loc['rho',:].pow(2))

In [13]:
# Write the table, rounded to three decimals, as LaTeX.
table.round(decimals=3).to_latex(buf='../tables/table_5_income_shocks.tex')

In [14]:
# Persist only the 'rho' and 'sige' rows (all country columns) for the model.
params = table.loc[['rho','sige']]
params.to_pickle(path='../model/params/income_shocks.pkl')