# Task 

This notebook produces the income shock parameters. For the U.S., we use data from the PSID, and for Europe, data from the ECHP. The notebooks Preparation PSID and Preparation ECHP prepare the datasets used for this task. 

In [None]:
import pandas as pd 
import numpy as np
from scipy.optimize import minimize
from functools import partial
from statsmodels.formula.api import ols

We load the ECHP data.

In [2]:
# Read the ECHP income panel from its pickle file and preview the first rows.
# NOTE(review): presumably this file is written by the "Preparation ECHP" notebook -- confirm.
df = pd.read_pickle('../data_sources/echp/echp_incomes.pkl')
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,wave,year,age,co,Y,logY
country,hid,pid,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,7601,76101,1.0,1994.0,42.0,DK,0.827465,-0.189389
2,8401,84101,1.0,1994.0,70.0,DK,1.079543,0.076538
2,10701,107101,1.0,1994.0,22.0,DK,0.767751,-0.26429
2,16501,165101,1.0,1994.0,46.0,DK,1.332184,0.28682
2,19901,199101,1.0,1994.0,23.0,DK,0.818934,-0.199751


We restrict the sample to ages 25 to 85 (inclusive) for this task. 

In [3]:
# Keep observations with age in [25, 85] (both bounds included), then summarise.
df = df.loc[df['age'].between(25, 85)]
df.describe()

Unnamed: 0,wave,year,age,Y,logY
count,49335.0,49335.0,49335.0,49335.0,49335.0
mean,4.338218,1999.642059,57.551191,1.022995,-0.139069
std,2.307135,7.935081,18.349595,0.587244,0.619135
min,1.0,1994.0,25.0,0.01239,-4.39087
25%,2.0,1995.0,40.0,0.632637,-0.457858
50%,4.0,1997.0,62.0,0.896917,-0.108792
75%,6.0,1999.0,73.0,1.256659,0.228456
max,8.0,2021.0,85.0,4.053713,1.399633


These are the functions for estimating parameters by minimum distance. 

In [4]:
def vech(cov):
	"""Stack the lower triangle of a square matrix into a 1-D array.

	Entries are taken column by column: for column c the values
	cov[c, c], cov[c+1, c], ..., cov[n-1, c] are appended in that order,
	where n = cov.shape[0].
	"""
	n = cov.shape[0]
	return np.array([cov[row, col] for col in range(n) for row in range(col, n)])
def mdfunc(theta,cov,weight):
	"""Minimum-distance criterion for the income-shock covariance structure.

	theta  : sequence (rho, sige, sigv).
	cov    : (T, T) empirical covariance matrix; only the entries picked up by
	         vech (its lower triangle) enter the criterion.
	weight : weighting matrix conformable with vech(cov).

	The model-implied covariance between positions s and t is
	    1[s == t] * sigv + rho**|s - t| * sige / (1 - rho**2),
	i.e. sigv is added on the diagonal only and the second term decays
	geometrically with the distance between positions.

	Returns d' W d with d = vech(cov) - vech(implied), as a length-1 array
	(row 0 of the 1x1 matrix product).
	"""
	rho, sige, sigv = theta[0], theta[1], theta[2]
	periods = cov.shape[0]
	implied = np.zeros((periods,periods))
	for s in range(periods):
		for t in range(periods):
			# Persistent component at lag |s - t|.
			persistent = (rho**(np.abs(s-t)))*sige/(1.0-rho**2)
			implied[s,t] = (s==t)*sigv + persistent
	gap = vech(cov) - vech(implied)
	n_moments = len(gap)
	quad = gap.reshape((1,n_moments)) @ weight @ gap.reshape((n_moments,1))
	return quad[0]

In [5]:
# Seed NumPy's global generator: DataFrame.sample below is called without
# random_state and therefore draws from it.
np.random.seed(seed=1234)
def get_cov(data,reps):
	"""Return the sample covariance matrix and a bootstrap weighting matrix.

	data : DataFrame whose columns are the variables to take covariances of.
	reps : number of bootstrap replications (must be at least 1).

	Each replication resamples the rows of `data` with replacement (same
	number of rows), computes the covariance matrix of the resample and
	stacks it with vech. The weighting matrix is the inverse of the
	bootstrap covariance matrix of those stacked moments (deviations from
	the bootstrap mean, divided by `reps`).

	Returns (cov, weight): `cov` is data.cov() as an array, `weight` is square
	with side len(vech(cov)).

	Raises ValueError if reps < 1; numpy.linalg.LinAlgError if the bootstrap
	covariance matrix is singular (e.g. too few replications).
	"""
	if reps < 1:
		raise ValueError("reps must be at least 1")
	cov = data.cov().to_numpy()
	# One row per replication, one column per stacked moment.
	draws = np.array([
		vech(data.sample(n=len(data),replace=True).cov().to_numpy())
		for _ in range(reps)
	])
	# Centre the draws so the matrix being inverted is a variance of the
	# moments rather than a raw second-moment matrix.
	centred = draws - draws.mean(axis=0)
	weight = np.linalg.inv(centred.T @ centred / reps)
	return cov, weight

In [6]:
def get_pars(co,data=None):
	"""Estimate (rho, sige, sigv) for one country by minimum distance.

	co   : country code matched against the 'co' column.
	data : optional income panel providing 'co', 'wave', 'age', 'logY' and
	       'pid' (as columns, or as index levels that reset_index turns into
	       columns). When omitted, the notebook-level `df` is used, i.e.
	       whichever panel is loaded at the time of the call.

	Steps:
	  1. keep rows for `co` with non-missing logY;
	  2. regress logY on a full set of age dummies and keep the residuals;
	  3. reshape the residuals to one row per pid and one column per wave and
	     take the covariance matrix across waves;
	  4. minimise mdfunc with an identity weighting matrix, starting from
	     (0.9, 0.02, 0.02).

	NOTE(review): mdfunc measures lags by column position, so this assumes
	the wave columns are consecutive, equally spaced periods -- confirm.

	Returns the optimiser's solution vector. A warning is issued if the
	optimiser does not report success; the solution is returned regardless.
	"""
	import warnings
	panel = df if data is None else data
	keep = (panel['co']==co) & panel['logY'].notna()
	sample = panel.loc[keep,:].reset_index()
	sample = sample.loc[:,['pid','wave','age','logY']]
	model = ols('logY ~ C(age)',data=sample).fit()
	# assign() builds a new frame, avoiding a write into a .loc slice.
	resid = sample.loc[:,['pid','wave']].assign(e=model.resid.to_numpy())
	wide = pd.pivot(resid,index='pid',columns='wave',values='e')
	cov  = wide.cov().to_numpy()
	weight = np.eye(vech(cov).shape[0])
	f = partial(mdfunc,cov=cov,weight=weight)
	itheta = np.array([0.9,0.02,0.02])
	opt = minimize(f,x0=itheta)
	if not opt.success:
		warnings.warn(f"minimum-distance fit for {co!r} did not report success: {opt.message}")
	return opt.x
	

We now estimate the parameters for each country. The SE column is not estimated separately; it is set equal to the DK estimates. 

In [7]:
# One column of (rho, sige, sigv) estimates per country, in the order listed.
countries = ['DE','DK','FR','NL','IT','SP']
table = pd.DataFrame({co: get_pars(co) for co in countries},index=['rho','sige','sigv'])
# SE is not estimated here: its column is a copy of the DK column.
table['SE'] = table['DK']
table

Unnamed: 0,DE,DK,FR,NL,IT,SP,SE
rho,0.920811,0.912798,0.967463,0.943292,0.937389,0.972208,0.912798
sige,0.038995,0.015786,0.013734,0.018716,0.028802,0.014819,0.015786
sigv,0.076511,0.070812,0.133274,0.118675,0.157314,0.26314,0.070812


We rearrange the column order for presentation in the paper. 

In [8]:
# Put the country columns in the order used for the paper's table.
table = table[['DE','DK','FR','IT','NL','SE','SP']]
table

Unnamed: 0,DE,DK,FR,IT,NL,SE,SP
rho,0.920811,0.912798,0.967463,0.937389,0.943292,0.912798,0.972208
sige,0.038995,0.015786,0.013734,0.028802,0.018716,0.015786,0.014819
sigv,0.076511,0.070812,0.133274,0.157314,0.118675,0.070812,0.26314


We now repeat the same for the PSID (US) data. 

In [9]:
# Read the PSID income panel from its pickle file and preview the first rows.
# This rebinds `df`: from here on, code that reads the notebook-level `df`
# sees the PSID panel instead of the ECHP one.
df = pd.read_pickle('../data_sources/psid/psid_incomes.pkl')
df.head()

Unnamed: 0,co,pid,year,age,Y,logY,wave
0,US,4001,1989,67.0,0.720443,-0.327888,1
2,US,4003,1989,38.0,0.900614,-0.104679,1
6,US,4008,1989,24.0,0.684175,-0.379542,1
15,US,4170,1989,34.0,0.78962,-0.236204,1
16,US,4172,1989,40.0,1.564385,0.447493,1


In [10]:
# Same age window as for the ECHP sample: keep ages in [25, 85], bounds included.
df = df.loc[df['age'].between(25, 85),:]
df.describe()

Unnamed: 0,pid,year,age,Y,logY,wave
count,64552.0,64552.0,64552.0,64552.0,64552.0,64552.0
mean,3497481.0,1993.055103,45.85593,0.992746,-0.280264,5.055103
std,2304755.0,2.518986,15.271426,0.685772,0.802517,2.518986
min,4001.0,1989.0,25.0,0.093259,-2.372372,1.0
25%,1398002.0,1991.0,34.0,0.46773,-0.759865,3.0
50%,2665005.0,1993.0,42.0,0.845519,-0.167804,5.0
75%,5815001.0,1995.0,56.0,1.358952,0.306714,7.0
max,6872177.0,1997.0,85.0,2.982882,1.09289,9.0


In [11]:
# Estimate the US parameters and append them as a new 'US' column.
pars = get_pars('US')
table['US'] = pars

We add to the table the stationary variance of the persistent process, $\sigma_s^2 = \sigma_\epsilon^2 / (1 - \rho^2)$ (row `sigs`). 

In [12]:
# New row 'sigs' = sige / (1 - rho^2), computed column by column.
table.loc['sigs'] = table.loc['sige'].div(1.0 - table.loc['rho'].pow(2))
table

Unnamed: 0,DE,DK,FR,IT,NL,SE,SP,US
rho,0.920811,0.912798,0.967463,0.937389,0.943292,0.912798,0.972208,0.958992
sige,0.038995,0.015786,0.013734,0.028802,0.018716,0.015786,0.014819,0.039076
sigv,0.076511,0.070812,0.133274,0.157314,0.118675,0.070812,0.26314,0.097208
sigs,0.256367,0.094643,0.214538,0.237439,0.169834,0.094643,0.270357,0.486429


This is Table 5 in the paper. 

In [13]:
# Write the table, rounded to three decimals, as LaTeX.
table.round(decimals=3).to_latex('../tables/table_5_income_shocks.tex')

Saving parameters for input in the model. 

In [14]:
# Only the 'rho' and 'sige' rows are saved; 'sigv' and 'sigs' are not included.
params = table.loc[['rho','sige']]
params.to_pickle('../model/params/income_shocks.pkl')