In [5]:
import pandas as pd 
import numpy as np 
from matplotlib import pyplot as plt

# Preliminaries

We first load up the merged data from SHARE and HRS. 

In [6]:
df = pd.read_pickle('../data_sources/hrs-share_wide_select.pkl')

In [10]:
df.head()

Unnamed: 0,riwstat_w1,riwstat_w2,rage_w1,rage_w2,radla_w1,radla_w2,wgid_w1,wgid_w2,hitot_w1,hitot_w2,...,hhidpn_w2,pppx_w2,rdrinkv_w2,cid,cname,share,gg2,gb2,g_w1,g_w2
3010,1.0,1.0,68.0,70.0,0.0,0.0,4067.0,4093.0,43736.506385,34076.999401,...,,,,8,US,0,1.0,,1.0,1.0
3020,1.0,1.0,65.0,67.0,0.0,0.0,4132.0,4210.0,43736.506385,34076.999401,...,,,,8,US,0,1.0,,1.0,1.0
10001010,1.0,1.0,64.0,66.0,0.0,0.0,6890.0,7434.0,9298.986526,9329.509552,...,,,,8,US,0,1.0,,1.0,1.0
10004010,1.0,1.0,64.0,66.0,1.0,0.0,5011.0,5217.0,75047.850487,63443.065392,...,,,,8,US,0,,1.0,0.0,1.0
10004040,1.0,1.0,58.0,60.0,0.0,0.0,5182.0,5299.0,75047.850487,63443.065392,...,,,,8,US,0,1.0,,1.0,1.0


This is the function for a weighted mean. 

In [11]:
def wmean(x,var,wvar):
    xx = x.loc[~x[var].isna(),:]
    names = {var: (xx[var] * xx[wvar]).sum()/xx[wvar].sum()}
    return pd.Series(names, index=[var])

# Computing moments 

We compute the mean and standard deviation of moments using the bootstrap.

In [12]:
cnames = ['US','FR','IT','DE','DK','SP','NL','SE']

In [13]:
def onesamp(df):
	table = pd.DataFrame(index=cnames,columns=['gg2','gb2'])
	# sample with replacement
	dfi = df.groupby('cname', group_keys=False).apply(lambda x: x.sample(n=len(x),replace=True))
	# compute stats
	for c in table.columns:
		table[c] = dfi.groupby('cname').apply(wmean,var=c,wvar='wgid_w1')
	for c in table.index:
		p2 = np.zeros((2,2))
		p2[0,0] = table.loc[c,'gg2']
		p2[0,1] = 1-p2[0,0]
		p2[1,0] = table.loc[c,'gb2']
		p2[1,1] = 1- p2[1,0]
		eigvalues,eigvectors = np.linalg.eig(p2)
		eigvalues = np.sqrt(eigvalues)
		p1 = eigvectors @ np.diag(eigvalues) @ np.linalg.inv(eigvectors)
		table.loc[c,'gg'] = p1[0,0]
		table.loc[c,'gb'] = p1[1,0]
		df_c = dfi.loc[dfi['cname']==c,:]
		df_c.loc[:,'qinc'] = pd.qcut(df_c.loc[:,'hitot_w2'],q=5)
		grad = df_c.groupby('qinc').apply(wmean,var='g_w1',wvar='wgid_w1')
		grad.index = [x for x in range(1,6)]
		table.loc[c,'g_q2'] = grad.loc[2,'g_w1']/grad.loc[1,'g_w1']
		table.loc[c,'g_q3'] = grad.loc[3,'g_w1']/grad.loc[1,'g_w1']
		table.loc[c,'g_q4'] = grad.loc[4,'g_w1']/grad.loc[1,'g_w1']
		table.loc[c,'g_q5'] = grad.loc[5,'g_w1']/grad.loc[1,'g_w1']
		
	table = table[['gg','gb','g_q2','g_q3','g_q4','g_q5']]
	return table
	

In [14]:
moments = pd.DataFrame(columns=['rep','gg','gb','g_q2','g_q3','g_q4','g_q5'])
nreps = 1000
np.random.seed(1234)
for r in range(nreps):
	mom_r = onesamp(df)
	mom_r['rep'] = int(r)
	moments = moments.append(mom_r)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c.loc[:,'qinc'] = pd.qcut(df_c.loc[:,'hitot_w2'],q=5)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c.loc[:,'qinc'] = pd.qcut(df_c.loc[:,'hitot_w2'],q=5)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c.loc[:,'qinc'] = pd.qcut(df_c.loc[:,'hitot_w2'],q=5)
A value is trying to be set on a c

In [16]:
moments = moments.reset_index()
moments.set_index(['index','rep'],inplace=True)

These are the means

In [17]:
moments.groupby('index').mean()

Unnamed: 0_level_0,gg,gb,g_q2,g_q3,g_q4,g_q5
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DE,0.97413,0.324954,1.031285,1.060159,1.076667,1.094326
DK,0.979951,0.349616,1.079872,1.0852,1.1023,1.115071
FR,0.9741,0.314905,1.005314,1.020741,1.072521,1.078967
IT,0.971979,0.321504,1.007445,1.026823,1.00235,1.0374
NL,0.973349,0.482957,1.026273,1.046977,1.025111,1.018553
SE,0.972525,0.299521,1.01737,1.052966,1.067656,1.07219
SP,0.964747,0.357488,1.025436,1.024959,1.02447,1.052577
US,0.96166,0.205422,1.167562,1.227079,1.267897,1.309073


And the standard deviations

In [18]:
moments.groupby('index').std()

Unnamed: 0_level_0,gg,gb,g_q2,g_q3,g_q4,g_q5
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DE,0.00357,0.043745,0.032738,0.029921,0.029954,0.029093
DK,0.003525,0.049239,0.037587,0.037203,0.035282,0.033724
FR,0.003529,0.036997,0.028492,0.027611,0.024326,0.024265
IT,0.003825,0.042802,0.028132,0.028091,0.029154,0.026625
NL,0.004043,0.064694,0.021687,0.019366,0.020938,0.021796
SE,0.003644,0.043259,0.026813,0.02334,0.022942,0.022363
SP,0.005239,0.04396,0.033412,0.034185,0.035465,0.03133
US,0.001577,0.009521,0.019621,0.018949,0.019077,0.018596


We save these results