# Comparison of Health Measures

In section 2, Table 4, we compare health outcomes across countries. This comparison is done with SHARE and HRS data along with data on remaining life expectancy from mortality.org, the Human Mortality Database. 

In [310]:
import pandas as pd 
import numpy as np 

## Health Data

We use data from our sample of age 50 to 75 respondents in SHARE and HRS 2004. We compute the prevalence of various health conditions in the data (weighted). 

In [311]:
df = pd.read_pickle('../data_sources/hrs-share_wide_select.pkl')

In [312]:
df.head()

Unnamed: 0,riwstat_w1,riwstat_w2,rage_w1,rage_w2,radla_w1,radla_w2,wgid_w1,wgid_w2,hitot_w1,hitot_w2,...,hhidpn_w2,pppx_w2,rdrinkv_w2,cid,cname,share,gg2,gb2,g_w1,g_w2
3010,1.0,1.0,68.0,70.0,0.0,0.0,4067.0,4093.0,43736.506385,34076.999401,...,,,,8,US,0,1.0,,1.0,1.0
3020,1.0,1.0,65.0,67.0,0.0,0.0,4132.0,4210.0,43736.506385,34076.999401,...,,,,8,US,0,1.0,,1.0,1.0
10001010,1.0,1.0,64.0,66.0,0.0,0.0,6890.0,7434.0,9298.986526,9329.509552,...,,,,8,US,0,1.0,,1.0,1.0
10004010,1.0,1.0,64.0,66.0,1.0,0.0,5011.0,5217.0,75047.850487,63443.065392,...,,,,8,US,0,,1.0,0.0,1.0
10004040,1.0,1.0,58.0,60.0,0.0,0.0,5182.0,5299.0,75047.850487,63443.065392,...,,,,8,US,0,1.0,,1.0,1.0


In [313]:
df['tot_cond'] = df[['rhibpe_w1','rdiabe_w1','rlunge_w1','rhearte_w1','rstroke_w1']].sum(axis=1)

We tag along remaining life expectancy data to these (from HMD, 2005, both sexes). We input it here just to get the weighted average in Europe right. 

In [314]:
ex = {1: 31.14,2: 32.11,3: 31.22,4:32.26,5:32.66,6:32.56,7:30.15,8:30.65}

In [315]:
df['ex50'] = df['cid'].replace(ex)

In [316]:
outcomes = ['rhibpe_w1','rdiabe_w1','rlunge_w1','rhearte_w1','rstroke_w1','tot_cond','radla_w1','ex50']

In [317]:
df[outcomes].describe()

Unnamed: 0,rhibpe_w1,rdiabe_w1,rlunge_w1,rhearte_w1,rstroke_w1,tot_cond,radla_w1,ex50
count,28490.0,28489.0,28494.0,28487.0,28497.0,28594.0,28591.0,28594.0
mean,0.38782,0.129453,0.060188,0.134658,0.040215,0.749598,0.090098,31.265974
std,0.489633,0.33914,0.24266,0.344741,0.202275,0.932723,0.286328,0.808048
min,-2.0,-2.0,-2.0,-2.0,-2.0,-10.0,0.0,30.15
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.65
50%,0.0,0.0,0.0,0.0,0.0,1.0,0.0,30.65
75%,1.0,0.0,0.0,0.0,0.0,1.0,0.0,32.11
max,1.0,1.0,1.0,1.0,1.0,5.0,1.0,32.66


In [318]:
table = pd.DataFrame(index=np.arange(1,9),columns=outcomes)

## Weighted prevalences

In [319]:
for v in table.columns:
    for c in table.index: 
        table.loc[c,v] = (df.loc[df.cid==c,v] * df.loc[df.cid==c,'wgid_w1']).sum()/df.loc[df.cid==c,'wgid_w1'].sum()

In [320]:
table

Unnamed: 0,rhibpe_w1,rdiabe_w1,rlunge_w1,rhearte_w1,rstroke_w1,tot_cond,radla_w1,ex50
1,0.338332,0.105475,0.042372,0.090277,0.033663,0.610119,0.06687,31.14
2,0.260704,0.079245,0.025064,0.109462,0.02762,0.502094,0.054418,32.11
3,0.238739,0.074387,0.061635,0.090995,0.034944,0.5007,0.051532,31.22
4,0.288678,0.124678,0.044849,0.077439,0.018381,0.554025,0.069142,32.26
5,0.349513,0.106708,0.059763,0.082135,0.020997,0.619116,0.075021,32.66
6,0.256112,0.086369,0.047844,0.102731,0.022951,0.516008,0.072206,32.56
7,0.277222,0.066038,0.059248,0.063447,0.037674,0.503629,0.069422,30.15
8,0.445441,0.147332,0.068621,0.159019,0.044803,0.865216,0.108223,30.65


## Table with European average

In [321]:
table_sum = pd.DataFrame(index=[9,10],columns=outcomes)

In [322]:
df['europe'] = np.where(df['cid']!=8,9,10)


In [323]:
for v in table_sum.columns:
    for c in table_sum.index: 
        d = df.loc[(df.europe==c) & (~df[v].isna()),:]
        table_sum.loc[c,v] = (d[v] * d['wgid_w1']).sum()/d['wgid_w1'].sum()

In [324]:
table_sum

Unnamed: 0,rhibpe_w1,rdiabe_w1,rlunge_w1,rhearte_w1,rstroke_w1,tot_cond,radla_w1,ex50
9,0.309545,0.101905,0.048994,0.090082,0.026521,0.572638,0.068951,31.959003
10,0.445854,0.147463,0.068666,0.159199,0.044831,0.865216,0.108248,30.65


## Collecting and cleaning up

In [325]:
table = table.append(table_sum.loc[9,:])

In [326]:
map_cid = {1: 'Germany',2: 'Sweden',3: 'Netherlands',4:'Spain',5:'Italy',6:'France',7:'Denmark',8:'United States',9:'Europe'}

In [327]:
table.index = table.index.to_series().replace(map_cid)

In [328]:
table.columns = ['hypertension','diabetes','lung','heart','stroke','total cond.','ADLs','Life exp (50)']

In [329]:
for c in table.columns:
    table[c] = table[c].astype('float64')

In [330]:
table = table.loc[['Germany','Sweden','Netherlands','Spain','Italy','France','Denmark','Europe','United States'],:]

In [331]:
table.round(3)

Unnamed: 0,hypertension,diabetes,lung,heart,stroke,total cond.,ADLs,Life exp (50)
Germany,0.338,0.105,0.042,0.09,0.034,0.61,0.067,31.14
Sweden,0.261,0.079,0.025,0.109,0.028,0.502,0.054,32.11
Netherlands,0.239,0.074,0.062,0.091,0.035,0.501,0.052,31.22
Spain,0.289,0.125,0.045,0.077,0.018,0.554,0.069,32.26
Italy,0.35,0.107,0.06,0.082,0.021,0.619,0.075,32.66
France,0.256,0.086,0.048,0.103,0.023,0.516,0.072,32.56
Denmark,0.277,0.066,0.059,0.063,0.038,0.504,0.069,30.15
Europe,0.31,0.102,0.049,0.09,0.027,0.573,0.069,31.959
United States,0.445,0.147,0.069,0.159,0.045,0.865,0.108,30.65


In [332]:
for v in table.columns:
    table.loc['$\Delta $',v] = table.loc['United States',v] - table.loc['Europe',v] 
    table.loc['$\Delta (\%)$',v] = table.loc['United States',v]/ table.loc['Europe',v] - 1
    

In [333]:
table.round(3)

Unnamed: 0,hypertension,diabetes,lung,heart,stroke,total cond.,ADLs,Life exp (50)
Germany,0.338,0.105,0.042,0.09,0.034,0.61,0.067,31.14
Sweden,0.261,0.079,0.025,0.109,0.028,0.502,0.054,32.11
Netherlands,0.239,0.074,0.062,0.091,0.035,0.501,0.052,31.22
Spain,0.289,0.125,0.045,0.077,0.018,0.554,0.069,32.26
Italy,0.35,0.107,0.06,0.082,0.021,0.619,0.075,32.66
France,0.256,0.086,0.048,0.103,0.023,0.516,0.072,32.56
Denmark,0.277,0.066,0.059,0.063,0.038,0.504,0.069,30.15
Europe,0.31,0.102,0.049,0.09,0.027,0.573,0.069,31.959
United States,0.445,0.147,0.069,0.159,0.045,0.865,0.108,30.65
$\Delta $,0.136,0.045,0.02,0.069,0.018,0.293,0.039,-1.309


## Output to LaTeX

In [334]:
table.round(3).to_latex('../tables/table_4_comparison_health.tex')

# Additional Material for Table 4 and Static Model (Not Used)

In [335]:
def wmean(x,var,wvar):
    xx = x.loc[~x[var].isna(),:]
    names = {var: (xx[var] * xx[wvar]).sum()/xx[wvar].sum()}
    return pd.Series(names, index=[var])

In [336]:
outcomes = ['rhibpe_w1','rdiabe_w1','rlunge_w1','rhearte_w1','rstroke_w1','radla_w1','hitot_w2']
table = pd.DataFrame(index=[0,1],columns=outcomes)
incs = pd.DataFrame(index=[0,1],columns=['q1','q2','q3','q4'])
for c in [0,1]:
	df_c = df.loc[df['share']==c,:]
	p99 = df_c['hitot_w2'].quantile(0.99)
	p01 = df_c['hitot_w2'].quantile(0.01)
	df_c['hitot_w2'] = df_c['hitot_w2'].clip(lower=p01,upper=p99)
	df_c.loc[:,'qinc'] = pd.qcut(df_c.loc[:,'hitot_w2'],q=4)
	for v in outcomes:
		grad = df_c.groupby('qinc').apply(wmean,var=v,wvar='wgid_w1')	
		grad.index = [x for x in range(1,5)]
		if v!='hitot_w2':
			table.loc[c,v] = (1-grad.loc[4,v])/(1-grad.loc[1,v])
		else :
			minc = grad.loc[:,v].mean()
			grad.loc[:,v] = grad.loc[:,v]/minc
			table.loc[c,v] = grad.loc[4,v]/grad.loc[1,v]
			incs.loc[c,'q1'] = grad.loc[1,v]
			incs.loc[c,'q2'] = grad.loc[2,v]
			incs.loc[c,'q3'] = grad.loc[3,v]
			incs.loc[c,'q4'] = grad.loc[4,v]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_c['hitot_w2'] = df_c['hitot_w2'].clip(lower=p01,upper=p99)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [337]:
table.index = ['US','EU']

In [338]:
table = table.loc[['EU','US'],['rhibpe_w1','rdiabe_w1','rlunge_w1','rhearte_w1','rstroke_w1','radla_w1']]
table.columns = ['Hypertension','Diabetes','Lung Disease','Heart Disease','Stroke','ADL']

This could be added to Table 4. 

In [339]:
for c in table.columns:
	table[c] = table[c].astype('float64')
table.round(3)

Unnamed: 0,Hypertension,Diabetes,Lung Disease,Heart Disease,Stroke,ADL
EU,1.166,1.094,1.041,1.032,1.018,1.061
US,1.435,1.163,1.114,1.129,1.065,1.276


In [340]:
table.round(3).to_latex('../tables/table_4_gradient.tex')

For the static models, normalized average income levels within quartiles are needed. Here is the table

In [341]:
for c in incs.columns:
	incs[c] = incs[c].astype('float64')
incs.index = ['US','EU']
incs.loc[['EU','US'],:].round(3)

Unnamed: 0,q1,q2,q3,q4
EU,0.29,0.632,0.992,2.086
US,0.209,0.521,0.93,2.339


In [342]:
incs.loc[['EU','US'],:].round(3).to_latex('../tables/table_4_income.tex')