# Task

This notebook is used to produce Table 2 in the paper. This Table computes a price decomposition of the growth in the GDP share of health expenditures. We use data from Eurostat, the BLS and the OECD for this task. 

In [1]:
import numpy as np 
from matplotlib import pyplot as plt
import pandas as pd
import csv
from pandas import read_excel
from scipy import stats
from scipy.stats import spearmanr

A month to year data conversion function. 

In [2]:
def m2y(monthly):
    size   = monthly.shape
    Tm     = size[0]
    N      = size[1]
    Ty     = int(np.round(Tm/12))
    annual = np.zeros((Ty,N))
    for kk in range(N):
        for jj in range(Ty):
            annual[jj,kk] = np.mean(monthly[jj*12:(jj+1)*12-1,kk])
    return annual

A number of parameters that need to be set for the computation. 

In [3]:
N       = 8 #8 countries
year    = np.arange(1996, 2020, 1) # sample data
T       = year.size
xsmooth = 5 # length for the MA

# Step 1: Load price series and compute growth rates. 

## US data

We load the BLS price series. 

In [4]:
file_name = '../data_sources/prices/DataPriceUS.xlsx'

my_sheet  = 'CUSR0000SA0'    # sheet name
xx_us0 = read_excel(file_name, sheet_name = my_sheet)
my_sheet  = 'CUSR0000SAM'    # sheet name
xx_us1 = read_excel(file_name, sheet_name = my_sheet)

xx_us = np.array([xx_us0.loc[11:378,'Unnamed: 3'], xx_us1.loc[11:378,'Unnamed: 3']])
xx_us = xx_us.T

  after removing the cwd from sys.path.
  


We select years, transform monthly to yearly data and then compute a moving average smoothing. We then create our index. 

In [5]:
# select 1996:m1 to 2019:m12
xx_us_m =xx_us[72:368-8,:]
# Annualized data
cpiy_us = m2y(xx_us_m)
# Moving average
cpima_us = np.zeros((24,2))
for ii in range(2):
    d = pd.Series(cpiy_us[:,ii])
    cpima_us[:,ii] = d.rolling(xsmooth,min_periods=1,center=True).mean()
# Index
cpi_us = np.zeros((24,2))
cpi_us[:,0] = cpima_us[:,0]/cpima_us[0,0]
cpi_us[:,1] = cpima_us[:,1]/cpima_us[0,1]

We compute growth rates. 

In [6]:
# Growth rates
dlcpi_us=np.log(cpi_us[1:-1,:])-np.log(cpi_us[0:-2,:])
avcpi = np.mean(dlcpi_us,0)
phat_us = np.mean(dlcpi_us[:,1])- np.mean(dlcpi_us[:,0])
print('mean 1996-2019: delta(log(cpi))')
print('     cpi    cpihealth')
print(avcpi)
print('phat = mean(cpi health) - mean(cpi)')
print(phat_us)

mean 1996-2019: delta(log(cpi))
     cpi    cpihealth
[0.01987873 0.03251502]
phat = mean(cpi health) - mean(cpi)
0.012636284479829951


We will repeat these steps for each country. 

## DE data

In [7]:
file_name = '../data_sources/prices/CPI_de.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_de0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../agghealth_replicate/data_sources/prices/CPIH_de.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_de1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_de = np.array([xx_de0.loc[10:305-8,'Unnamed: 1'], xx_de1.loc[10:305-8,'Unnamed: 1']])
xx_de = xx_de.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## DK data

In [8]:
file_name = '../agghealth_replicate/data_sources/prices/CPI_dk.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_dk0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../agghealth_replicate/data_sources/prices/CPIH_dk.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_dk1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_dk = np.array([xx_dk0.loc[10:305-8,'Unnamed: 1'], xx_dk1.loc[10:305-8,'Unnamed: 1']])
xx_dk = xx_dk.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## FR data

In [9]:
file_name = '../data_sources/prices/CPI_fr.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_fr0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../agghealth_replicate/data_sources/prices/CPIH_fr.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_fr1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_fr = np.array([xx_fr0.loc[10:305-8,'Unnamed: 1'], xx_fr1.loc[10:305-8,'Unnamed: 1']])
xx_fr = xx_fr.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## IT data

In [10]:
file_name = '../data_sources/prices/CPI_it.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_it0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../data_sources/prices/CPIH_it.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_it1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_it = np.array([xx_it0.loc[10:305-8,'Unnamed: 1'], xx_it1.loc[10:305-8,'Unnamed: 1']])
xx_it = xx_it.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## NL data

In [11]:
file_name = '../data_sources/prices/CPI_nl.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_nl0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../data_sources/prices/CPIH_nl.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_nl1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_nl = np.array([xx_nl0.loc[10:305-8,'Unnamed: 1'], xx_nl1.loc[10:305-8,'Unnamed: 1']])
xx_nl = xx_nl.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## SE data

In [12]:
file_name = '../data_sources/prices/CPI_se.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_se0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../data_sources/prices/CPIH_se.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_se1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_se = np.array([xx_se0.loc[10:305-8,'Unnamed: 1'], xx_se1.loc[10:305-8,'Unnamed: 1']])
xx_se = xx_se.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


## SP data

In [13]:
file_name = '../data_sources/prices/CPI_sp.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_sp0 = read_excel(file_name, sheet_name = my_sheet)

file_name = '../data_sources/prices/CPIH_sp.xlsx'
my_sheet  = 'FRED Graph'    # sheet name
xx_sp1 = read_excel(file_name, sheet_name = my_sheet)

# select 1996:m1 to 2019:m12
xx_sp = np.array([xx_sp0.loc[10:305-8,'Unnamed: 1'], xx_sp1.loc[10:305-8,'Unnamed: 1']])
xx_sp = xx_sp.T

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


# Step 2: Regrouping all data

We collect all data together. 

In [14]:
xx_data = np.zeros((288,N))
xx_data = np.array([xx_de[:,0], xx_dk[:,0], xx_fr[:,0], xx_it[:,0], xx_nl[:,0], xx_se[:,0], xx_sp[:,0], xx_us_m[:,0]]).T
xx_data = xx_data.astype(float)

xxh_data = np.zeros((288,N))
xxh_data = np.array([xx_de[:,1], xx_dk[:,1], xx_fr[:,1], xx_it[:,1], xx_nl[:,1], xx_se[:,1], xx_sp[:,1], xx_us_m[:,1]]).T
xxh_data = xxh_data.astype(float)

# Step 3: inflation rates across countries

Create population weights for each countries (OECD data)

In [15]:
# Weights for European countries
weight_co = np.array([.2827, .0197, .2311, .2117, .0588, .0339, .1617])

Select the relevant sub-sample

In [16]:
# Select a subsample: 1996-2007
stops = np.where(year==2007)
stopa = stops[0]
stop  = stopa[0]

size_smpl = xx_data.shape
size_s    = int(size_smpl[0]/12)

Annualize the data

In [17]:
# Annualized data
xxy_data  = m2y(xx_data)
xxhy_data = m2y(xxh_data)

Compute a moving average

In [18]:
# Mouving average
xxyma_data = np.zeros((size_s,N))
for ii in range(8):
    d = pd.Series(xxy_data[:,ii])
    xxyma_data[:,ii] = d.rolling(xsmooth,min_periods=1,center=True).mean()
    xxyma_data[:,ii] = xxyma_data[:,ii]/xxyma_data[0,ii]

xxhyma_data = np.zeros((size_s ,N))
for ii in range(8):
    d = pd.Series(xxhy_data[:,ii])
    xxhyma_data[:,ii] = d.rolling(xsmooth,min_periods=1,center=True).mean()
    xxhyma_data[:,ii] = xxhyma_data[:,ii]/xxhyma_data[0,ii]

In [19]:
dlcpi  = np.log(xxyma_data[1:stop+1,:])-np.log(xxyma_data[0:stop,:])
dlcpih = np.log(xxhyma_data[1:stop+1,:])-np.log(xxhyma_data[0:stop,:])

cpih_avgrowth = 100*np.mean(dlcpih[0:stop,:],0)
cpi_avgrowth  = 100*np.mean(dlcpi[0:stop,:],0)
cpih_avgrowth_eu = np.sum(weight_co*cpih_avgrowth[0:7])
cpi_avgrowth_eu  = np.sum(weight_co*cpi_avgrowth[0:7])

This is the data on growth rates. 

In [20]:
print('average annual growth rate 1996-2007')
print('DE  DK  FR  IT  NL  SE  SP  US AvEU')
print('cpi_health   ')
print([np.around(cpih_avgrowth,2), np.around(cpih_avgrowth_eu,2)])
print('cpi          ')
print([np.around(cpi_avgrowth,2), np.around(cpi_avgrowth_eu,2)])
print('cpi_h - cpi  ')
print([np.around(cpih_avgrowth-cpi_avgrowth,2), np.around(cpih_avgrowth_eu-cpi_avgrowth_eu,2)])

average annual growth rate 1996-2007
DE  DK  FR  IT  NL  SE  SP  US AvEU
cpi_health   
[array([2.58, 1.29, 1.79, 2.41, 3.14, 2.51, 1.48, 3.62]), 2.19]
cpi          
[array([1.34, 1.78, 1.54, 2.06, 2.12, 1.43, 2.66, 2.33]), 1.81]
cpi_h - cpi  
[array([ 1.24, -0.49,  0.25,  0.35,  1.01,  1.08, -1.18,  1.29]), 0.38]


For comparison with previous calculations in Matlab: 
1.08   -0.50   0.25   0.274   0.92   0.92   -1.31   1.31 0.28

# Step 4: GDP shares of health expenditures

We use the GDP shares of health expenditures from OECD. 

In [21]:
file_name = '../data_sources/prices/data_regFL.xlsx'
my_sheet  = 'Sheet1'    # sheet name
data_reg = read_excel(file_name, sheet_name = my_sheet)

  This is separate from the ipykernel package so we can avoid doing imports until


In [22]:
Country_lab = ['Germany', 'Denmark', 'France', 'Italy', 'Netherlands', 'Sweden', 'Spain', 'United States']

dd = np.zeros((24,N))
for ii in range(8):
    dei  = np.where((data_reg.loc[:,'country']==Country_lab[ii]) & (data_reg.loc[:,'year']>1995))
    deia = dei[0]
    deib = deia
    dd[:,ii] = deib.tolist()


In [23]:
pmc = data_reg.loc[:,'tothlthcap']  
yrc = data_reg.loc[:,'gdp15ncucap']
yc  = data_reg.loc[:,'gdpcap']

In [24]:
datapm = np.zeros((24,N))
datayr = np.zeros((24,N))
datay  = np.zeros((24,N))

for ii in range(8):
    d0 = pd.Series(pmc[dd[:,ii].tolist()])
    datapm[:,ii] = d0.rolling(xsmooth,min_periods=1,center=True).mean()
    d1 = pd.Series(yrc[dd[:,ii].tolist()])
    datayr[:,ii] = d1.rolling(xsmooth,min_periods=1,center=True).mean()
    d2 = pd.Series(yc[dd[:,ii].tolist()])
    datay[:,ii] = d2.rolling(xsmooth,min_periods=1,center=True).mean()


Compute the deflator

In [25]:
share   = datapm/datay
gdpdef0 = datay/datayr # GDP deflator 2015
gdpdef  = gdpdef0/np.tile(gdpdef0[0,:],(T,1)) # GDP deflator 1996

datayreal96  = datay/gdpdef # GDP real, base 1996%%%%%%%%%%%
datapmreal96 = datapm/xxhyma_data # pm real, base 1996
share_real   = datapmreal96/datayreal96

shat = (np.mean(share,0)-np.mean(share_real,0))/np.mean(share_real,0)
shateu = (np.sum(weight_co*100*np.mean(share[:,0:7],0)) - np.sum(weight_co*100*np.mean(share_real[:,0:7],0)))/np.sum(weight_co*100*np.mean(share_real[:,0:7],0))

This is the data for the Table

In [26]:
print('average 1996-2007 (in %)')
print('DE  DK  FR  IT  NL  SE  SP  US AvEU')
print('pm/y   ')
print([ np.around(100*np.mean(share,0),2), np.around(np.sum(weight_co*100*np.mean(share[:,0:7],0)),2) ] )
print('m/y    ')
print([ np.around(100*np.mean(share_real,0),2), np.around(np.sum(weight_co*100*np.mean(share_real[:,0:7],0)),2) ] )
print('share  ')
print( np.around(100*shat,2), np.around(100*shateu,2))

average 1996-2007 (in %)
DE  DK  FR  IT  NL  SE  SP  US AvEU
pm/y   
[array([10.63,  9.34, 10.62,  8.26,  9.25,  9.03,  8.12, 15.04]), 9.55]
m/y    
[array([ 9.37, 10.03, 10.43,  7.82,  8.13,  8.25,  9.24, 12.42]), 9.16]
shate  
[ 13.42  -6.85   1.86   5.57  13.69   9.48 -12.1   21.09] 4.26


For comparison with previous results using  Matlab
13.39   -6.81   1.83   5.50   13.68  9.46  -12.14  21.10