In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [2]:
# Load the high- and low-stringency groups, each ordered by ascending
# excess mortality.
df1 = pd.read_csv('High_Stringency.csv').sort_values(by='ExcessMortality', ascending=True)
df2 = pd.read_csv('Low_Stringency.csv').sort_values(by='ExcessMortality', ascending=True)

In [3]:
# Plain-text preview of the sorted low-stringency frame (loaded from Low_Stringency.csv).
print(df2)

     Unnamed: 0         Country  Stringency  ExcessMortality  Treated
93           93      Seychelles   51.148796     -9976.005941        0
16           16  Faeroe Islands   34.277290     -7225.746708        1
20           20       Greenland   36.360278     -4818.442115        1
28           28     New Zealand   40.357963     -3414.487613        1
24           24    Sierra Leone   38.872037     -3256.684001        1
..          ...             ...         ...              ...      ...
60           60          Serbia   46.201215     27482.327051        0
9             9         Belarus   29.322264     27578.139516        1
114         114    Saudi Arabia   55.820291     27671.693193        0
69           69          Russia   47.802430     29154.155597        0
44           44        Bulgaria   43.496415     31299.035212        1

[117 rows x 5 columns]


In [4]:
# Post-period frame for the high-stringency group: discard the CSV row-number
# column, the raw stringency score and the country label.
df1post = df1.drop(columns=['Unnamed: 0', 'Stringency', 'Country'])

In [5]:
# Post-period frame for the low-stringency group: discard the CSV row-number
# column and the raw stringency score.
# NOTE(review): 'Country' is kept here but dropped from df1post — confirm the asymmetry is intended.
df2post = df2.drop(columns=['Unnamed: 0', 'Stringency'])

In [6]:
# Mark every observed row as belonging to the post period (Time == 1).
df1post = df1post.assign(Time=1)
df2post = df2post.assign(Time=1)

In [7]:
# Plain-text preview of the high-stringency post-period frame after adding the Time flag.
print(df1post)

     ExcessMortality  Treated  Time
34      -9976.005941        0     1
66      -3090.255654        1     1
67      -2363.601985        1     1
47      -2358.657296        0     1
86      -2328.007357        1     1
..               ...      ...   ...
39      26282.470927        0     1
1       27482.327051        0     1
55      27671.693193        0     1
10      29154.155597        0     1
114     37323.011146        1     1

[117 rows x 3 columns]


First, we'll run a difference-in-differences test on our high stringency group, using the middle quantile as the control and the high quantile as the treatment. Then, we'll run the same test on our low stringency group, using the middle quantile as the control and the low quantile as the treatment. This should give us a good idea of whether differences in the stringency of a country's response resulted in a difference in the excess mortality of that country.

In [8]:
# Start each "pre" period frame as an independent copy of its "post" frame;
# the outcome and Time columns are overwritten in the following cells.
df1pre, df2pre = (frame.copy() for frame in (df1post, df2post))

In [9]:
# Synthetic pre period for the high-stringency group: excess mortality is set
# to 0 for every row.
# Assign the column directly. The previous per-column loop repeated the same
# write once per column without using the loop variable, and it wrote through
# `.values`, which only works when that array is a writable view of the frame
# (it is read-only under pandas Copy-on-Write).
# 0.0 keeps the column as float, matching the observed post-period values.
df1pre['ExcessMortality'] = 0.0

In [10]:
# Mark every row of the high-stringency pre frame as pre period (Time == 0).
# Direct column assignment replaces the redundant per-column loop and the
# write through `.values`, which fails when that array is read-only
# (pandas Copy-on-Write).
df1pre['Time'] = 0

In [11]:
# Synthetic pre period for the low-stringency group: excess mortality is set
# to 0 for every row.
# Assign the column directly. The previous per-column loop repeated the same
# write once per column without using the loop variable, and it wrote through
# `.values`, which only works when that array is a writable view of the frame
# (it is read-only under pandas Copy-on-Write).
# 0.0 keeps the column as float, matching the observed post-period values.
df2pre['ExcessMortality'] = 0.0

In [12]:
# Mark every row of the low-stringency pre frame as pre period (Time == 0).
# Direct column assignment replaces the redundant per-column loop and the
# write through `.values`, which fails when that array is read-only
# (pandas Copy-on-Write).
df2pre['Time'] = 0

In [13]:
# Stack the zeroed pre-period rows on top of the observed post-period rows.
# The original row labels are kept, so each label appears twice in the result.
df1 = pd.concat(objs=[df1pre, df1post], axis=0)
df2 = pd.concat(objs=[df2pre, df2post], axis=0)

In [14]:
# Plain-text preview of the stacked pre/post frame for the low-stringency group.
print(df2)

            Country  ExcessMortality  Treated  Time
93       Seychelles         0.000000        0     0
16   Faeroe Islands         0.000000        1     0
20        Greenland         0.000000        1     0
28      New Zealand         0.000000        1     0
24     Sierra Leone         0.000000        1     0
..              ...              ...      ...   ...
60           Serbia     27482.327051        0     1
9           Belarus     27578.139516        1     1
114    Saudi Arabia     27671.693193        0     1
69           Russia     29154.155597        0     1
44         Bulgaria     31299.035212        1     1

[234 rows x 4 columns]


In [15]:
# Difference-in-differences interaction term: 1 only for treated rows in the
# post period, 0 otherwise.
df1['did'] = df1['Time'].mul(df1['Treated'])
df2['did'] = df2['Time'].mul(df2['Treated'])

In [16]:
# Regressors for the high-stringency model: group flag, period flag and their interaction.
x = df1.loc[:, ['Treated', 'Time', 'did']]

In [17]:
# Outcome for the high-stringency model.
y = df1.loc[:, 'ExcessMortality']

In [18]:
# Ordinary least squares fit of excess mortality on Treated, Time and did.
model = LinearRegression().fit(X=x, y=y)

In [19]:
# Fitted coefficients, in the column order of x: Treated, Time, did.
model.coef_

array([ 1.90264641e-12,  9.90698463e+03, -7.92660292e+02])

In [20]:
# Refit the same specification with statsmodels to get standard errors and
# p-values; statsmodels needs the intercept column added explicitly.
X2 = sm.add_constant(x)
est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:        ExcessMortality   R-squared:                       0.417
Model:                            OLS   Adj. R-squared:                  0.410
Method:                 Least Squares   F-statistic:                     54.87
Date:                Tue, 01 Feb 2022   Prob (F-statistic):           8.67e-27
Time:                        13:58:09   Log-Likelihood:                -2352.8
No. Observations:                 234   AIC:                             4714.
Df Residuals:                     230   BIC:                             4727.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       7.336e-14    745.526   9.84e-17      1.0

The above are the results of the higher stringency treatment. From my first reading, it looks like a higher level of stringency resulted in a mean decrease of 792.6603 deaths per 100k over the course of the test period.

In [21]:
# Regressors for the low-stringency model: group flag, period flag and their interaction.
x = df2.loc[:, ['Treated', 'Time', 'did']]

In [22]:
# Outcome for the low-stringency model.
y = df2.loc[:, 'ExcessMortality']

In [23]:
# Ordinary least squares fit of excess mortality on Treated, Time and did
# (this rebinds `model` from the earlier high-stringency fit).
model = LinearRegression().fit(X=x, y=y)

In [24]:
# Fitted coefficients, in the column order of x: Treated, Time, did.
model.coef_

array([ 4.75661603e-12,  9.90698463e+03, -3.91682358e+03])

In [25]:
# Refit the low-stringency specification with statsmodels to get standard
# errors and p-values; statsmodels needs the intercept column added explicitly.
X2 = sm.add_constant(x)
est = sm.OLS(endog=y, exog=X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:        ExcessMortality   R-squared:                       0.324
Model:                            OLS   Adj. R-squared:                  0.316
Method:                 Least Squares   F-statistic:                     36.81
Date:                Tue, 01 Feb 2022   Prob (F-statistic):           1.82e-19
Time:                        13:58:09   Log-Likelihood:                -2370.2
No. Observations:                 234   AIC:                             4748.
Df Residuals:                     230   BIC:                             4762.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.399e-12    802.962  -1.74e-15      1.0

The above are the results of the lower stringency treatment. Here it looks like a lower level of stringency led to a mean decrease of 3916.8236 deaths per 100k over the course of the test period.