In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-country EDA summary. The file's first column has no header,
# so pandas labels it 'Unnamed: 0' (see the column listing below).
summary_csv = 'EDA_summary.csv'
df = pd.read_csv(summary_csv)

In [3]:
# Inspect the raw column labels before renaming them.
df.keys()

Index(['Unnamed: 0', 'toughest', 'highest_mort'], dtype='object')

In [4]:
# Replace the raw column labels with the descriptive names used in the
# rest of the notebook.
column_names = {
    'Unnamed: 0': 'Country',
    'toughest': 'Stringency',
    'highest_mort': 'ExcessMortality',
}
df = df.rename(columns=column_names)

In [5]:
# Peek at the first five rows to confirm the rename took effect.
df.head(n=5)

Unnamed: 0,Country,Stringency,ExcessMortality
0,Aruba,42.580187,10423.084396
1,Afghanistan,34.697103,7564.492568
2,Angola,57.861574,4027.033357
3,Albania,51.355463,23202.617972
4,Andorra,44.620094,12698.51806


In [6]:
# Summary statistics for the numeric columns (quartiles spelled out explicitly;
# these are the defaults).
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Stringency,ExcessMortality
count,181.0,182.0
mean,50.264284,16212.674144
std,11.541746,71543.900393
min,9.738585,-9976.005941
25%,43.887714,2658.635608
50%,50.566765,7355.503148
75%,59.529907,13836.440283
max,72.992336,864755.834482


In [7]:
# Index the frame by country so that rows can be dropped by country name.
# Rebinding the result (instead of inplace=True) keeps the step chainable and
# avoids mutating a frame that earlier cells have already displayed.
df = df.set_index('Country')

Removing several countries that appear to be outliers in terms of excess mortality.

In [8]:
# Countries removed from the analysis as excess-mortality outliers.
# NOTE(review): per the describe() outputs, Stringency's count goes 181 -> 176
# while ExcessMortality's goes 182 -> 176, so it appears exactly one of the
# dropped rows had no Stringency value -- confirm which one.
excluded_countries = [
    'United States',
    'Brazil',
    'United Kingdom',
    'China',
    'Canada',
    'Comoros',
]
df = df.drop(index=excluded_countries)

In [9]:
# Re-check the summary statistics after removing the outlier countries.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Stringency,ExcessMortality
count,176.0,176.0
mean,50.161972,8328.2372
std,11.660835,8404.603469
min,9.738585,-9976.005941
25%,43.478237,2144.828424
50%,50.417814,7081.372155
75%,59.641846,13549.0844
max,72.992336,37323.011146


In [10]:
# Move 'Country' back out of the index into an ordinary column.
df = df.reset_index(drop=False)

In [11]:
# Order countries from least to most stringent.
df = df.sort_values('Stringency', ascending=True)

In [12]:
# The five least stringent countries after sorting.
df.head(n=5)

Unnamed: 0,Country,Stringency,ExcessMortality
116,Nicaragua,9.738585,18579.366803
10,Burundi,13.485849,6801.989215
164,Tanzania,17.380476,4779.002252
85,Kiribati,23.013426,-3084.660934
114,Niger,24.142315,4810.27583


In [13]:
# Renumber the rows in sorted order. The previous row labels are kept in a new
# 'index' column, which the df_high / df_low cells drop again later.
df = df.reset_index(drop=False)

In [14]:
# Bin countries into three equal-sized stringency groups, labelled by integer
# code: 0 = lowest third, 1 = middle third, 2 = highest third.
df = df.assign(Quantile=pd.qcut(df['Stringency'], 3, labels=False))

In [15]:
# Show the binned frame using the notebook's rich table display rather than
# print(); pandas truncates long frames automatically.
df

     index    Country  Stringency  ExcessMortality  Quantile
0      116  Nicaragua    9.738585     18579.366803         0
1       10    Burundi   13.485849      6801.989215         0
2      164   Tanzania   17.380476      4779.002252         0
3       85   Kiribati   23.013426     -3084.660934         0
4      114      Niger   24.142315      4810.275830         0
..     ...        ...         ...              ...       ...
171    106    Myanmar   67.350841      7343.971004         2
172      6  Argentina   68.968208     13817.367761         2
173    124       Peru   69.228738     37323.011146         2
174    169  Venezuela   71.645472      8628.576493         2
175     66   Honduras   72.992336     21923.682715         2

[176 rows x 5 columns]


In [16]:
# High-stringency comparison set: the middle and top terciles (Quantile 1 and 2).
# The helper 'index' column left over from reset_index() is discarded.
in_upper_terciles = df['Quantile'] >= 1
df_high = df.loc[in_upper_terciles].drop(columns=['index'])

In [17]:
# Low-stringency comparison set: the bottom and middle terciles (Quantile 0 and 1).
# The helper 'index' column left over from reset_index() is discarded.
in_lower_terciles = df['Quantile'] <= 1
df_low = df.loc[in_lower_terciles].drop(columns=['index'])

In [18]:
# Summary statistics for the high-stringency comparison set.
df_high.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Stringency,ExcessMortality,Quantile
count,117.0,117.0,117.0
mean,56.617161,9507.267051,1.504274
std,6.748646,8004.777273,0.502132
min,46.183925,-9976.005941,1.0
25%,50.467778,3964.680896,1.0
50%,56.243645,8628.576493,2.0
75%,61.445741,14014.637907,2.0
max,72.992336,37323.011146,2.0


In [19]:
# Summary statistics for the low-stringency comparison set.
df_low.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Stringency,ExcessMortality,Quantile
count,117.0,117.0,117.0
mean,44.014912,7931.834282,0.495726
std,9.157754,8832.561284,0.502132
min,9.738585,-9976.005941,0.0
25%,40.423056,452.768418,0.0
50%,46.146168,6801.989215,0.0
75%,50.36785,12436.976922,1.0
max,56.194167,31299.035212,1.0


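Converting the quantile label into a treatment label, with a zero indicating no treatment and a one indicating treatment. In `df_high` the treated group is the top stringency tercile; in `df_low` it is the bottom tercile. The middle tercile is the untreated group in both.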

In [20]:
# Turn the tercile code into a binary treatment flag in a single step:
#   top tercile    (Quantile == 2) -> Treated = 1
#   middle tercile (Quantile == 1) -> Treated = 0
# The flag is derived directly from 'Quantile' instead of renaming the column
# and chaining replace() calls across several cells. Re-running this cell then
# fails with a KeyError (no 'Quantile' column) rather than silently
# relabelling rows that were already converted.
df_high = (
    df_high
    .assign(Treated=(df_high['Quantile'] == 2).astype('int64'))
    .drop(columns='Quantile')
)

In [23]:
# Turn the tercile code into a binary treatment flag in a single step:
#   bottom tercile (Quantile == 0) -> Treated = 1
#   middle tercile (Quantile == 1) -> Treated = 0
# The flag is derived directly from 'Quantile' instead of swapping 0 and 1
# through a temporary value of 2 across several replace() cells. Re-running
# this cell then fails with a KeyError (no 'Quantile' column) rather than
# silently flipping or erasing labels.
df_low = (
    df_low
    .assign(Treated=(df_low['Quantile'] == 0).astype('int64'))
    .drop(columns='Quantile')
)

In [31]:
# Persist both comparison sets. to_csv() is called with its defaults, so the
# row labels are written as an unnamed first column.
for frame, csv_name in (
    (df_high, 'High_Stringency.csv'),
    (df_low, 'Low_Stringency.csv'),
):
    frame.to_csv(csv_name)