In [1]:
import pandas as pd
import numpy as np
import datetime

# Load 
## Covid-19 data

In [3]:
# Download the latest Oxford COVID-19 Government Response Tracker snapshot.
OxCGRT = pd.read_csv("https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv")
# Parse the YYYYMMDD-encoded Date column into real timestamps.
# The column is replaced by direct assignment instead of `.loc[:, 'Date'] = ...`:
# from pandas 2.0 on, `.loc[:, col] = values` first tries to write into the
# existing column in place, so the column may not end up with datetime64 dtype.
OxCGRT['Date'] = pd.to_datetime(OxCGRT['Date'], format='%Y%m%d')

In [28]:
# Columns carried over from the raw OxCGRT table into the working frame.
columns_of_interest = ['CountryName', 'CountryCode','Date', 'ConfirmedCases', 'ConfirmedDeaths',
                       'C1_School closing','C2_Workplace closing','C3_Cancel public events',
                       'C4_Restrictions on gatherings', 'C5_Close public transport','C6_Stay at home requirements',
                       'C7_Restrictions on internal movement','C8_International travel controls',
                       'E1_Income support','E2_Debt/contract relief', 'E3_Fiscal measures','E4_International support', 
                       'H1_Public information campaigns','H2_Testing policy', 'H3_Contact tracing','H4_Emergency investment in healthcare', 
                       'H5_Investment in vaccines','H6_Facial Coverings', 'H7_Vaccination policy', 'H8_Protection of elderly people', 
                       'M1_Wildcard',
                       'StringencyIndex','StringencyIndexForDisplay','StringencyLegacyIndex','StringencyLegacyIndexForDisplay', 
                       'GovernmentResponseIndex','GovernmentResponseIndexForDisplay', 
                       'ContainmentHealthIndex','ContainmentHealthIndexForDisplay', 
                       'EconomicSupportIndex','EconomicSupportIndexForDisplay']
# Countries dropped from the analysis.
excluded_countries = ['United Kingdom','Canada','Brazil','United States', # these have observations broken down by region
                      'Kosovo', 'Taiwan']  # NOTE(review): reason for excluding these two is not stated here - TODO confirm

# Select rows and columns in a single .loc call and take an explicit copy:
# later cells add columns to `data`, and without the copy pandas may treat it
# as a view of OxCGRT and emit SettingWithCopyWarning on those assignments.
data = OxCGRT.loc[~OxCGRT['CountryName'].isin(excluded_countries), columns_of_interest].copy()

In [11]:
# Preview the first three rows of the filtered frame.
data.head(n=3)

Unnamed: 0,CountryName,CountryCode,Date,ConfirmedCases,ConfirmedDeaths,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
0,Aruba,ABW,2020-01-01,,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aruba,ABW,2020-01-02,,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aruba,ABW,2020-01-03,,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Population data

In [8]:
# Source: https://data.worldbank.org/indicator/SP.POP.TOTL
# Build a one-column table (the "2019" figures) indexed by country name.
population = (
    pd.read_csv("data/population.csv", skiprows=4)
    .loc[:, ["Country Code", "2019"]]
    # Join on the country code and take the names from `data`, so that both
    # tables spell every country identically.
    .merge(
        data[['CountryCode', 'CountryName']].drop_duplicates(),
        left_on='Country Code',
        right_on='CountryCode',
    )
    .loc[:, ["CountryName", "2019"]]
    .set_index("CountryName")
)
# Drop the index label so it does not show up as a header row.
population.index.name = None

In [9]:
# Preview the first five rows of the population lookup table.
population.head(n=5)

Unnamed: 0,2019
Aruba,106314.0
Afghanistan,38041754.0
Angola,31825295.0
Albania,2854191.0
Andorra,77142.0


## Tests and vaccinations data

In [None]:
# Load the Our World in Data table of COVID-19 testing observations.
tests = pd.read_csv(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/testing/covid-testing-all-observations.csv"
)

In [None]:
# Load the Our World in Data vaccinations table.
vaccine = pd.read_csv(
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv"
)

# Calculate
## Daily cases

In [29]:
# Daily new cases: row-to-row change of the cumulative ConfirmedCases count,
# computed separately for each country so that a country's first row is NaN
# instead of a difference against the previous country's total.
# Relies on the rows of each country being in date order (not checked here).
# A single grouped diff replaces the per-country loop, which re-scanned the
# whole frame once per country.
data["DailyCases"] = data.groupby("CountryName")["ConfirmedCases"].diff()

## 7-day-average

In [31]:
# 7-row rolling mean of the per-country daily change in ConfirmedCases.
# The diff and the rolling window are both evaluated inside each country
# group, so a window never mixes rows from two different countries.
# A grouped transform replaces the per-country loop, which re-scanned the
# whole frame once per country.
data["Average7"] = (
    data.groupby("CountryName")["ConfirmedCases"]
    .transform(lambda cases: cases.diff().rolling(7).mean())
)

## 14-day-average

In [32]:
# 14-row rolling mean of the per-country daily change in ConfirmedCases.
# The diff and the rolling window are both evaluated inside each country
# group, so a window never mixes rows from two different countries.
# A grouped transform replaces the per-country loop, which re-scanned the
# whole frame once per country.
data["Average14"] = (
    data.groupby("CountryName")["ConfirmedCases"]
    .transform(lambda cases: cases.diff().rolling(14).mean())
)

## Daily cases per 100K population

In [30]:
# Daily cases per 100,000 inhabitants.
# Each row's country name is looked up in the `population` table ("2019"
# column, indexed by country name) in one vectorised step. A country that is
# missing from `population` yields NaN, whereas the former per-country loop
# raised KeyError part-way through and left the column half-filled.
# The lookup requires the index of `population` to be unique.
data["DailyCases_100K"] = data["DailyCases"] / data["CountryName"].map(population["2019"]) * 100000

## 7-day-average per 100K population

In [33]:
# 7-day average per 100,000 inhabitants.
# Each row's country name is looked up in the `population` table ("2019"
# column, indexed by country name) in one vectorised step. A country that is
# missing from `population` yields NaN, whereas the former per-country loop
# raised KeyError part-way through and left the column half-filled.
# The lookup requires the index of `population` to be unique.
data["Average7_100K"] = data["Average7"] / data["CountryName"].map(population["2019"]) * 100000

## 14-day-average per 100K population

In [34]:
# 14-day average per 100,000 inhabitants.
# Each row's country name is looked up in the `population` table ("2019"
# column, indexed by country name) in one vectorised step. A country that is
# missing from `population` yields NaN, whereas the former per-country loop
# raised KeyError part-way through and left the column half-filled.
# The lookup requires the index of `population` to be unique.
data["Average14_100K"] = data["Average14"] / data["CountryName"].map(population["2019"]) * 100000

# Save to the file

In [35]:
# Write the enriched table to disk; the row index is kept as the first column.
data.to_csv("data/OxCGRT_latest.csv", index=True)