Importing modules

In [2]:
import numpy as np
import pandas as pd

Defining custom functions

In [3]:
def display_columns():
  print("Number of columns: " + str(len(df.columns)))
  print(df.columns)

In [4]:
def display_summary():
  columns=list(df.columns)
  columns_cnt=len(columns)
  rows_cnt=len(df.index)

  print("Number of rows: " + str(rows_cnt))
  print("Number of columns: " + str(columns_cnt))
  print("Used columns: ")
  print(columns)

In [5]:
def rename_columns():
  new_headers = {
      # Closures & containment
      "C1M_School closing" : "C1M",
      "C2M_Workplace closing" : "C2M",
      "C3M_Cancel public events" : "C3M",
      "C4M_Restrictions on gatherings" : "C4M",
      "C5M_Close public transport" : "C5M",
      "C6M_Stay at home requirements" : "C6M",
      "C7M_Restrictions on internal movement" : "C7M",
      "C8EV_International travel controls" : "C8M",

      # Health measures
      "H1_Public information campaigns" : "H1"
  }

  df.rename(columns=new_headers, inplace=True)

In [6]:
def filter_columns():
  # Dropping not used columns with region information & other
  region_related_cls=["CountryCode", "RegionName", "RegionCode", "Jurisdiction", "M1_Wildcard", "StringencyIndex_Average_ForDisplay"]
  df.drop(columns=region_related_cls, inplace=True)

  # Defining regexes to filter not used columns indices
  economic_filter='^E[1-4]_*'
  health_system_filter='^H[2-8][M]?_*'
  vaccination_filter='^V[1-4][A-Z]?_*'
  GRI_filter='GovernmentResponseIndex*'
  CHI_filter='ContainmentHealthIndex*'
  ESI_filter='EconomicSupportIndex*'
  filters=[economic_filter, health_system_filter, vaccination_filter, GRI_filter, CHI_filter, ESI_filter]

  for filter in filters:
    df.drop(columns=df.filter(regex=filter), inplace=True)

In [7]:
def add_previous_stringency_index():
    df['Prev_StringencyIndex_Average']=0

    for i in range(1, len(df)):
      # First row from certain country
      if df.at[i, 'CountryName'] != df.at[i-1, 'CountryName']:
        df.loc[i, 'Prev_StringencyIndex_Average'] = 0.0
      else:
        df.loc[i, 'Prev_StringencyIndex_Average'] = df.loc[i-1, 'StringencyIndex_Average']

In [8]:
def get_npi_difference():
  # Add column for Stringency Index diff value
  df['Daily_StringencyIndex_Change']=0

  for i in range(1, len(df)):
    # Create np arrays for calculating Stringency Index diff
    npis=np.zeros((1,0))
    npis_flag=np.zeros((1,0))

    # First row from certain country
    if df.at[i, 'CountryName'] != df.at[i-1, 'CountryName']:

      for npi in subset:
        npis = np.append(npis, [df.at[i, npi]])
      for flag in flag_subset:
        npis_flag = np.append(npis_flag, [df.at[i, flag]])
    
    else:
      # Check whether any NPI value has changed
      for npi in subset:
        if df.at[i-1, npi] != df.at[i, npi]:
          npis = np.append(npis, [df.at[i, npi]])
        else:
          npis = np.append(npis, [0])

      # Check whether any NPI flag has changed
      for flag in flag_subset:
        if df.at[i-1, flag] != df.at[i, flag]:
          npis_flag = np.append(npis_flag, [df.at[i, flag]])
        else: 
          npis_flag = np.append(npis, [0])

      daily_si=calculate_simplified_si(npis)
      df.at[i, 'Daily_StringencyIndex_Change'] = daily_si


In [9]:
# Do not consider flags while calculating Stringency Index
def calculate_simplified_si(npis):
  # Max values of npis
  max_npi_values=[3,3,2,4,2,3,2,4,2]                                    
  si=0

  for i in range(0, len(npis)):
    si = si + (npis[i] / max_npi_values[i])
    
  si=si/len(npis) 
  return si

In [10]:
def calculate_si():
  pass

**[STEP 1]** Read data from CSV

In [11]:
path="./OxCGRT_nat_latest.csv"

df=pd.read_csv(path)

display_summary()

Number of rows: 202819
Number of columns: 61
Used columns: 
['CountryName', 'CountryCode', 'RegionName', 'RegionCode', 'Jurisdiction', 'Date', 'C1M_School closing', 'C1M_Flag', 'C2M_Workplace closing', 'C2M_Flag', 'C3M_Cancel public events', 'C3M_Flag', 'C4M_Restrictions on gatherings', 'C4M_Flag', 'C5M_Close public transport', 'C5M_Flag', 'C6M_Stay at home requirements', 'C6M_Flag', 'C7M_Restrictions on internal movement', 'C7M_Flag', 'C8EV_International travel controls', 'E1_Income support', 'E1_Flag', 'E2_Debt/contract relief', 'E3_Fiscal measures', 'E4_International support', 'H1_Public information campaigns', 'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing', 'H4_Emergency investment in healthcare', 'H5_Investment in vaccines', 'H6M_Facial Coverings', 'H6M_Flag', 'H7_Vaccination policy', 'H7_Flag', 'H8M_Protection of elderly people', 'H8M_Flag', 'M1_Wildcard', 'V1_Vaccine Prioritisation (summary)', 'V2A_Vaccine Availability (summary)', 'V2B_Vaccine age eligibility/availability 

**[STEP 2]** Deleting not used and irrelevant columns

In [12]:
filter_columns()

**[STEP 3]** Drop rows with NPI=Nan

In [13]:
df = df[df['StringencyIndex_Average'].notna()]

**[STEP 4]** Rename columns to improve readibility

In [14]:
rename_columns()

**[STEP 5]** Deleting rows where none of NPIs are defined

In [15]:
subset=["C1M", "C2M", "C3M", "C4M", "C5M", "C6M", "C7M", "C8M", "H1"]
flag_subset=["C1M_Flag", "C2M_Flag", "C3M_Flag", "C4M_Flag", "C5M_Flag", "C6M_Flag", "C7M_Flag", "H1_Flag"]
df.dropna(subset=flag_subset, how='all', inplace=True)
df.fillna(0, inplace=True)

**[STEP 6]** Delete redundant rows

In [16]:
compared_columns=df.columns.tolist()

compared_columns.remove('Date')
compared_columns.remove('ConfirmedCases')
compared_columns.remove('ConfirmedDeaths')
compared_columns.remove('MajorityVaccinated')
compared_columns.remove('PopulationVaccinated')

df.drop_duplicates(subset=compared_columns, inplace=True)

**[STEP 7]** Copy Stringency Index from previous day 

In [17]:
df.reset_index(drop=True, inplace=True)
add_previous_stringency_index()

**[STEP 8]** Calculate Stringency Index of NPI established certain day

In [18]:
get_npi_difference()

**[STEP 9]** Show results

In [19]:
df.head()
print(df['Daily_StringencyIndex_Change'].max())

0.8518518518518519


**[STEP 10]** Add continent column

In [20]:
continents_df = pd.read_csv("./Countries-Continents.csv")

In [21]:
continents_dict = dict(zip(continents_df.Country, continents_df.Continent))

In [22]:
df['Continent'] = df['CountryName'].map(continents_dict)

In [23]:
continent_col = df.pop('Continent')
df.insert(0,'Continent',continent_col)

In [24]:
df[df['Continent']== 'Europe']

Unnamed: 0,Continent,CountryName,Date,C1M,C1M_Flag,C2M,C2M_Flag,C3M,C3M_Flag,C4M,...,C8M,H1,H1_Flag,ConfirmedCases,ConfirmedDeaths,MajorityVaccinated,PopulationVaccinated,StringencyIndex_Average,Prev_StringencyIndex_Average,Daily_StringencyIndex_Change
153,Europe,Albania,20200214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,NV,0.00,5.56,0.00,0.000000
154,Europe,Albania,20200225,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,0.0,0.0,NV,0.00,8.33,5.56,0.027778
155,Europe,Albania,20200309,3.0,1.0,0.0,0.0,2.0,1.0,2.0,...,1.0,1.0,1.0,2.0,0.0,NV,0.00,36.11,8.33,0.277778
156,Europe,Albania,20200310,3.0,1.0,0.0,0.0,2.0,1.0,2.0,...,1.0,2.0,1.0,10.0,0.0,NV,0.00,41.67,36.11,0.111111
157,Europe,Albania,20200311,3.0,1.0,2.0,1.0,2.0,1.0,2.0,...,2.0,2.0,1.0,12.0,1.0,NV,0.00,51.85,41.67,0.129630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16265,Europe,Ukraine,20220224,1.0,1.0,2.0,0.0,2.0,0.0,0.0,...,1.0,0.0,0.0,5040518.0,112459.0,NV,38.14,32.53,43.64,0.000000
16266,Europe,Ukraine,20220227,1.0,1.0,2.0,0.0,2.0,0.0,0.0,...,1.0,0.0,0.0,5062889.0,112835.0,NV,38.17,32.52,32.53,0.000000
16267,Europe,Ukraine,20220317,1.0,1.0,2.0,0.0,2.0,0.0,0.0,...,1.0,1.0,1.0,5153795.0,114366.0,NV,38.17,38.07,32.52,0.055556
16268,Europe,Ukraine,20220326,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,5187213.0,114793.0,NV,38.17,10.05,38.07,0.000000


**[STEP 11]** Save DataFrame with results to csv

In [25]:
df.to_csv('OxCGRT_clean.csv', encoding = 'utf-8-sig', index = False)