In [83]:
import pandas as pd
import numpy as np

In [84]:
# Load the three NPI datasets (order: CCCSL, WHOPHSM, CORONANET).
# Every file is ';'-separated and has a 'Date' column that is parsed as datetime.
data_cccsl, data_whophsm, data_coronanet = (
    pd.read_csv(csv_path, sep = ';', parse_dates = ['Date'])
    for csv_path in (
        'COVID19_data_PAPER_VERSION.csv',
        'COVID19_data_PAPER_VERSION_WHOPHSM.csv',
        'COVID19_data_PAPER_VERSION_CORONANET.csv',
    )
)


# Non-NPI columns: various columns kept for Peter's analysis, plus the
# additional Deaths / Recovered / Growth rate / R columns.
dropcolumns1 = [
    'Population [million]', 'Population Density', 'GDP pc PPP', 'HDI',
    'VA', 'PS', 'GE', 'RQ', 'RL', 'CC',
    'Deaths', 'Recovered', 'Growth rate', 'R',
]

# Columns that are still needed for checks (filtering and grouping), so they
# are dropped only after those checks have been done.
dropcolumns2 = ['Country', 'Confirmed', 'Date']

# Once both lists are dropped, only NPIs should remain as column names.


In [103]:
def get_counts(data, enddate = '2020-05-01', minconfirmed = 30):
    """Count countries and implemented measures in one NPI dataset.

    The per-country tallies use only rows with ``Confirmed >= minconfirmed``
    and ``Date < enddate``. Columns listed in the module-level ``dropcolumns1``
    and ``dropcolumns2`` are removed; every remaining column is cast to int
    and summed (per the notebook comments these are the NPI columns, with
    'True' on the single date a measure was implemented).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Country', 'Confirmed', 'Date' and all columns named in
        ``dropcolumns1`` / ``dropcolumns2``.
    enddate : str
        Exclusive upper bound on 'Date', converted with ``np.datetime64``.
    minconfirmed : int
        Inclusive lower bound on 'Confirmed'.

    Returns
    -------
    tuple
        ``(country_count, measure_count, measure_cum_count, n_measure_types)``:

        * countries with at least one non-zero column in the filtered rows,
        * sum of all remaining columns over the filtered rows,
        * number of (country, column) pairs with a non-zero sum, i.e. repeated
          measures within a country are counted once,
        * number of columns with a non-zero sum over the whole of ``data``.

    NOTE(review): the last value is computed on the unfiltered ``data`` and so
    ignores ``enddate`` / ``minconfirmed`` -- confirm that this is intended.
    """
    # Row selection shared by all per-country tallies.
    enough_cases  = data['Confirmed'] >= minconfirmed
    before_end    = data['Date'] < np.datetime64(enddate)
    selected_rows = data[enough_cases & before_end].drop(columns = dropcolumns1)

    n_countries      = 0
    n_measures       = 0
    n_measures_dedup = 0

    for _, country_rows in selected_rows.groupby('Country'):
        # int cast turns the flags into 0/1, so the column sum is the number
        # of times each measure appears for this country
        per_measure = country_rows.drop(columns = dropcolumns2).astype(int).sum()
        n_distinct  = len(per_measure[per_measure > 0])

        n_measures       += per_measure.sum()   # every single entry
        n_measures_dedup += n_distinct          # each measure once per country

        # countries without any measure are not counted
        if n_distinct > 0:
            n_countries += 1

    # Dataset-wide view: which columns are ever set, regardless of country.
    npi_only       = data.drop(columns = dropcolumns1).drop(columns = dropcolumns2)
    overall        = npi_only.astype(int).sum()
    n_measure_used = len(overall[overall > 0])

    return n_countries, n_measures, n_measures_dedup, n_measure_used


In [104]:
# Filter settings shared by all three datasets:
# rows need Date < enddate and Confirmed >= minconfirmed.
enddate, minconfirmed = '2020-05-01', 30

# Each line prints (countries, measures, measures counted once per country,
# measure columns with at least one entry) for one dataset.
print('CCCSL    ', get_counts(data_cccsl,     enddate = enddate, minconfirmed = minconfirmed))
print('WHOPHSM  ', get_counts(data_whophsm,   enddate = enddate, minconfirmed = minconfirmed))
print('CORONANET', get_counts(data_coronanet, enddate = enddate, minconfirmed = minconfirmed))

CCCSL     (77, 3112, 1386, 46)
WHOPHSM   (205, 13155, 3650, 40)
CORONANET (194, 10489, 4731, 107)
