In [27]:
import numpy as np
import pandas as pd

## Reading the Government Policy Data

In [28]:
# Reading the Government Policies dataset
policy_data = pd.read_excel('Government_Policy.xlsx')

# Preparing a dataFrame of relevant information from the whole file
policies = policy_data[['COUNTRY', 'CATEGORY', 'MEASURE', 'DATE_IMPLEMENTED']]

# Adding dates in the desired format in the dataset
policy_dates = pd.to_datetime(policies['DATE_IMPLEMENTED'],format='%Y-%m-%d').dt.date
policies['DATE_IMPLEMENTED'] = policy_dates

(policies.COUNTRY.unique())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'China, Hong Kong Special Administrative Region', 'Colombia',
       'Comoros', 'Congo', 'Congo DR', 'Costa Rica', "Côte d'Ivoire",
       'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Czech republic',
       'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Gren

In [29]:
policy_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8294 entries, 0 to 8293
Data columns (total 18 columns):
ID                    8294 non-null int64
COUNTRY               8294 non-null object
ISO                   8294 non-null object
ADMIN_LEVEL_NAME      764 non-null object
PCODE                 0 non-null float64
REGION                8294 non-null object
LOG_TYPE              8294 non-null object
CATEGORY              8294 non-null object
MEASURE               8294 non-null object
TARGETED_POP_GROUP    8294 non-null object
COMMENTS              8153 non-null object
NON_COMPLIANCE        7085 non-null object
DATE_IMPLEMENTED      8073 non-null datetime64[ns]
SOURCE                8278 non-null object
SOURCE_TYPE           8281 non-null object
LINK                  8263 non-null object
ENTRY_DATE            8294 non-null datetime64[ns]
Alternative source    765 non-null object
dtypes: datetime64[ns](2), float64(1), int64(1), object(14)
memory usage: 1.1+ MB


## Reading the COVID 19 Cases Data

In [30]:
## Reading the dataset
cases = pd.read_csv('full_data.csv')

## Adding Dates in the desired format in the dataset
dates = pd.to_datetime(cases['date'], format = '%Y-%m-%d').dt.date
cases['Date'] = dates

(cases.location.unique())

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao', 'Cyprus',
       'Czech Republic', 'Democratic Republic of Congo', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Ethiopia', 'Faeroe Islands', 'Falkland Isla

## Preprocessing Functions

In [31]:
def policy_data_country_preprocess(country):

    # Pulling out data for a particular country
    policies_ct = policies.loc[policies['COUNTRY'] == country]
    policies_ct = policies_ct.reset_index(drop=True)

    # Sorting Dataset by dates
    policies_ct = policies_ct.sort_values(by=['DATE_IMPLEMENTED'])
    policies_ct = policies_ct.reset_index(drop=True)

    # Deleting Duplicates
    policies_ct.drop_duplicates(subset=['COUNTRY', 'CATEGORY', 'MEASURE'], inplace = True)
    policies_ct = policies_ct.reset_index(drop=True)
    
    return(policies_ct)

def case_data_country_preprocess(country):
    
    ## Taking values for a particular country
    cases_ct = cases.loc[cases['location'] == country]
    cases_ct = cases_ct.reset_index(drop=True)

    ## Sorting by date
    # cases_ct = cases_ct.drop(columns=['dateRep'])
    cases_ct = cases_ct.sort_values(by=['Date'],ascending=True)
    cases_ct = cases_ct.reset_index(drop=True)
    
    return(cases_ct)

## Preparing Dummy variables for each Category of Measures Taken

In [53]:
def create_dummy_variables(policies_ct, cases_ct):
    # Defining arrays for each category
    Governance_and_socio_economic_measures = np.zeros(len(cases_ct))
    Social_distancing = np.zeros(len(cases_ct))
    Humanitarian_exemption = np.zeros(len(cases_ct))
    Movement_restrictions = np.zeros(len(cases_ct))
    Lockdown = np.zeros(len(cases_ct))
    Public_health_measures = np.zeros(len(cases_ct))


    row_counter = 0
    counter_1 = 0
    counter_2 = 0
    counter_3 = 0
    counter_4 = 0
    counter_5 = 0
    counter_6 = 0
    for i in range(0,len(cases_ct)):
        k = 0

        while(policies_ct['DATE_IMPLEMENTED'][k] <= cases_ct['Date'][i]):
            #---------------Block to update the counters for each category-----------------------#
            # print(i,k)
            if(policies_ct['CATEGORY'][k] == 'Governance and socio-economic measures'):
                # Update the category counter
                counter_1 += 1
                #Populate the row
                #Governance_and_socio_economic_measures[row_counter] = counter_1

            elif(policies_ct['CATEGORY'][k] == 'Social distancing'):
                counter_2 += 1
                #Social_distancing[row_counter] = counter_2

            elif(policies_ct['CATEGORY'][k] == 'Humanitarian exemption'):
                counter_3 += 1
                #Humanitarian_exemption[row_counter] = counter_3

            elif(policies_ct['CATEGORY'][k] == 'Movement restrictions'):
                counter_4 += 1
                #Movement_restrictions[row_counter] = counter_4

            elif(policies_ct['CATEGORY'][k] == 'Lockdown'):
                counter_5 += 1
                #Lockdown[row_counter] = counter_5

            elif(policies_ct['CATEGORY'][k] == 'Public health measures'):
                #print(counter_6)
                counter_6 += 1
                #Public_health_measures[row_counter] = counter_6
            #-------------------------------------xxxxxxxxxxxxxx---------------------------------#

            # Update k to check next policy date
            if(k < len(policies_ct)-1):
                k += 1
            else:
                break

        #----------Update the ith row with counts of each category------------------#    
        Governance_and_socio_economic_measures[i] = counter_1
        Social_distancing[i] = counter_2
        Humanitarian_exemption[i] = counter_3
        Movement_restrictions[i] = counter_4
        Lockdown[i] = counter_5
        Public_health_measures[i] = counter_6
        #----------------------xxxxxxxxxxxx------------------------#
    cases_ct['Lockdown'] = Lockdown
    cases_ct['Public_Health'] = Public_health_measures
    cases_ct['Governance_and_socio-economic'] = Governance_and_socio_economic_measures
    cases_ct['Humanitarian_exemption'] = Humanitarian_exemption
    cases_ct['Social_distancing'] = Social_distancing
    cases_ct['Movement_restrictions'] = Movement_restrictions
    
    return(cases_ct)

## Main

In [70]:
Italy_policy = policy_data_country_preprocess('Switzerland')
Italy_cases = case_data_country_preprocess('Switzerland')
dataset = create_dummy_variables(Italy_policy, Italy_cases)

In [71]:
dataset.to_csv('Switzerland_Covid_Policies.csv')