In [74]:
import numpy as np
import pandas as pd

## Reading the Government Policy Data

In [98]:
# Reading the Government Policies dataset
policy_data = pd.read_excel('Goverment_Policy.xlsx')

# Preparing a dataFrame of relevant information from the whole file
policies = policy_data[['COUNTRY', 'CATEGORY', 'MEASURE', 'DATE_IMPLEMENTED']]

# Adding dates in the desired format in the dataset
policy_dates = pd.to_datetime(policies['DATE_IMPLEMENTED'],format='%Y-%m-%d').dt.date
policies['DATE_IMPLEMENTED'] = policy_dates

policies.COUNTRY.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'China, Hong Kong Special Administrative Region', 'Colombia',
       'Comoros', 'Congo', 'Congo DR', 'Costa Rica', "Côte d'Ivoire",
       'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Czech republic',
       'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Gren

## Reading the COVID 19 Cases Data

In [97]:
## Reading the dataset
cases = pd.read_csv('WHO-COVID-19-global-data.csv')

## Adding Dates in the desired format in the dataset
dates = pd.to_datetime(cases['dateRep'], format = '%d/%m/%Y').dt.date
cases['Date'] = dates

cases.countriesAndTerritories.unique()

array(['Afghanistan', 'Albania', 'Andorra', 'Algeria', 'Angola',
       'Anguilla', 'Antigua_and_Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia',
       'Bonaire, Saint Eustatius and Saba', 'Bosnia_and_Herzegovina',
       'Botswana', 'Brazil', 'British_Virgin_Islands',
       'Brunei_Darussalam', 'Bulgaria', 'Burkina_Faso', 'Burundi',
       'Cambodia', 'Cameroon', 'Canada', 'Cape_Verde',
       'Cases_on_an_international_conveyance_Japan', 'Cayman_Islands',
       'Central_African_Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo', 'Costa_Rica', 'Cote_dIvoire', 'Croatia', 'Cuba',
       'Curaçao', 'Cyprus', 'Czechia', 'Democratic_Republic_of_the_Congo',
       'Denmark', 'Djibouti', 'Dominica', 'Dominican_Republic', 'Ecuador',
       'Egypt', 'El_Salvador', 'Equatorial_Guinea', 'Eritrea', 'Estonia',


## Preprocessing Functions

In [77]:
def policy_data_country_preprocess(country):

    # Pulling out data for a particular country
    policies_ct = policies.loc[policies['COUNTRY'] == country]
    policies_ct = policies_ct.reset_index(drop=True)

    # Sorting Dataset by dates
    policies_ct = policies_ct.sort_values(by=['DATE_IMPLEMENTED'])
    policies_ct = policies_ct.reset_index(drop=True)

    # Deleting Duplicates
    policies_ct.drop_duplicates(subset=['COUNTRY', 'CATEGORY', 'MEASURE'], inplace = True)
    policies_ct = policies_ct.reset_index(drop=True)
    
    return(policies_ct)

def case_data_country_preprocess(country):
    
    ## Taking values for a particular country
    cases_ct = cases.loc[cases['countriesAndTerritories'] == country]
    cases_ct = cases_ct.reset_index(drop=True)

    ## Sorting by date
    # cases_ct = cases_ct.drop(columns=['dateRep'])
    cases_ct = cases_ct.sort_values(by=['Date'],ascending=True)
    cases_ct = cases_ct.reset_index(drop=True)
    
    return(cases_ct)

## Preparing Dummy variables for each Category of Measures Taken

In [78]:
def create_dummy_variables(policies_ct, cases_ct):
    # Defining arrays for each category
    Governance_and_socio_economic_measures = np.zeros(len(cases_ct))
    Social_distancing = np.zeros(len(cases_ct))
    Humanitarian_exemption = np.zeros(len(cases_ct))
    Movement_restrictions = np.zeros(len(cases_ct))
    Lockdown = np.zeros(len(cases_ct))
    Public_health_measures = np.zeros(len(cases_ct))


    row_counter = 0
    counter_1 = 0
    counter_2 = 0
    counter_3 = 0
    counter_4 = 0
    counter_5 = 0
    counter_6 = 0
    for i in range(0,len(cases_ct)):
        k = 0

        while(policies_ct['DATE_IMPLEMENTED'][k] <= cases_ct['Date'][i]):
            #---------------Block to update the counters for each category-----------------------#
            # print(i,k)
            if(policies_ct['CATEGORY'][k] == 'Governance and socio-economic measures'):
                # Update the category counter
                counter_1 += 1
                #Populate the row
                #Governance_and_socio_economic_measures[row_counter] = counter_1

            elif(policies_ct['CATEGORY'][k] == 'Social distancing'):
                counter_2 += 1
                #Social_distancing[row_counter] = counter_2

            elif(policies_ct['CATEGORY'][k] == 'Humanitarian exemption'):
                counter_3 += 1
                #Humanitarian_exemption[row_counter] = counter_3

            elif(policies_ct['CATEGORY'][k] == 'Movement restrictions'):
                counter_4 += 1
                #Movement_restrictions[row_counter] = counter_4

            elif(policies_ct['CATEGORY'][k] == 'Lockdown'):
                counter_5 += 1
                #Lockdown[row_counter] = counter_5

            elif(policies_ct['CATEGORY'][k] == 'Public health measures'):
                #print(counter_6)
                counter_6 += 1
                #Public_health_measures[row_counter] = counter_6
            #-------------------------------------xxxxxxxxxxxxxx---------------------------------#

            # Update k to check next policy date
            if(k < len(policies_ct)-1):
                k += 1
            else:
                break

        #----------Update the ith row with counts of each category------------------#    
        Governance_and_socio_economic_measures[i] = counter_1
        Social_distancing[i] = counter_2
        Humanitarian_exemption[i] = counter_3
        Movement_restrictions[i] = counter_4
        Lockdown[i] = counter_5
        Public_health_measures[i] = counter_6
        #----------------------xxxxxxxxxxxx------------------------#
    cases_ct['Lockdown'] = Lockdown
    cases_ct['Public Health Measures'] = Public_health_measures
    cases_ct['Governance and socio-economic measures'] = Governance_and_socio_economic_measures
    cases_ct['Humanitarian exemption'] = Humanitarian_exemption
    cases_ct['Social distancing'] = Social_distancing
    cases_ct['Movement restrictions'] = Movement_restrictions
    
    return(cases_ct)

## Main

In [101]:
Italy_policy = policy_data_country_preprocess('Italy')
Italy_cases = case_data_country_preprocess('Italy')
dataset = create_dummy_variables(Italy_policy, Italy_cases)

In [102]:
dataset.to_csv('Italy_Covid_Policies.csv')