In [1]:
import numpy as np


In [2]:
import pandas as pd
# Load the processed NPI table. The resulting frame carries an 'Unnamed: 0'
# column (it is dropped in the following cells) — presumably a row index that
# was saved into the CSV by an earlier to_csv call; verify against the producer.
NPIS_proc = pd.read_csv('data/NPIS_LC_processed.csv')

In [3]:
# Preview the table without the 'Unnamed: 0' column. The result is only
# displayed, not assigned, so NPIS_proc itself still contains that column.
NPIS_proc.drop(columns='Unnamed: 0')

Unnamed: 0,Date,Compulsory_Mask,Eat_In_Ban,Entry_Recording,Lockdown,Sch_Closure,Gathering_Max,Mall_Distancing,Recreation,Workplace_Closure,Worship
0,2020-01-23,0,0,0,0,0,150.0,1.15,1.00,1.00,1.00
1,2020-01-24,0,0,0,0,0,150.0,1.15,1.00,1.00,1.00
2,2020-01-25,0,0,0,0,0,150.0,1.15,1.00,1.00,1.00
3,2020-01-26,0,0,0,0,0,150.0,1.15,1.00,1.00,1.00
4,2020-01-27,0,0,0,0,0,150.0,1.15,1.00,1.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...
569,2021-08-14,1,1,1,0,0,2.0,10.00,0.25,0.33,0.25
570,2021-08-15,1,1,1,0,0,2.0,10.00,0.25,0.33,0.25
571,2021-08-16,1,1,1,0,0,2.0,10.00,0.25,0.33,0.25
572,2021-08-17,1,1,1,0,0,2.0,10.00,0.25,0.33,0.25


# General rule of thumb
- Transform every covariate to a 0–1 scale.
- Only `Gathering_Max` and `Mall_Distancing` need to be transformed.

### Options for normalizing gathering max
1. Use $2^{-x}$ (the reciprocal of $2^{x}$). A plain reciprocal $1/x$ is not feasible because the column contains 0.
2. Use $e^{-x}$ (the reciprocal of $e^{x}$), so that a gathering limit of 150 is driven to practically zero.

### Options for normalizing mall distancing 
1. Rescale by dividing all `Mall_Distancing` values by 32.

# Stretch attempt: convert Recreation, Workplace_Closure and Worship to categorical factors encoded as dummy variables?


## Rescaling mall distancing 

In [4]:
# Working frame for all rescaling steps: the loaded table minus the
# 'Unnamed: 0' column. drop() already returns a new DataFrame, so NPIS_proc
# is left untouched.
NPIS_proc_rescaled_base = NPIS_proc.drop(columns="Unnamed: 0")

In [5]:
# Rescale Mall_Distancing by the fixed divisor 32 (see the notes above).
# The values are read from the untouched NPIS_proc frame rather than from the
# column being overwritten, so re-running this cell cannot divide by 32 twice.
# Both frames share the same row index, so the assignment aligns row-for-row.
NPIS_proc_rescaled_base["Mall_Distancing"] = NPIS_proc["Mall_Distancing"] / 32

In [6]:
# Display the base frame to check the rescaled Mall_Distancing column.
NPIS_proc_rescaled_base

Unnamed: 0,Date,Compulsory_Mask,Eat_In_Ban,Entry_Recording,Lockdown,Sch_Closure,Gathering_Max,Mall_Distancing,Recreation,Workplace_Closure,Worship
0,2020-01-23,0,0,0,0,0,150.0,0.035937,1.00,1.00,1.00
1,2020-01-24,0,0,0,0,0,150.0,0.035937,1.00,1.00,1.00
2,2020-01-25,0,0,0,0,0,150.0,0.035937,1.00,1.00,1.00
3,2020-01-26,0,0,0,0,0,150.0,0.035937,1.00,1.00,1.00
4,2020-01-27,0,0,0,0,0,150.0,0.035937,1.00,1.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...
569,2021-08-14,1,1,1,0,0,2.0,0.312500,0.25,0.33,0.25
570,2021-08-15,1,1,1,0,0,2.0,0.312500,0.25,0.33,0.25
571,2021-08-16,1,1,1,0,0,2.0,0.312500,0.25,0.33,0.25
572,2021-08-17,1,1,1,0,0,2.0,0.312500,0.25,0.33,0.25


## Transforming Gathering Max (Option 1)

In [13]:
# Option 1 works on its own deep copy so the shared base frame stays unchanged.
NPIS_proc_rescaled = NPIS_proc_rescaled_base.copy(deep=True)

In [14]:
# Option 1: map Gathering_Max to 1 / 2**x.
# The input is read from NPIS_proc_rescaled_base (never transformed) instead of
# the column being overwritten, so re-running this cell gives the same result
# rather than transforming already-transformed values.
NPIS_proc_rescaled["Gathering_Max"] = 1 / 2 ** NPIS_proc_rescaled_base["Gathering_Max"]

In [15]:
# Persist the option-1 frame as V3. index=True is the pandas default, spelled
# out here: the row index is written as an unnamed first column.
NPIS_proc_rescaled.to_csv('data/NPIS_LC_processed_V3.csv', index=True)

## Change allowance covariates to restrictions

In [16]:
# Flip the three columns via 1 - value (the "allowance -> restriction" change
# named in the heading above).
# The source values come from NPIS_proc_rescaled_base, which still holds the
# original numbers, so re-running this cell does not flip the columns back.
# NOTE: this still overwrites NPIS_proc_rescaled after V3 was written; every
# later cell that reads NPIS_proc_rescaled sees the flipped values.
col_names = ["Recreation", "Workplace_Closure", "Worship"]
for col in col_names:
    NPIS_proc_rescaled[col] = 1 - NPIS_proc_rescaled_base[col]

In [18]:
# Persist the option-1 frame with flipped Recreation/Workplace_Closure/Worship
# as V7. index=True is the pandas default, spelled out here: the row index is
# written as an unnamed first column.
NPIS_proc_rescaled.to_csv('data/NPIS_LC_processed_V7.csv', index=True)

## Transforming Gathering Max (Option 2)

In [10]:
# Option 2 works on its own deep copy so the shared base frame stays unchanged.
NPIS_proc_rescaled2 = NPIS_proc_rescaled_base.copy(deep=True)

In [11]:
# Option 2: map Gathering_Max to 1 / e**x.
# The input is read from NPIS_proc_rescaled_base (never transformed) instead of
# the column being overwritten, so re-running this cell gives the same result
# rather than transforming already-transformed values.
NPIS_proc_rescaled2["Gathering_Max"] = 1 / np.exp(NPIS_proc_rescaled_base["Gathering_Max"])

In [12]:
# Persist the option-2 frame as V4. index=True is the pandas default, spelled
# out here: the row index is written as an unnamed first column.
NPIS_proc_rescaled2.to_csv('data/NPIS_LC_processed_V4.csv', index=True)

## Multicategorical variable

In [13]:
# Prepare frames for the dummy-variable variants:
#   *_cat  - full copies (the three multi-level columns are recoded later),
#   *_drop - the same frames without those three columns.
# drop() returns a new DataFrame, so no separate copy() is needed before it.
# NOTE(review): the values in NPIS_proc_rescaled at this point depend on which
# earlier cells have been run (the 1 - value flip overwrites it in place) —
# confirm the intended input for the dummy encoding.
NPIS_proc_rescaled_cat = NPIS_proc_rescaled.copy()
NPIS_proc_rescaled2_cat = NPIS_proc_rescaled2.copy()
NPIS_proc_rescaled_drop = NPIS_proc_rescaled.drop(
    columns=["Recreation", "Workplace_Closure", "Worship"]
)
NPIS_proc_rescaled2_drop = NPIS_proc_rescaled2.drop(
    columns=["Recreation", "Workplace_Closure", "Worship"]
)

In [14]:
# Cast the three multi-level columns to categorical dtype: by default
# get_dummies only expands object/string/category columns, so numeric columns
# would otherwise pass through unchanged.
# The former `copy=False` argument is omitted: the `copy` keyword of astype is
# deprecated in recent pandas, and a cast to category builds new data anyway.
col_names = ["Recreation", "Workplace_Closure", "Worship"]
NPIS_proc_rescaled_cat = NPIS_proc_rescaled_cat.astype(
    {col: "category" for col in col_names}
)

In [15]:
# One indicator column per level of each of the three categorical columns.
# dtype is pinned to uint8 (the default before pandas 2.0). Newer pandas
# defaults to bool, which would be written to the CSV files as True/False
# instead of 1/0.
NPIS_dummies = pd.get_dummies(
    NPIS_proc_rescaled_cat[["Recreation", "Workplace_Closure", "Worship"]],
    dtype=np.uint8,
)

In [16]:
# Append the same dummy columns to both Gathering_Max variants (option 1 and
# option 2); rows are matched on the shared index.
NPIS_proc_rescaled_cat = pd.concat(
    (NPIS_proc_rescaled_drop, NPIS_dummies), axis="columns"
)
NPIS_proc_rescaled2_cat = pd.concat(
    (NPIS_proc_rescaled2_drop, NPIS_dummies), axis="columns"
)

In [17]:
# Persist the dummy-encoded variants: V5 (option-1 Gathering_Max) and
# V6 (option-2 Gathering_Max). index=True is the pandas default, spelled out
# here: the row index is written as an unnamed first column.
NPIS_proc_rescaled_cat.to_csv('data/NPIS_LC_processed_V5.csv', index=True)
NPIS_proc_rescaled2_cat.to_csv('data/NPIS_LC_processed_V6.csv', index=True)