#### Import modules

In [1]:
import os  # File system handling
import pandas as pd  # Dataframe handling

#### Set project folders

In [2]:
# The project root is the parent of the current working directory.
PROJECT_FOLDER = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# One sub-folder of <project>/data per stage of the data pipeline.
RAW_DATA_FOLDER, PROCESSED_DATA_FOLDER, FINAL_DATA_FOLDER = (
    os.path.join(PROJECT_FOLDER, 'data', stage)
    for stage in ('raw', 'processed', 'final')
)

#### Load dataframe

In [3]:
# Path of the processed feather file inside PROCESSED_DATA_FOLDER.
DATA = os.path.join(PROCESSED_DATA_FOLDER, 'fair_advice.feather')

# Read the file and print the per-column dtype / non-null summary.
df = pd.read_feather(DATA)
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3598 entries, 0 to 3597
Data columns (total 19 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Dataset                   3598 non-null   int64         
 1   SubmitTime                3598 non-null   datetime64[ns]
 2   Duration                  3598 non-null   int64         
 3   Controlquestion           1799 non-null   float64       
 4   Controlquestion_attempts  1799 non-null   float64       
 5   Treatment                 3598 non-null   int64         
 6   Treatment_name            3598 non-null   object        
 7   Dictator                  3598 non-null   int64         
 8   Decision                  3598 non-null   int64         
 9   Advise                    1799 non-null   float64       
 10  Follow                    1799 non-null   float64       
 11  Belief1                   598 non-null    float64       
 12  Belief2             

#### Make treatment indicator variables for each main effect

In [4]:
# Treatment codes that switch each main-effect dummy on (1) - all other codes give 0.
treatment_codes = {
    'Risk': [2, 4, 6],
    'Base': [1, 2],
    'Binding': [3, 4],
    'Free': [5, 6],
}
for effect, codes in treatment_codes.items():
    df[effect] = df['Treatment'].isin(codes).astype(int)

#### Make categorical label variables for each treatment axis (risk and advice type)

In [5]:
# Text label for the risk axis, derived from the 0/1 'Risk' dummy.
risk_labels = {0: 'No Risk', 1: 'Risk'}
df['Risk_type'] = df['Risk'].replace(risk_labels)

# Text label for the advice axis: each row gets the name of the advice dummy
# that equals 1. Rows where none of the three dummies is set are not assigned.
for advice in ('Base', 'Binding', 'Free'):
    df.loc[df[advice] == 1, 'Advice_type'] = advice

#### Make indicator variables for above-median age, education and political orientation

In [6]:
# For each covariate add a '<covariate>_m' dummy: 1 if the value is strictly
# above the sample median of that covariate, 0 otherwise.
for cov in ['Age', 'Education', 'Political']:
    above_median = (df[cov] > df[cov].median()).astype(int)
    # A missing value compares as False against the median and would be coded
    # 0 as if it were at-or-below the median; keep such rows missing instead.
    # When the covariate has no missing values the result is unchanged (int).
    df[f'{cov}_m'] = above_median.where(df[cov].notna())

#### Save dataframe

In [7]:
# Print the final schema so the columns being saved are visible in the notebook.
df.info(verbose=True)

# to_feather does not create missing parent directories, so make sure the
# output folder exists (e.g. on a fresh checkout) before writing.
os.makedirs(FINAL_DATA_FOLDER, exist_ok=True)
path = os.path.join(FINAL_DATA_FOLDER, 'fair_advice_final.feather')
df.to_feather(path)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3598 entries, 0 to 3597
Data columns (total 28 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Dataset                   3598 non-null   int64         
 1   SubmitTime                3598 non-null   datetime64[ns]
 2   Duration                  3598 non-null   int64         
 3   Controlquestion           1799 non-null   float64       
 4   Controlquestion_attempts  1799 non-null   float64       
 5   Treatment                 3598 non-null   int64         
 6   Treatment_name            3598 non-null   object        
 7   Dictator                  3598 non-null   int64         
 8   Decision                  3598 non-null   int64         
 9   Advise                    1799 non-null   float64       
 10  Follow                    1799 non-null   float64       
 11  Belief1                   598 non-null    float64       
 12  Belief2             

#### Convert to HTML

In [8]:
# Render 1_make_dataset.ipynb to HTML with nbconvert, writing the result to ./docs.
# NOTE(review): nbconvert presumably reads the notebook file from disk - save the
# notebook before running this cell so the export has the latest outputs (confirm).
!jupyter nbconvert --output-dir='./docs' --to html 1_make_dataset.ipynb

[NbConvertApp] Converting notebook 1_make_dataset.ipynb to html
[NbConvertApp] Writing 591467 bytes to docs/1_make_dataset.html
