## List of tables:

### 1. [Table 1: Treatment table](#treatment)
### 2. [Table 2: Background characteristics of subjects](#balance)

#### Import libraries

In [1]:
import os  #File system handling
import pandas as pd  #Dataframe handling
import statsmodels.formula.api as smf  #Regressions analysis

from statsmodels.iolib.summary2 import summary_col  #Regression output table

#### Pandas settings

In [2]:
# Notebook-wide pandas display settings: three decimals, no wrapping of wide
# frames across several blocks, and at most 25 rows per rendered frame.
pd.options.display.precision = 3
pd.options.display.expand_frame_repr = False
pd.options.display.max_rows = 25

#### Set project directory

In [3]:
# The project root is taken to be two directory levels above the current
# working directory, so the kernel must be started from the notebook's folder.
PROJECT_FOLDER = os.getcwd()
for _level in range(2):
    PROJECT_FOLDER = os.path.dirname(PROJECT_FOLDER)

# Input data and report output locations, all relative to the project root.
FINAL_DATA_FOLDER = os.path.join(PROJECT_FOLDER, 'data', 'final')
TABLES_FOLDER, FIGURES_FOLDER = (
    os.path.join(PROJECT_FOLDER, 'reports', subfolder)
    for subfolder in ('tables', 'figures')
)

#### Retrieving dataframe

In [4]:
# Path of the final experiment-1 dataset inside the final-data folder.
DATA = os.path.join(FINAL_DATA_FOLDER, 'experiment_1', 'data_final.feather')

# Load the feather file and print a structural summary (row count, column
# count, dtypes, memory footprint) as a quick sanity check.
df = pd.read_feather(DATA)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3076 entries, 0 to 3075
Columns: 443 entries, Age to Q80_timer
dtypes: float64(225), int64(26), object(192)
memory usage: 10.4+ MB


#### Separate quality-concern treatments from the main analysis that follows

In [5]:
# Keep only the rows flagged as belonging to the 'Main' dataset; every table
# below is computed on this restricted sample.
sel = df['Dataset'].eq('Main')
df = df.loc[sel]

#### Table 1: Treatment table <a id='treatment'></a>

In [6]:
# Count observations per (performance pay x leadership technique) cell, with
# 'All' margins for row and column totals. Rows are sorted in descending label
# order, which in the rendered output places 'No piece rate' first and the
# 'All' margin last.
treat_table = (
    pd.crosstab(
        index=df['Performance_pay'],
        columns=df['Leadership_technique'],
        margins=True,
    )
    .sort_index(axis=0, level=0, ascending=False)
)

# Export the table to LaTeX; counts are written without decimals.
path = os.path.join(TABLES_FOLDER, 'experiment_1', 'treat_table.tex')
treat_table.to_latex(path, bold_rows=True, float_format="%.0f")

treat_table

Leadership_technique,No message,Praise,Reference point,All
Performance_pay,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No piece rate,300,292,299,891
Low piece rate,295,301,295,891
High piece rate,302,297,299,898
All,897,890,893,2680


#### Table 2: Background characteristics of subjects <a id='balance'></a>

In [7]:
# Background characteristics to summarise, and the two treatment dimensions
# that define the rows of the balance table.
covariates = ['Age', 'Female', 'Education', 'Mobile_device', 'Latin']
pivot_index = ['Performance_pay', 'Leadership_technique']

# Mean and standard error of each covariate per treatment cell, plus an 'All'
# margin row. The chain then relabels the headers, puts the covariate name on
# the outer column level (statistic on the inner level), and orders the rows.
balance = (
    pd.pivot_table(
        data=df,
        values=covariates,
        index=pivot_index,
        aggfunc=['mean', 'sem'],
        margins=True,
    )
    .rename(columns={'mean': 'Mean', 'sem': 'Se'}, level=0)
    .rename(mapper=str.capitalize, axis=1, level=1)
    .rename_axis(['Performance pay', 'Leadership technique'])
    .swaplevel(axis=1)
    .sort_index(axis=1, level=0)
    .sort_index(ascending=False, axis=0)
    # The empty label keeps the 'All' margin row, whose second index level is ''.
    .reindex(['No message', 'Praise', 'Reference point', ''], level=1)
)

# Number of observations per treatment cell; the extra 'All' entry holds the
# overall count and lines up with the margin row when assigned as column 'N'.
obs = df.groupby(pivot_index).size()
obs['All'] = df.groupby('Treatment_str').size().sum()
balance.loc[:, ('N', '')] = obs

# Export the table to LaTeX with two decimals.
path = os.path.join(TABLES_FOLDER, 'experiment_1', 'balance_table.tex')
balance.to_latex(path, bold_rows=True, float_format="%.2f")

balance

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Education,Education,Female,Female,Latin,Latin,Mobile_device,Mobile_device,N
Unnamed: 0_level_1,Unnamed: 1_level_1,Mean,Se,Mean,Se,Mean,Se,Mean,Se,Mean,Se,Unnamed: 12_level_1
Performance pay,Leadership technique,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
No piece rate,No message,36.283,0.592,3.117,0.079,0.5,0.029,1.423,0.043,0.05,0.013,300
No piece rate,Praise,36.045,0.616,3.236,0.077,0.497,0.029,1.38,0.041,0.027,0.01,292
No piece rate,Reference point,35.773,0.645,3.124,0.071,0.538,0.029,1.445,0.042,0.067,0.014,299
Low piece rate,No message,35.875,0.641,3.078,0.074,0.498,0.029,1.407,0.039,0.068,0.015,295
Low piece rate,Praise,34.488,0.555,3.07,0.077,0.502,0.029,1.412,0.044,0.04,0.011,301
Low piece rate,Reference point,35.42,0.642,3.153,0.077,0.492,0.029,1.447,0.046,0.031,0.01,295
High piece rate,No message,34.93,0.614,3.017,0.076,0.457,0.029,1.457,0.043,0.046,0.012,302
High piece rate,Praise,35.152,0.643,3.131,0.072,0.519,0.029,1.401,0.039,0.064,0.014,297
High piece rate,Reference point,36.08,0.653,3.087,0.076,0.538,0.029,1.475,0.046,0.047,0.012,299
All,,35.558,0.208,3.112,0.025,0.504,0.01,1.428,0.014,0.049,0.004,2680


#### Covariate balance using OLS <a id='balance_ols'></a>

In [8]:
# Regress each background covariate on the treatment indicators, using HC1
# (heteroskedasticity-robust) standard errors. One fitted model per covariate,
# in the order of `covariates`.
ols_results = []
for variable in covariates:
    model = smf.ols(formula=f'{variable} ~ C(Treatment_str)', data=df)
    ols_results.append(model.fit(cov_type='HC1'))

# Extra rows appended below the coefficients: sample size, the F statistic
# and its p-value, each pre-formatted as a string.
auxiliary = {
    'N': lambda res: f'{res.nobs:.0f}',
    'F': lambda res: f'{res.fvalue:.3f}',
    'P(>F)': lambda res: f'{res.f_pvalue:.3f}',
}

# Combine the regressions side by side, one column per dependent variable.
ols_summary = summary_col(
    ols_results,
    stars=True,
    info_dict=auxiliary,
    float_format='%.3f',
)

# Tidy the labels: strip the patsy 'C(Treatment_str)[T.…]' wrapper from the row
# names, replace underscores in the column names, and call the intercept
# 'Constant'.
ols_balance = (
    ols_summary.tables[0]
    .rename(
        mapper=lambda label: label.replace('C(Treatment_str)[T.', '').replace(']', ''),
        axis=0,
    )
    .rename(mapper=lambda label: label.replace('_', ' '), axis=1)
    .rename({'Intercept': 'Constant'}, axis=0)
    .rename_axis('Dependent variable:', axis=1)
)

# Export the table to LaTeX with three decimals.
path = os.path.join(TABLES_FOLDER, 'experiment_1', 'ols_balance.tex')
ols_balance.to_latex(path, bold_rows=False, float_format="%.3f")

ols_balance

Dependent variable:,Age,Female,Education,Mobile device,Latin
Constant,34.930***,0.457***,3.017***,0.046***,1.457***
,(0.614),(0.029),(0.076),(0.012),(0.043)
High piece rate + Praise,0.221,0.062,0.115,0.018,-0.056
,(0.890),(0.041),(0.104),(0.019),(0.058)
High piece rate + Reference point,1.150,0.082**,0.070,0.000,0.018
,(0.897),(0.041),(0.107),(0.017),(0.063)
Low piece rate + No message,0.944,0.041,0.061,0.021,-0.050
,(0.888),(0.041),(0.106),(0.019),(0.059)
Low piece rate + Praise,-0.442,0.045,0.053,-0.006,-0.045
,(0.828),(0.041),(0.108),(0.017),(0.062)


In [9]:
# Export this notebook to HTML in ./docs (relative to the working directory).
# The notebook filename is hard-coded, so update it here if the file is renamed.
# NOTE(review): nbconvert presumably converts the copy saved on disk — save the
# notebook before running this cell so the export reflects the latest outputs.
!jupyter nbconvert --output-dir='./docs' --to html 2_balance_and_sample.ipynb

[NbConvertApp] Converting notebook 2_balance_and_sample.ipynb to html
[NbConvertApp] Writing 593761 bytes to docs/2_balance_and_sample.html
