### List of tables:

1. [Table 1: Treatment table](#treatment)
2. [Table 2: Background characteristics for subjects](#balance)

In [1]:
%matplotlib inline

#### Imports libraries

In [2]:
import matplotlib.pyplot as plt  #plotting
import numpy as np  #matrix algebra
import os  #file system handling
import sys
import pandas as pd  #dataframe handling
import re  # regular expression
import statsmodels.api as sm  #regression analysis
import statsmodels.formula.api as smf  # regressions analysis

from linearmodels.panel import RandomEffects  #panel analysis
from matplotlib.ticker import FuncFormatter  #formating graphs
from scipy import stats  #statistics module
from statsmodels.iolib.summary2 import summary_col  #regression output table
from statsmodels.stats.anova import anova_lm  #regression anova table
from statsmodels.stats.multicomp import MultiComparison  #multi-comparisson tests

#### Turn off pandas' future warnings

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#### Import  tools

In [4]:
sys.path.insert(0, '../')
from tools import MyPanelModelComparison

#### Set project directory

In [5]:
PROJECT_FOLDER = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
FINAL_DATA_FOLDER = os.path.join(PROJECT_FOLDER, 'data', 'final')
TABLES_FOLDER = os.path.join(PROJECT_FOLDER, 'reports', 'tables')
FIGURES_FOLDER = os.path.join(PROJECT_FOLDER, 'reports', 'figures')

#### Set display format

In [6]:
pd.options.display.float_format = '{:.5f}'.format

#### Set plotting style

In [7]:
plt.style.use('classic')

#### Set plotting properties 

In [8]:
bar_kwargs = dict(kind='bar', color='0.3', alpha=1, lw=0.5, width=0.7)
line_kwargs = dict(kind='line', lw=2, alpha=1, legend=True)
tick_kwargs = dict(size=5, which='both', bottom=True, direction='out', labelbottom=True)
font_kwargs = dict(fontsize=10, fontweight='bold', color='k')
grid_kwargs = dict(linewidth=1, axis="y", zorder=2, antialiased=True)
xlabel_kwargs = dict(fontsize=12, labelpad=3)
ylabel_kwargs = dict(fontsize=12, labelpad=3)
e_kwargs = dict(elinewidth=2, ecolor='0.15')
legend_kwargs = dict(frameon=False)

#### Retrieving dataframe

In [9]:
DATA = os.path.join(FINAL_DATA_FOLDER, 'benefit_data.feather')
df = pd.read_feather(DATA)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3094 entries, 0 to 3093
Columns: 441 entries, age to question80_timer
dtypes: float64(223), int64(25), object(193)
memory usage: 10.4+ MB


#### Sort dataframe by treatment 

In [10]:
df = df.sort_values('treatment')

#### Exclude subjects for which the timer did not work (10 min +/- 5%)

In [11]:
broken_timer = (df.duration_work < 600 * 0.95)|(df.duration_work > 600 * 1.05)
columns = ['workerid', 'treatment_str', 'duration_work', 'counter']
df_broken = df.loc[broken_timer, columns].sort_values(by='duration_work')
df_broken.to_html(os.path.join(TABLES_FOLDER, 'broken.html'), bold_rows=True, float_format="%.2f")
display(df_broken)

Unnamed: 0,workerid,treatment_str,duration_work,counter
713,713,No piece rate + Expectations,394.0,15
2596,2596,No piece rate + Goal,503.0,59
2027,2027,High piece rate + Expectations + Clarification,634.0,35
2106,2106,High piece rate + Expectations + Clarification,636.0,0
660,660,No piece rate + Neutral,640.0,20
695,695,High piece rate + Expectations,642.0,14
392,392,Low piece rate + Neutral,654.0,13
3047,3047,No piece rate + Goal,655.0,13
577,577,High piece rate + Neutral,670.0,3
2783,2783,No piece rate + Goal,675.0,12


#### Drop subjects with broken timer 

In [12]:
df = df[~broken_timer]

#### Seperate quality concern-treatments from the following main analysys

In [13]:
sel = (df.dataset != 6)
df = df[sel]

#### Table 1: Treatment table <a id='treatment'></a>

In [14]:
index_first = pd.Index(['No piece rate', 'Low piece rate', 'High piece rate'], name='Treatment')
index_second = pd.Index(['Neutral', 'Expectations', 'Goal'], name='Treatment')

treat_table = pd.crosstab(index=df.treatment_first, columns=[df.treatment_second], margins=True)\
    .reindex(index_first.append(pd.Index(['All'])), axis=0)\
    .reindex(index_second.append(pd.Index(['All'])), axis=1)\

treat_table.to_latex(os.path.join(TABLES_FOLDER, 'treat_table.tex'), bold_rows=True, float_format="%.0f")
display(treat_table)

Unnamed: 0,Neutral,Expectations,Goal,All
No piece rate,300,292,299,891
Low piece rate,295,301,295,891
High piece rate,302,297,299,898
All,897,890,893,2680


#### Table 2:  Background characterisitcs of subjects <a id='balance'></a>

In [15]:
covariates = ['age', 'female', 'education', 'mobile_device', 'latin']
pivot_index = ['treatment_first', 'treatment_second']
balance = pd.pivot_table(df, index=pivot_index , aggfunc=['mean', 'sem'], values=covariates, margins=True)\
    .rename(columns={'mean':'Mean', 'sem':'Se'}, level=0)\
    .rename(mapper=lambda x: x.capitalize(), axis=1, level=1)\
    .rename_axis(['Performance pay', 'Leadership technique'])\
    .swaplevel(axis=1)\
    .sort_index(axis=1, level=0)\
    .sort_index(ascending=False, axis=0, level=0)

balance.to_latex(os.path.join(TABLES_FOLDER, 'balance_table.tex'),
                 bold_rows=True,
                 float_format="%.2f")
display(balance)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Education,Education,Female,Female,Latin,Latin,Mobile_device,Mobile_device
Unnamed: 0_level_1,Unnamed: 1_level_1,Mean,Se,Mean,Se,Mean,Se,Mean,Se,Mean,Se
Performance pay,Leadership technique,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
No piece rate,Neutral,36.28333,0.59159,3.11667,0.0788,0.5,0.02892,1.42333,0.04266,0.05,0.0126
No piece rate,Goal,35.77258,0.64543,3.12375,0.07141,0.53846,0.02888,1.44482,0.04235,0.06689,0.01447
No piece rate,Expectations,36.04452,0.61563,3.2363,0.07737,0.49658,0.02931,1.38014,0.041,0.0274,0.00957
Low piece rate,Neutral,35.87458,0.64144,3.07797,0.07371,0.49831,0.02916,1.40678,0.03948,0.0678,0.01466
Low piece rate,Goal,35.42034,0.64206,3.15254,0.07699,0.49153,0.02916,1.44746,0.04567,0.03051,0.01003
Low piece rate,Expectations,34.48837,0.55542,3.06977,0.07715,0.50166,0.02887,1.41196,0.04401,0.03987,0.0113
High piece rate,Neutral,34.93046,0.61443,3.01656,0.07599,0.45695,0.02871,1.45695,0.04336,0.04636,0.01212
High piece rate,Goal,36.08027,0.65295,3.08696,0.07578,0.53846,0.02888,1.47492,0.04576,0.04682,0.01224
High piece rate,Expectations,35.15152,0.64335,3.13131,0.07169,0.51852,0.02904,1.40067,0.03923,0.06397,0.01422
All,,35.55821,0.20765,3.11194,0.02515,0.50448,0.00966,1.42761,0.01422,0.04888,0.00417


#### Covariate balance using OLS <a id='balance_ols'></a>

In [16]:
ols_results = []
for variable in covariates:
    reg = '{} ~ C(treatment_str)'.format(variable)
    ols = smf.ols(formula=reg, data=df).fit()
    ols = ols.get_robustcov_results()
    ols_results.append(ols)

auxiliary = {
    'N': lambda x: "{:d}".format(int(x.nobs)),
    'R2': lambda x: "{:.3f}".format(x.rsquared),
    'P': lambda x: "{:.3f}".format(x.f_pvalue.item()),
}

ols_balance = summary_col(
    ols_results,
    stars=True,
    info_dict=auxiliary,
    float_format='%.3f'
)

ols_balance = ols_balance\
    .tables[0]\
    .rename(mapper=lambda x: x.replace('C(treatment_str)[T.', '').replace(']', ''), axis=0)\
    .rename(mapper=lambda x: x.capitalize().replace('_',' '), axis=1)\
    .rename({'Intercept':'Constant'})

ols_balance.to_latex(
    os.path.join(TABLES_FOLDER, 'ols_balance.tex'),
    bold_rows=True,
    float_format="%.3f"
)
display(ols_balance)

Unnamed: 0,Age,Female,Education,Mobile device,Latin
Constant,35.152***,0.519***,3.131***,0.064***,1.401***
,(0.643),(0.029),(0.072),(0.014),(0.039)
High piece rate + Goal,0.929,0.020,-0.044,-0.017,0.074
,(0.917),(0.041),(0.104),(0.019),(0.060)
High piece rate + Neutral,-0.221,-0.062,-0.115,-0.018,0.056
,(0.890),(0.041),(0.104),(0.019),(0.058)
Low piece rate + Expectations,-0.663,-0.017,-0.062,-0.024,0.011
,(0.850),(0.041),(0.105),(0.018),(0.059)
Low piece rate + Goal,0.269,-0.027,0.021,-0.033*,0.047
,(0.909),(0.041),(0.105),(0.017),(0.060)


In [24]:
df.duration_survey.describe() / 60

count   44.66667
mean    12.98039
std      3.68778
min     10.55000
25%     11.48333
50%     11.97500
75%     12.88333
max     51.60000
Name: duration_survey, dtype: float64