## List of tables:

### 1. [Table 1: Treatment table](#treatment)

### 2. [Table 2: Background characteristics of subjects](#balance)

#### Import libraries

In [1]:
import matplotlib.pyplot as plt  # Plotting library
import os  #File system handling
import pandas as pd  #Dataframe handling
import statsmodels.formula.api as smf  #Regressions analysis

from statsmodels.iolib.summary2 import summary_col  #Regression output table

#### Set project directory

In [2]:
# The project root is two directory levels above the notebook's working directory.
PROJECT_FOLDER = os.path.dirname(os.path.dirname(os.getcwd()))

# Derive the data / report folders from the root in one pass.
FINAL_DATA_FOLDER, TABLES_FOLDER, FIGURES_FOLDER = (
    os.path.join(PROJECT_FOLDER, *parts)
    for parts in (
        ('data', 'final'),
        ('reports', 'tables'),
        ('reports', 'figures'),
    )
)

#### Pandas settings 

In [3]:
# Notebook-wide pandas display options, applied in order.
for option_name, option_value in (
    ("display.precision", 3),              # decimals shown for floats
    ("display.expand_frame_repr", False),  # do not wrap wide frames over several blocks
    ("display.max_rows", 25),              # truncate longer frames when displayed
):
    pd.set_option(option_name, option_value)

#### Set plotting style

In [4]:
# Apply matplotlib's built-in 'classic' style sheet to figures drawn after this cell.
plt.style.use('classic')

#### Set plotting properties 

In [5]:
# Reusable keyword-argument bundles for plotting calls, grouped by the
# element they style (bars, lines, text, grid, axis labels, error bars,
# legend, ticks).
bar_kw = {'kind': 'bar', 'color': '0.4', 'alpha': 0.8, 'lw': 0.5, 'width': 0.7}
line_kw = {'kind': 'line', 'lw': 1, 'alpha': 1, 'legend': True}
font_kw = {'fontsize': 11, 'color': 'k'}
grid_kw = {'linewidth': 1, 'axis': "y", 'zorder': 2, 'antialiased': True}
x_lab_kw = {'fontsize': 11, 'labelpad': 3}
y_lab_kw = {'fontsize': 11, 'labelpad': 3}
error_kw = {'elinewidth': 2, 'ecolor': '0.15'}
legend_kw = {'frameon': False}
tick_kw = {
    'size': 5,
    'which': 'both',
    'direction': 'out',
    'right': False,
    'top': False,
    'labelbottom': True,
}

#### Retrieving dataframe

In [6]:
# Location of the final dataset for experiment 2, stored in Feather format.
DATA = os.path.join(FINAL_DATA_FOLDER, 'experiment_2', 'data_final.feather')

# Load it and print a structural summary (row count, column count, dtypes, memory).
df = pd.read_feather(DATA)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1768 entries, 0 to 1767
Columns: 596 entries, Age to Q110_timer
dtypes: datetime64[ns](2), float64(309), int64(26), object(259)
memory usage: 8.0+ MB


#### Table 1: Treatment table <a id='treatment'></a>

In [7]:
# Cross-tabulate the two treatment dimensions (Goal x Charisma).
# margins=True appends the 'All' row and column totals; the 0/1 codes on
# both axes are relabelled 'No'/'Yes'.
# The chain is wrapped in parentheses rather than joined with trailing
# backslashes, so it does not depend on what follows the last line.
treat_table = (
    pd.crosstab(df['Goal'], df['Charisma'], margins=True)
    .rename(index={0: 'No', 1: 'Yes'}, columns={0: 'No', 1: 'Yes'})
)

# Export the table as LaTeX; counts are written without decimals.
# NOTE(review): this file is written directly into TABLES_FOLDER, whereas the
# other tables in this notebook go to the 'experiment_2' subfolder -- confirm
# that this location is intended.
treat_table.to_latex(
    os.path.join(TABLES_FOLDER, 'treat_table.tex'),
    bold_rows=True,
    float_format="%.0f"
)

treat_table

Charisma,No,Yes,All
Goal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
No,444,442,886
Yes,438,444,882
All,882,886,1768


#### Table 2: Background characteristics of subjects <a id='balance'></a>

In [8]:
# Row order for the treatment arms and the background covariates to summarise.
treat_index = ['Neutral', 'Charisma without goal', 'Goal', 'Full charisma']
covariates = ['Age', 'Female', 'Education', 'Mobile_device']

# Mean and standard error of the mean of each covariate by treatment arm,
# plus an 'All' margin row for the pooled sample.
balance = (
    pd.pivot_table(
        df,
        index='Treatment_str',
        values=covariates,
        aggfunc=['mean', 'sem'],
        margins=True,
    )
    # Presentation labels: statistic names first, then covariate names.
    .rename(columns={'mean': 'Mean', 'sem': 'Se'}, level=0)
    .rename(columns=lambda x: x.replace('_', ' '), level=1)
    .rename_axis('Treatment')
    # Put the covariate on the outer column level and group its statistics together.
    .swaplevel(axis=1)
    .sort_index(axis=1, level=0)
    # Fix the row order, with the pooled row last.
    .reindex([*treat_index, 'All'])
)

# Number of observations per arm; the pooled count is the sum over arms.
obs = df.groupby('Treatment_str').size()
obs['All'] = obs.sum()
balance.loc[:, 'N'] = obs

# Export the table as LaTeX with two decimals.
path = os.path.join(TABLES_FOLDER, 'experiment_2', 'balance_table.tex')
balance.to_latex(path, bold_rows=True, float_format="%.2f")

balance

Unnamed: 0_level_0,Age,Age,Education,Education,Female,Female,Mobile device,Mobile device,N
Unnamed: 0_level_1,Mean,Se,Mean,Se,Mean,Se,Mean,Se,Unnamed: 9_level_1
Treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Neutral,37.286,0.536,4.574,0.062,0.507,0.024,0.034,0.009,444
Charisma without goal,37.692,0.545,4.416,0.062,0.475,0.024,0.029,0.008,442
Goal,37.758,0.551,4.447,0.061,0.457,0.024,0.057,0.011,438
Full charisma,37.876,0.546,4.518,0.062,0.45,0.024,0.034,0.009,444
All,37.653,0.272,4.489,0.031,0.472,0.012,0.038,0.005,1768


#### Check covariate balance using OLS <a id='balance_ols'></a>

In [9]:
# Balance check: regress each background covariate on treatment-arm dummies
# (C(Treatment) makes the variable categorical) with HC1 robust standard errors.
ols_results = [
    smf.ols(formula=f'{variable} ~ C(Treatment)', data=df).fit(cov_type='HC1')
    for variable in covariates
]

# Extra per-model statistics requested for the bottom of the table.
# NOTE(review): the output saved with this notebook shows only the R-squared
# rows, not N / F / P(>F) -- re-run and confirm these rows are rendered.
auxiliary = {
    'N': lambda x: f'{x.nobs:.0f}',
    'F': lambda x: f'{x.fvalue:.3f}',
    'P(>F)': lambda x: f'{x.f_pvalue:.2f}',
}

# Side-by-side regression table, one column per covariate.
# The Summary object has its own name so that `ols_balance` only ever refers
# to the final DataFrame.
ols_summary = summary_col(
    ols_results,
    stars=True,
    info_dict=auxiliary,
    float_format='%.3f'
)

# Readable row labels. Keys are what remains of the coefficient names once the
# 'C(Treatment)[T.' prefix and the closing ']' are stripped. The wording matches
# the treatment labels used in the background-characteristics table
# ('Charisma without goal', lower-case 'goal').
label_index = {
    'Intercept': 'Constant',
    '2': 'Charisma without goal',
    '3': 'Goal',
    '4': 'Full charisma'
}

ols_balance = (
    ols_summary.tables[0]
    # 'C(Treatment)[T.2]' -> '2'
    .rename(mapper=lambda x: x.replace('C(Treatment)[T.', '').replace(']', ''), axis=0)
    # 'Mobile_device' -> 'Mobile device'
    .rename(mapper=lambda x: x.replace('_', ' '), axis=1)
    .rename(label_index, axis=0)
    .rename_axis('Dependent variable:', axis=1)
)

# Export the table as LaTeX.
path = os.path.join(TABLES_FOLDER, 'experiment_2', 'ols_balance.tex')
ols_balance.to_latex(
    path,
    bold_rows=False,
    float_format="%.3f"
)

ols_balance

Dependent variable:,Age,Female,Education,Mobile device
Constant,37.286***,0.507***,4.574***,0.034***
,(0.536),(0.024),(0.062),(0.009)
Charisma without Goal,0.406,-0.032,-0.158*,-0.004
,(0.765),(0.034),(0.088),(0.012)
Goal,0.472,-0.050,-0.127,0.023*
,(0.769),(0.034),(0.087),(0.014)
Full charisma,0.590,-0.056*,-0.056,0.000
,(0.765),(0.034),(0.088),(0.012)
R-squared,0.000,0.002,0.002,0.003
R-squared Adj.,-0.001,0.000,0.001,0.001


#### Convert to html

In [10]:
# Shell escape: export the notebook to HTML in ./docs.
# The notebook filename is hard-coded here, so it must be updated if the file is renamed.
!jupyter nbconvert --output-dir='./docs' --to html 2_balance_and_sample.ipynb

[NbConvertApp] Converting notebook 2_balance_and_sample.ipynb to html
[NbConvertApp] Writing 604633 bytes to docs/2_balance_and_sample.html
