## List of tables:
### 1. [Table 3: Tabulate choices and beliefs](#tabulate)
### 2. [Table 4: Estimating treatment effects, with beliefs](#regression)






In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

#### Import libraries

In [2]:
import matplotlib.pyplot as plt  # Plotting
import os  # File system handling
import pandas as pd  # Dataframe handling
import statsmodels.formula.api as smf  # Regression analysis

from matplotlib.ticker import FuncFormatter  # Formatting graphs
from scipy import stats  # Statistics module
from statsmodels.iolib.summary2 import summary_col  # Regression output table
from statsmodels.stats.anova import anova_lm  # Regression anova table
from statsmodels.stats.multicomp import MultiComparison  # Multi-comparison tests
from statsmodels.stats.proportion import proportions_ztest # Proportion z-test

#### Set project directory

In [3]:
# The project root is the parent of the current working directory.
PROJECT_FOLDER = os.path.dirname(os.getcwd())


def _under_project(*parts):
    """Return the path made of ``parts`` joined below PROJECT_FOLDER."""
    return os.path.join(PROJECT_FOLDER, *parts)


# Input data and report output locations, all relative to the project root.
FINAL_DATA_FOLDER = _under_project('data', 'final')
TABLES_FOLDER = _under_project('reports', 'tables')
FIGURES_FOLDER = _under_project('reports', 'figures')

#### Pandas options

In [4]:
# Display settings applied to every dataframe rendered in this notebook.
for option_name, option_value in {
    "display.precision": 3,
    "display.expand_frame_repr": False,
    "display.max_rows": 40,
}.items():
    pd.set_option(option_name, option_value)

#### Set plotting style

In [5]:
# Apply matplotlib's 'classic' style sheet to the figures drawn below.
plt.style.use('classic')

#### Set plotting properties 

In [6]:
# Reusable keyword-argument bundles for plotting calls.
bar_kw = {'kind': 'bar', 'color': '0.4'}
line_kw = {'kind': 'line', 'lw': 1, 'alpha': 1, 'legend': True}
font_kw = {'fontsize': 11, 'color': 'k'}
grid_kw = {'linewidth': 1, 'axis': "y", 'zorder': 2, 'antialiased': True}
xlab_kw = {'fontsize': 11, 'labelpad': 3}
ylab_kw = {'fontsize': 11, 'labelpad': 3}
error_kw = {'elinewidth': 2, 'ecolor': '0.15'}
legend_kw = {'frameon': False}
tick_kw = {
    'size': 5,
    'which': 'both',
    'direction': 'out',
    'right': False,
    'top': False,
    'labelbottom': True,
}

#### Retrieving dataframe

In [7]:
# Location of the final dataset inside the project's final-data folder.
DATA = os.path.join(FINAL_DATA_FOLDER, 'fair_advice_final.feather')

# Read the feather file into a dataframe and print its column summary.
df = pd.read_feather(DATA)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3598 entries, 0 to 3597
Data columns (total 28 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Dataset                   3598 non-null   int64         
 1   SubmitTime                3598 non-null   datetime64[ns]
 2   Duration                  3598 non-null   int64         
 3   Controlquestion           1799 non-null   float64       
 4   Controlquestion_attempts  1799 non-null   float64       
 5   Treatment                 3598 non-null   int64         
 6   Treatment_name            3598 non-null   object        
 7   Dictator                  3598 non-null   int64         
 8   Decision                  3598 non-null   int64         
 9   Advise                    1799 non-null   float64       
 10  Follow                    1799 non-null   float64       
 11  Belief1                   598 non-null    float64       
 12  Belief2             

#### Select only treatments with certain outcomes

In [8]:
# Keep only the rows whose Risk flag is 0.
has_no_risk = df['Risk'] == 0
df = df[has_no_risk]

#### Selector for only 'Free advice' treatment

In [9]:
# Rows whose Free flag is 1 (the 'Free advice' treatment).
in_free_treatment = df['Free'] == 1
df_free = df[in_free_treatment]

#### How many players X believe that player Y will follow?

In [10]:
# Belief1 of the rows with Dictator == 1 in the 'Free advice' treatment.
player_x_belief = df_free.loc[df_free['Dictator'] == 1, 'Belief1']

# Mean, standard error of the mean, and number of non-missing answers.
df_belief1 = player_x_belief.agg(['mean', 'sem', 'count']).to_frame()

df_belief1

Unnamed: 0,Belief1
mean,0.752
sem,0.025
count,302.0


#### How many players Y follow?

In [11]:
# Follow of the rows with Dictator == 0 in the 'Free advice' treatment.
player_y_follow = df_free.loc[df_free['Dictator'] == 0, 'Follow']

# Mean, standard error of the mean, and number of non-missing observations.
# NOTE: despite its name, this frame summarises the Follow column.
df_belief2 = player_y_follow.agg(['mean', 'sem', 'count']).to_frame()

df_belief2

Unnamed: 0,Follow
mean,0.744
sem,0.025
count,301.0


#### Cross tabulating beliefs and choices <a id='tabulate'></a>

In [12]:
# Rows with Dictator == 1 in the 'Free advice' treatment (Free == 1).
free_player_x = df[(df['Dictator'] == 1) & (df['Free'] == 1)]

# Counts of Decision (rows) by Belief1 (columns), including 'All' margins,
# with the 0/1 codes replaced by readable labels.
cross_table = (
    pd.crosstab(
        free_player_x['Decision'],
        free_player_x['Belief1'],
        margins=True,
        normalize=False,
    )
    .rename(columns={0: 'Player Y will not follow', 1: 'Player Y will follow'})
    .rename_axis(columns='Belief')
    .rename(index={0: 'Fair', 1: 'Selfish'})
    .rename_axis(index='Decision')
)

# Export the table as LaTeX into the tables folder.
cross_table_path = os.path.join(TABLES_FOLDER, 'cross_table_belief1.tex')
cross_table.style.format(precision=3).to_latex(cross_table_path)

display(cross_table)

Belief,Player Y will not follow,Player Y will follow,All
Decision,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fair,52,143,195
Selfish,23,84,107
All,75,227,302


#### Chi2 test

In [13]:
# Top-left 2x2 block of the cross table, i.e. without the 'All' margins.
observed_counts = cross_table.iloc[:2, :2]

# Chi-square test of independence between decision and belief.
chi_val, p_val, dof, expected = stats.chi2_contingency(observed_counts)
print(f'Chi²: {chi_val:.3f}, p-value: {p_val:.3f} ')

Chi²: 0.732, p-value: 0.392 


#### Fisher exact test

In [14]:
# Two-sided Fisher exact test on the 2x2 table without the 'All' margins.
counts_2x2 = cross_table.iloc[:2, :2]
odds_ratio, p_value = stats.fisher_exact(counts_2x2, alternative='two-sided')
print(f'Odds-ratio={odds_ratio:.3f}, p-value={p_value:.3f}')

Odds-ratio=1.328, p-value=0.334


#### Select only player X and certain outcomes

In [15]:
# Restrict the sample to rows with Dictator == 1 and Risk == 0,
# then print the column summary of the reduced frame.
is_player_x = df['Dictator'] == 1
is_certain_outcome = df['Risk'] == 0
df = df[is_player_x & is_certain_outcome]
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 903 entries, 1799 to 3597
Data columns (total 28 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Dataset                   903 non-null    int64         
 1   SubmitTime                903 non-null    datetime64[ns]
 2   Duration                  903 non-null    int64         
 3   Controlquestion           903 non-null    float64       
 4   Controlquestion_attempts  903 non-null    float64       
 5   Treatment                 903 non-null    int64         
 6   Treatment_name            903 non-null    object        
 7   Dictator                  903 non-null    int64         
 8   Decision                  903 non-null    int64         
 9   Advise                    0 non-null      float64       
 10  Follow                    0 non-null      float64       
 11  Belief1                   302 non-null    float64       
 12  Belief2           

#### Create indicator variable for disbelievers

In [16]:
# Indicator for player X participants who answered Belief1 == 0
# ('Player Y will not follow'). Rows with a missing Belief1 compare as False
# and are coded 0, exactly as with the previous two-step assignment.
# `assign` returns a new frame instead of writing into the frame produced by
# the boolean filter above, which avoids pandas' SettingWithCopyWarning.
df = df.assign(Disbelieve=df['Belief1'].eq(0).astype('int64'))
df['Disbelieve'].value_counts()

0    828
1     75
Name: Disbelieve, dtype: int64

#### Estimating treatment effects, splitting up by beliefs <a id='regression'></a>

In [17]:
# Model specifications: the baseline with the Free x Disbelieve interaction,
# and the same model extended with additional covariates.
baseline_formula = 'Decision ~ Binding + Free + Free:Disbelieve'
regs = {
    'reg1': baseline_formula,
    'reg2': baseline_formula + ' + Age_m + Female + Education_m + Political_m',
}


def _fit_hc1(formula):
    """Fit ``formula`` on ``df`` by OLS using the HC1 covariance estimator."""
    return smf.ols(formula=formula, data=df).fit(cov_type='HC1')


ols_results = [_fit_hc1(formula) for formula in regs.values()]

# Preferred row order of the regressors in the summary table.
order = [
    'Binding',
    'Free',
    'Free:Disbelieve',
    'Disbelieve',
    'Age_m',
    'Female',
    'Education_m',
    'Political_m',
    'Intercept',
]


def _format_nobs(result):
    """Number of observations, without decimals."""
    return f'{result.nobs:.0f}'


def _format_fvalue(result):
    """F statistic with three decimals."""
    return f'{result.fvalue:.3f}'


def _format_f_pvalue(result):
    """P-value of the F statistic with three decimals."""
    return f'{result.f_pvalue:.3f}'


# Extra model-level rows appended below the coefficients.
auxiliary = {
    'N': _format_nobs,
    'F': _format_fvalue,
    'P(>F)': _format_f_pvalue,
}

ols_quant = summary_col(
    ols_results,
    stars=True,
    info_dict=auxiliary,
    float_format='%.3f',
    regressor_order=order,
)


def _row_label(name):
    """Show interaction terms as 'A * B' instead of 'A:B'."""
    return name.replace(':', ' * ')


def _column_label(name):
    """Keep the first word of the column name and relabel 'Decision'."""
    return name.split()[0].replace('Decision', 'p(selfish)')


# Take the coefficient table and relabel its rows and columns.
ols_quant = (
    ols_quant.tables[0]
    .rename(index=_row_label, columns=_column_label)
    .rename_axis(columns='Dependent variable')
)

# Export the table as LaTeX into the tables folder.
ols_table_path = os.path.join(TABLES_FOLDER, 'ols_decision_robust.tex')
ols_quant.style.format(precision=3).to_latex(ols_table_path)

display(ols_quant)

Dependent variable,p(selfish),p(selfish).1
Binding,-0.135***,-0.133***
,(0.040),(0.040)
Free,-0.183***,-0.175***
,(0.043),(0.043)
Free * Disbelieve,-0.063,-0.090
,(0.062),(0.063)
Age_m,,-0.039
,,(0.033)
Female,,-0.131***
,,(0.033)


#### Convert to html

In [18]:
# Export 4_beliefs.ipynb to HTML in ./docs using nbconvert.
!jupyter nbconvert --output-dir='./docs' --to html 4_beliefs.ipynb

[NbConvertApp] Converting notebook 4_beliefs.ipynb to html
[NbConvertApp] Writing 606023 bytes to docs/4_beliefs.html
