In [1]:
import pandas as pd
import numpy as np
import re

from data_cleaning import *
import warnings
warnings.filterwarnings('ignore')

import scipy
from stargazer.stargazer import Stargazer
import statsmodels.api as sm
from scipy.stats import ttest_1samp, f

print('* pandas version :\t', pd.__version__)
print('* numpy version :\t', np.__version__)
print('* statsmodels version :\t', sm.__version__)
print('* scipy version :\t', scipy.__version__)

* pandas version :	 1.5.2
* numpy version :	 1.23.5
* statsmodels version :	 0.13.5
* scipy version :	 1.9.3


In [2]:
r1 = pd.read_csv("raw_data/all_apps_wide_2022-11-04.csv")
r2 = pd.read_csv("raw_data/all_apps_wide_2022-11-18.csv")
r = pd.concat([r1,r2])
r = r.convert_dtypes()
del r1, r2

r = clean_data(r)
r.head()

Unnamed: 0,treatment,iq_score,rank,prior_1,prior_2,prior_3,prior_4,goodnews,posterior_1,posterior_2,...,surveyIQ1,surveyIQ2,surveyIQ3,time_pref,risk_pref,rank_prior,rank_posterior,rank_bayesian,adjustment,b_adjustment
0,now,7,4,0.1,0.3,0.4,0.2,0,0.05,0.3,...,0,2,3,164,76,2.7,2.8,3.176471,0.1,0.476471
1,now,5,4,0.1,0.2,0.35,0.35,0,0.05,0.15,...,0,1,2,317,76,2.95,3.3,3.435897,0.35,0.485897
2,now,9,2,0.1,0.2,0.35,0.35,1,0.2,0.3,...,0,5,3,164,44,2.95,2.4,2.047619,-0.55,-0.902381
3,now,15,1,0.25,0.25,0.25,0.25,1,0.33,0.34,...,0,3,5,164,166,2.5,2.0,1.666667,-0.5,-0.833333
4,now,11,1,0.2,0.4,0.3,0.1,1,0.3,0.5,...,0,3,2,166,196,2.3,1.9,1.823529,-0.4,-0.476471


# Summary stats

In [3]:
r.loc[:, ['iq_score', 'rank', 'prior_1', 'prior_2', 'prior_3', 'prior_4', 'posterior_1', 'posterior_2', 'posterior_3', 'posterior_4', 'goodnews', 'rank_prior', 'rank_posterior', 'rank_bayesian', 'adjustment', 'b_adjustment', 'age']].describe().round(decimals=3)

Unnamed: 0,iq_score,rank,prior_1,prior_2,prior_3,prior_4,posterior_1,posterior_2,posterior_3,posterior_4,goodnews,rank_prior,rank_posterior,rank_bayesian,adjustment,b_adjustment,age
count,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0,98.0
mean,8.908,2.5,0.209,0.293,0.298,0.2,0.185,0.308,0.302,0.205,0.5,2.49,2.527,2.516,0.037,0.026,20.449
std,2.847,1.133,0.167,0.116,0.115,0.141,0.208,0.148,0.15,0.226,0.503,0.461,0.675,0.779,0.519,0.638,2.883
min,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.01,1.069,-1.0,-1.093,18.0
25%,7.0,1.25,0.1,0.202,0.25,0.1,0.0,0.2,0.2,0.0,0.0,2.3,2.0,1.815,-0.35,-0.521,18.0
50%,9.0,2.5,0.2,0.3,0.3,0.2,0.1,0.3,0.33,0.125,0.5,2.5,2.525,2.529,0.0,0.034,20.0
75%,11.0,3.75,0.25,0.4,0.35,0.25,0.3,0.4,0.4,0.348,1.0,2.787,3.05,3.282,0.437,0.551,21.0
max,15.0,4.0,1.0,0.66,0.6,0.6,0.99,0.7,0.6,1.0,1.0,3.55,4.0,3.708,2.0,2.0,35.0


## Table 2
Balance check

In [4]:
open("now_stat.tex",mode="w").write(
    np.round(
        r.loc[r.loc[:,'treatment']=="now",['iq_score', 'rank_prior', 'age', 'gender']].describe(percentiles=[]), 
        3
        ).to_latex()
    )
open("10_stat.tex",mode="w").write(
    np.round(
        r.loc[r.loc[:,'treatment']=="10",['iq_score', 'rank_prior', 'age', 'gender']].describe(percentiles=[]), 
        3
        ).to_latex()
    )
open("never_stat.tex",mode="w").write(
    np.round(
        r.loc[r.loc[:,'treatment']=="never",['iq_score', 'rank_prior', 'age', 'gender']].describe(percentiles=[]), 
        3
        ).to_latex()
    )

452

# Regression analysis

In [5]:
y_now_g = r.loc[(r.loc[:,'treatment']=='now') & (r.loc[:,'goodnews']==1),"adjustment"]
X_now_g = sm.add_constant(r.loc[(r.loc[:,'treatment']=='now') & (r.loc[:,'goodnews']==1),"b_adjustment"])

y_now_b = r.loc[(r.loc[:,'treatment']=='now') & (r.loc[:,'goodnews']==0),"adjustment"]
X_now_b = sm.add_constant(r.loc[(r.loc[:,'treatment']=='now') & (r.loc[:,'goodnews']==0),"b_adjustment"])

y_10_g = r.loc[(r.loc[:,'treatment']=='10') & (r.loc[:,'goodnews']==1),"adjustment"]
X_10_g = sm.add_constant(r.loc[(r.loc[:,'treatment']=='10') & (r.loc[:,'goodnews']==1),"b_adjustment"])

y_10_b = r.loc[(r.loc[:,'treatment']=='10') & (r.loc[:,'goodnews']==0),"adjustment"]
X_10_b = sm.add_constant(r.loc[(r.loc[:,'treatment']=='10') & (r.loc[:,'goodnews']==0),"b_adjustment"])

y_never_g = r.loc[(r.loc[:,'treatment']=='never') & (r.loc[:,'goodnews']==1),"adjustment"]
X_never_g = sm.add_constant(r.loc[(r.loc[:,'treatment']=='never') & (r.loc[:,'goodnews']==1),"b_adjustment"])

y_never_b = r.loc[(r.loc[:,'treatment']=='never') & (r.loc[:,'goodnews']==0),"adjustment"]
X_never_b = sm.add_constant(r.loc[(r.loc[:,'treatment']=='never') & (r.loc[:,'goodnews']==0),"b_adjustment"])

est_now_g = sm.OLS(y_now_g, X_now_g).fit(cov_type="HC1")
est_now_b = sm.OLS(y_now_b, X_now_b).fit(cov_type="HC1")
est_10_g = sm.OLS(y_10_g, X_10_g).fit(cov_type="HC1")
est_10_b = sm.OLS(y_10_b, X_10_b).fit(cov_type="HC1")
est_never_g = sm.OLS(y_never_g, X_never_g).fit(cov_type="HC1")
est_never_b = sm.OLS(y_never_b, X_never_b).fit(cov_type="HC1")

## Table 3

In [6]:
s = Stargazer([
    est_now_g, est_now_b, 
    est_10_g, est_10_b, 
    est_never_g, est_never_b
    ])
s.dependent_variable = "Adjustment"
s.custom_columns([
    "Now, Good news", "Now, Bad news", 
    "10 days later, Good news", "10 days later, Bad news", 
    "Never, Good news", "Never, Bad news"
    ], [1,1,1,1,1,1])
s.rename_covariates({'b_adjustment': 'Bayesian adjustment'})
s.show_degrees_of_freedom(False)
s.add_custom_notes(['Robust standard errors (HC1) in parentheses'])
s.show_f_statistic = False
s

0,1,2,3,4,5,6
,,,,,,
,Dependent variable:Adjustment,Dependent variable:Adjustment,Dependent variable:Adjustment,Dependent variable:Adjustment,Dependent variable:Adjustment,Dependent variable:Adjustment
,,,,,,
,"Now, Good news","Now, Bad news","10 days later, Good news","10 days later, Bad news","Never, Good news","Never, Bad news"
,(1),(2),(3),(4),(5),(6)
,,,,,,
Bayesian adjustment,0.698***,1.055***,0.498***,0.718***,0.610***,0.607
,(0.211),(0.055),(0.184),(0.175),(0.218),(0.752)
const,-0.042,-0.156**,-0.062,0.028,0.004,0.007
,(0.093),(0.065),(0.106),(0.118),(0.105),(0.390)


In [7]:
open("reg.tex", mode="w").write(s.render_latex())

1327

### Testing expected rank | prior

In [8]:
sub_now = r.loc[r.loc[:, 'treatment'] == 'now',:]
sub_10 = r.loc[r.loc[:, 'treatment'] == '10',:]
sub_never = r.loc[r.loc[:, 'treatment'] == 'never',:]
print(f"* two-sided t-test, prior")
t, p, _ = sm.stats.ttest_ind(sub_now['rank_prior'], sub_10['rank_prior'], alternative = 'two-sided', usevar = 'pooled')
print(f"- Expected rank cond on prior - now vs. 10 days later: t stat = {t: 0.3f}, p value = {p: 0.3f}")
t, p, _ = sm.stats.ttest_ind(sub_now['rank_prior'], sub_never['rank_prior'], alternative = 'two-sided', usevar = 'pooled')
print(f"- Expected rank cond on prior - now vs. never: t stat = {t: 0.3f}, p value = {p: 0.3f}")
t, p, _ = sm.stats.ttest_ind(sub_10['rank_prior'], sub_never['rank_prior'], alternative = 'two-sided', usevar = 'pooled')
print(f"- Expected rank cond on prior - 10 days leter vs. never: t stat = {t: 0.3f}, p value = {p: 0.3f}")

* two-sided t-test, prior
- Expected rank cond on prior - now vs. 10 days later: t stat =  0.239, p value =  0.812
- Expected rank cond on prior - now vs. never: t stat = -0.809, p value =  0.422
- Expected rank cond on prior - 10 days leter vs. never: t stat = -1.129, p value =  0.263


### Testing overconfidence *ex ante*

In [9]:
t, p = ttest_1samp(r['rank_prior'].to_numpy(), 2.5, alternative='less')
print("* H0: E[rank|reported prior] < E[rank] = 2.5")
print(f"p-value: {p: 0.3f}")

* H0: E[rank|reported prior] < E[rank] = 2.5
p-value:  0.413


# Chow test (Table 4)

In [10]:
y_pool_g = r.loc[(r.loc[:,'goodnews']==1),"adjustment"]
X_pool_g = sm.add_constant(r.loc[(r.loc[:,'goodnews']==1),"b_adjustment"])

y_pool_b = r.loc[(r.loc[:,'goodnews']==0),"adjustment"]
X_pool_b = sm.add_constant(r.loc[(r.loc[:,'goodnews']==0),"b_adjustment"])

est_pool_g = sm.OLS(y_pool_g, X_pool_g).fit(cov_type="HC1")
est_pool_b = sm.OLS(y_pool_b, X_pool_b).fit(cov_type="HC1")

In [11]:
l = np.array([[est_now_b.ssr, est_now_g.ssr],
                  [est_10_b.ssr, est_10_g.ssr],
                  [est_never_b.ssr, est_never_g.ssr]])
n = np.array([est_now_g.nobs, est_10_g.nobs, est_never_g.nobs])
sub = np.array(["Now", "10 days later", "Never"])
k = 2
    
for g in [1,0]:
    news = ["Bad news", "Good news"][g]
    this_model = [est_pool_b, est_pool_g][g]
    rss_c = this_model.ssr
    print(f"* Chow test result, {news}")
    for i in [[0,1], [1,2], [0,2]]:
        rss1, rss2 = l[i][g]
        n1, n2 = n[i]
        sub1, sub2 = sub[i]
        
        frac1 = (rss_c-(rss1+rss2))/k
        frac2 = (rss1+rss2)/(n1+n2-2*k)
        chow = frac1/frac2
        print(f"\t- Conditions: {sub1} & {sub2}")
        print(f"\t- p-value: {f.cdf(chow, k, n1+n2-2*k)}", end="\n\n")

* Chow test result, Good news
	- Conditions: Now & 10 days later
	- p-value: 0.9997751055059031

	- Conditions: 10 days later & Never
	- p-value: 0.0

	- Conditions: Now & Never
	- p-value: 0.0

* Chow test result, Bad news
	- Conditions: Now & 10 days later
	- p-value: 0.9999999432115646

	- Conditions: 10 days later & Never
	- p-value: 0.9999999809076646

	- Conditions: Now & Never
	- p-value: 0.9999996988146018

