In [303]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from tabulate import tabulate

df = pd.read_csv("Group 2 Experiment.csv")

recol = []
correct_recol = [7, 5, 3, 9, 5, 9,  
                41022015, 80266500, 30894458, 34961041, 11712864, 63517353,
                2, 6, 6, 4, 1, 4,
                39295009, 62178411, 89374076, 46781487, 25154074, 33457307]

# single digit
risk_1 = [] # 1 is risky choice
patience_1 = [] # 1 is earlier option

# eight digits
risk_8 = []
patience_8 = []

gender = [] # 1 is male

for index, row in df.iterrows():
    temp_rq = []

    # check for submission date
    if not row["Timestamp"][:10] == "2023/04/06":
        continue

    # print(index)
    for i in range(1, 25):
        rq = str(row[f"Recollection Question.{i}"])
        rq = int(rq) if rq.isdigit() else 0
        temp_rq.append(rq)

        g = 1 if (str(row["How do you describe yourself? (Select all that apply)"])[:4] == "Male") else 0

        # get all the risk answers
        if 1 <= i and i <= 6:
            gender.append(g)
            risk_1.append(1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "B") else 0)
            # print(str(row[f"Unnamed: {2*i + 2}"])[7:8], risk_1[-1])
        if 7 <= i and i <=12:
            risk_8.append(1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "B") else 0)
 
        # get all the patience answers
        if 13 <= i and i <= 18:
            patience_1.append(1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "A") else 0)
        if 19 <= i and i <= 24:
            patience_8.append(1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "A") else 0)

    recol.append(temp_rq)



correct_recol = np.asarray(correct_recol, dtype=np.int64)
recol = np.asarray(recol, dtype=np.int64)

risk_1 = np.asarray(risk_1, dtype=np.float32)
risk_8 = np.asarray(risk_8, dtype=np.float32)

patience_1 = np.asarray(patience_1, dtype=np.float32)
patience_8 = np.asarray(patience_8, dtype=np.float32)

gender = np.asarray(gender)
print(risk_1.shape, np.sum(risk_1))
print(risk_8.shape, np.sum(risk_8))
print(patience_1.shape, np.sum(patience_1))
print(patience_8.shape, np.sum(patience_8))
print(gender.shape)

(102,) 62.0
(102,) 67.0
(102,) 18.0
(102,) 23.0
(102,)


In [304]:
'''
These are preliminary statistics.
Compare this to Table 3 in the paper
'''

diff = recol - correct_recol
diff = np.where(diff != 0, diff, 1)
diff = np.where(diff == 1, diff, 0)

correct_1_p = np.mean([np.mean(diff[:, 0:6]), np.mean(diff[:, 12:18])])
correct_8_p = np.mean([np.mean(diff[:, 6:12]), np.mean(diff[:, 18:24])])

risky_1_p = np.sum(risk_1)/risk_1.shape[0]
risky_8_p = np.sum(risk_8)/risk_8.shape[0]

early_1_p = np.sum(patience_1)/patience_1.shape[0]
early_8_p = np.sum(patience_8)/patience_8.shape[0]

response_count = recol.shape[0] * recol.shape[1]

print(tabulate([["", "1 Digit (% / # response)", "8 Digit (% / # response)"], 
                ["Digit Memorization Correct", f"{correct_1_p} / {response_count}", f"{correct_8_p} / {response_count}"],
                ["Risky Choice", f"{risky_1_p} / {risk_1.shape[0]}", f"{risky_8_p} / {risk_8.shape[0]}"],
                ["Early Option", f"{early_1_p} / {patience_1.shape[0]}", f"{early_8_p} / {patience_8.shape[0]}"]]
                , headers="firstrow"))

                            1 Digit (% / # response)    8 Digit (% / # response)
--------------------------  --------------------------  --------------------------
Digit Memorization Correct  0.9754901960784313 / 408    0.5196078431372549 / 408
Risky Choice                0.6078431372549019 / 102    0.6568627450980392 / 102
Early Option                0.17647058823529413 / 102   0.22549019607843138 / 102


In [305]:
'''
This summary is for 8 digit DV versus risky choice chosen. 
Compare this to Model 2a in Table 4 in the paper.
'''

risk_x = np.hstack((np.ones(risk_8.shape), np.zeros(risk_1.shape)))
risk_x = np.stack((np.ones(risk_x.shape), risk_x, np.hstack((gender, gender))), axis=1) # add intercept and gender
# risk_x = np.stack((np.ones(risk_x.shape), risk_x,), axis=1) # add intercept
risk_y = np.hstack((risk_8, risk_1))

res1 = sm.OLS(risk_y, risk_x).fit()
print(res1.summary2(xname=["Constant", "8-Digit Number", "Gender"], yname="Risky Choice Chosen"))

                  Results: Ordinary least squares
Model:              OLS                 Adj. R-squared:     0.067   
Dependent Variable: Risky Choice Chosen AIC:                271.0409
Date:               2023-04-07 08:34    BIC:                280.9952
No. Observations:   204                 Log-Likelihood:     -132.52 
Df Model:           2                   F-statistic:        8.340   
Df Residuals:       201                 Prob (F-statistic): 0.000332
R-squared:          0.077               Scale:              0.21787 
---------------------------------------------------------------------
                    Coef.   Std.Err.    t     P>|t|    [0.025  0.975]
---------------------------------------------------------------------
Constant            0.3713    0.0749  4.9589  0.0000   0.2237  0.5190
8-Digit Number      0.0490    0.0654  0.7500  0.4541  -0.0799  0.1779
Gender              0.3093    0.0770  4.0146  0.0001   0.1574  0.4612
-----------------------------------------------

In [306]:
'''
This summary is for 8 digit DV versus earlier option chosen. 
Compare this to Model 3a in Table 4 in the paper.
'''

patience_x = np.hstack((np.ones(patience_8.shape), np.zeros(patience_1.shape)))
patience_x = np.stack((np.ones(patience_x.shape), patience_x, np.hstack((gender, gender))), axis=1) # add intercept and gender
# patience_x = np.stack((np.ones(patience_x.shape), patience_x,), axis=1) # add intercept
patience_y = np.hstack((patience_8, patience_1))

res1 = sm.OLS(patience_y, patience_x).fit()
print(res1.summary2(xname=["Constant", "8-Digit Number", "Gender"], yname="Earlier Option Chosen"))

                   Results: Ordinary least squares
Model:              OLS                   Adj. R-squared:     -0.000  
Dependent Variable: Earlier Option Chosen AIC:                209.8665
Date:               2023-04-07 08:34      BIC:                219.8208
No. Observations:   204                   Log-Likelihood:     -101.93 
Df Model:           2                     F-statistic:        0.9709  
Df Residuals:       201                   Prob (F-statistic): 0.381   
R-squared:          0.010                 Scale:              0.16142 
------------------------------------------------------------------------
                  Coef.    Std.Err.     t      P>|t|     [0.025   0.975]
------------------------------------------------------------------------
Constant          0.1213     0.0645   1.8823   0.0612   -0.0058   0.2484
8-Digit Number    0.0490     0.0563   0.8713   0.3846   -0.0619   0.1600
Gender            0.0721     0.0663   1.0875   0.2781   -0.0586   0.2029
--------------