In [42]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from tabulate import tabulate
from statsmodels.iolib.summary2 import summary_col

df = pd.read_csv("Group 2 Experiment.csv")

recol = []
correct_recol = [7, 5, 3, 9, 5, 9,  
                41022015, 80266500, 30894458, 34961041, 11712864, 63517353,
                2, 6, 6, 4, 1, 4,
                39295009, 62178411, 89374076, 46781487, 25154074, 33457307]

# single digit
risk_1 = [] # 1 is risky choice
patience_1 = [] # 1 is earlier option

# eight digits
risk_8 = []
patience_8 = []

gender = [] # 1 is Female


for index, row in df.iterrows():
    temp_rq = []

    # check for submission date
    if not row["Timestamp"][:10] == "2023/04/06":
        continue

    # print(index)
    for i in range(1, 25):
        rq = str(row[f"Recollection Question.{i}"])
        rq = int(rq) if rq.isdigit() else 0
        temp_rq.append(rq)

        g = 0 if (str(row["How do you describe yourself? (Select all that apply)"])[:4] == "Male") else 1 # 1 is female
        lat = 1 if (str(row["Are you of Hispanic, Latino, or Spanish origin?"])[:3] == "Yes") else 0 # 1 is hispanic/latino/spanish
        white = 1 if (str(row["What is your race (choose all that apply)"])[:5] == "White") else 0 # 1 is white
        
        # get all the risk answers
        if 1 <= i and i <= 6:
            # gender.append(g)
            risk_1.append([1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "B") else 0, g, lat, white])
            # print(str(row[f"Unnamed: {2*i + 2}"])[7:8], risk_1[-1])
        if 7 <= i and i <=12:
            risk_8.append([1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "B") else 0, g, lat, white])
 
        # get all the patience answers
        if 13 <= i and i <= 18:
            patience_1.append([1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "A") else 0, g, lat, white])
        if 19 <= i and i <= 24:
            patience_8.append([1 if (str(row[f"Unnamed: {2*i + 2}"])[7:8] == "A") else 0, g, lat, white])

    recol.append(temp_rq)



correct_recol = np.asarray(correct_recol, dtype=np.int64)
recol = np.asarray(recol, dtype=np.int64)

risk_1 = np.asarray(risk_1, dtype=np.float32)
risk_8 = np.asarray(risk_8, dtype=np.float32)

patience_1 = np.asarray(patience_1, dtype=np.float32)
patience_8 = np.asarray(patience_8, dtype=np.float32)

gender = np.asarray(gender)
print(risk_1.shape, np.sum(risk_1))
print(risk_8.shape, np.sum(risk_8))
print(patience_1.shape, np.sum(patience_1))
print(patience_8.shape, np.sum(patience_8))
print(gender.shape)

(102, 4) 176.0
(102, 4) 181.0
(102, 4) 132.0
(102, 4) 137.0
(0,)


In [7]:
'''
These are preliminary statistics.
Compare this to Table 3 in the paper
'''

diff = recol - correct_recol
diff = np.where(diff != 0, diff, 1)
diff = np.where(diff == 1, diff, 0)

correct_1 = np.vstack((diff[:, 0:6], diff[:, 12:18])).ravel()
correct_8 = np.vstack((diff[:, 6:12], diff[:, 18:24])).ravel()
correct_1_p = np.mean(correct_1)
correct_8_p = np.mean(correct_8)

risky_1_p = np.mean(risk_1[:, :1])
risky_8_p = np.mean(risk_8[:, :1])

early_1_p = np.mean(patience_1[:, :1])
early_8_p = np.mean(patience_8[:, :1])

response_count = recol.shape[0] * recol.shape[1]

risky_1_gain = risk_1[:, :1].reshape(-1, 3)[0::2].ravel()
risky_1_loss = risk_1[:, :1].reshape(-1, 3)[1::2].ravel()

risky_8_gain = risk_8[:, :1].reshape(-1, 3)[0::2].ravel()
risky_8_loss = risk_8[:, :1].reshape(-1, 3)[1::2].ravel()

early_1_today = np.asarray([i[[0, 2, 3, 5]].tolist() for i in patience_1[:, :1].reshape(-1, 6)]).ravel()
early_1_future = np.asarray([i[[1, 4]].tolist() for i in patience_1[:, :1].reshape(-1, 6)]).ravel()

early_8_today = np.asarray([i[[0, 2, 3, 5]].tolist() for i in patience_8[:, :1].reshape(-1, 6)]).ravel()
early_8_future = np.asarray([i[[1, 4]].tolist() for i in patience_8[:, :1].reshape(-1, 6)]).ravel()

risky_1_male = risk_1[:, :1][np.argwhere(risk_1[:, 1:] == 0)[:, :1].ravel()] 
risky_1_female = risk_1[:, :1][np.argwhere(risk_1[:, 1:] == 1)[:, :1].ravel()]

risky_8_male = risk_8[:, :1][np.argwhere(risk_1[:, 1:] == 0)[:, :1].ravel()] 
risky_8_female = risk_8[:, :1][np.argwhere(risk_1[:, 1:] == 1)[:, :1].ravel()]

early_1_male = patience_1[:, :1][np.argwhere(patience_1[:, 1:] == 0)[:, :1].ravel()] 
early_1_female = patience_1[:, :1][np.argwhere(patience_1[:, 1:] == 1)[:, :1].ravel()]

early_8_male = patience_8[:, :1][np.argwhere(patience_1[:, 1:] == 0)[:, :1].ravel()] 
early_8_female = patience_8[:, :1][np.argwhere(patience_1[:, 1:] == 1)[:, :1].ravel()]

print(tabulate([["", "1 Digit (% / # response)", "8 Digit (% / # response)", "Unpaired t-test"], 
                ["Digit Memorization Correct", f"{correct_1_p} / {response_count}", f"{correct_8_p} / {response_count}", f"p={stats.ttest_ind(correct_1, correct_8)[1]}"],
                ["Risky Choice", f"{risky_1_p} / {risk_1[:, :1].shape[0]}", f"{risky_8_p} / {risk_8[:, :1].shape[0]}"],
                ["Risky Choice (Gain)", f"{np.mean(risky_1_gain)} / {risky_1_gain.shape[0]}", f"{np.mean(risky_8_gain)} / {risky_8_gain.shape[0]}"],
                ["Risky Choice (Loss)", f"{np.mean(risky_1_loss)} / {risky_1_loss.shape[0]}", f"{np.mean(risky_8_loss)} / {risky_8_loss.shape[0]}"],
                ["Risky Choice (Male)", f"{np.mean(risky_1_male)} / {risky_1_male.shape[0]}", f"{np.mean(risky_8_male)} / {risky_8_male.shape[0]}"],
                ["Risky Choice (Female)", f"{np.mean(risky_1_female)} / {risky_1_female.shape[0]}", f"{np.mean(risky_8_female)} / {risky_8_female.shape[0]}"],
                ["Early Option", f"{early_1_p} / {patience_1[:, :1].shape[0]}", f"{early_8_p} / {patience_8[:, :1].shape[0]}"],
                ["Early Option (Today vs Today)", f"{np.mean(early_1_today)} / {early_1_today.shape[0]}", f"{np.mean(early_8_today)} / {early_8_today.shape[0]}"],
                ["Early Option (Future vs Future)", f"{np.mean(early_1_future)} / {early_1_future.shape[0]}", f"{np.mean(early_8_future)} / {early_8_future.shape[0]}"],
                ["Early Option (Male)", f"{np.mean(early_1_male)} / {early_1_male.shape[0]}", f"{np.mean(early_8_male)} / {early_8_male.shape[0]}"],
                ["Early Option (Female)", f"{np.mean(early_1_female)} / {early_1_female.shape[0]}", f"{np.mean(early_8_female)} / {early_8_female.shape[0]}"]],
                headers="firstrow"))
# stats.ttest_ind(risk_1[:, :1].ravel(), risk_8[:, :1].ravel())[1]

                                 1 Digit (% / # response)    8 Digit (% / # response)    Unpaired t-test
-------------------------------  --------------------------  --------------------------  -----------------------
Digit Memorization Correct       0.9754901960784313 / 408    0.5196078431372549 / 408    p=3.055761268390349e-30
Risky Choice                     0.6078431606292725 / 102    0.656862735748291 / 102
Risky Choice (Gain)              0.686274528503418 / 51      0.7450980544090271 / 51
Risky Choice (Loss)              0.529411792755127 / 51      0.5686274766921997 / 51
Risky Choice (Male)              0.6666666865348816 / 192    0.7135416865348816 / 192
Risky Choice (Female)            0.5087719559669495 / 114    0.5614035129547119 / 114
Early Option                     0.1764705926179886 / 102    0.22549019753932953 / 102
Early Option (Today vs Today)    0.14705882352941177 / 68    0.20588235294117646 / 68
Early Option (Future vs Future)  0.23529411764705882 / 34    0.264705

In [79]:
'''
This summary is for 8 digit DV versus risky choice chosen. 
Compare this to Model 2a in Table 4 in the paper.
'''


df1 = pd.DataFrame({"Constant": np.ones(shape=(risk_1.shape[0] + risk_8.shape[0], 1)).T[0],
                   "EightDigitNum": np.vstack((np.ones(shape=(risk_8.shape[0], 1)), np.zeros(shape=(risk_1.shape[0], 1)))).T[0],
                   "Female": np.vstack((risk_8[:, 1:2], risk_1[:, 1:2])).T[0], 
                   "Lat_His_Span": np.vstack((risk_8[:, 2:3], risk_1[:, 2:3])).T[0],
                   "White": np.vstack((risk_8[:, 3:4], risk_1[:, 3:4])).T[0],
                   "RiskyChoice": np.vstack((risk_8[:, :1], risk_1[:, :1])).T[0]})

r1 = smf.ols("RiskyChoice ~ EightDigitNum", data=df1).fit()
r2 = smf.ols("RiskyChoice ~ EightDigitNum + Female", data=df1).fit()
r3 = smf.ols("RiskyChoice ~ EightDigitNum + Female + Lat_His_Span + White", data=df1).fit()
sg1 = Stargazer([r1, r2, r3])
# print(sg1.render_latex())
sg1

0,1,2,3
,,,
,Dependent variable:RiskyChoice,Dependent variable:RiskyChoice,Dependent variable:RiskyChoice
,,,
,(1),(2),(3)
,,,
EightDigitNum,0.049,0.049,0.049
,(0.068),(0.065),(0.064)
Female,,-0.309***,-0.366***
,,(0.077),(0.083)
Intercept,0.608***,0.681***,0.725***


In [78]:
'''
This summary is for 8 digit DV versus earlier option chosen. 
Compare this to Model 3a in Table 4 in the paper.
'''

df2 = pd.DataFrame({"Constant": np.ones(shape=(patience_1.shape[0] + patience_8.shape[0], 1)).T[0],
                   "EightDigitNum": np.vstack((np.ones(shape=(patience_8.shape[0], 1)), np.zeros(shape=(patience_1.shape[0], 1)))).T[0],
                   "Female": np.vstack((patience_8[:, 1:2], patience_1[:, 1:2])).T[0], 
                   "Lat_His_Span": np.vstack((patience_8[:, 2:3], patience_1[:, 2:3])).T[0],
                   "White": np.vstack((patience_8[:, 3:4], patience_1[:, 3:4])).T[0],
                   "EarlierOption": np.vstack((patience_8[:, :1], patience_1[:, :1])).T[0]})

r1 = smf.ols("EarlierOption ~ EightDigitNum", data=df2).fit()
r2 = smf.ols("EarlierOption ~ EightDigitNum + Female", data=df2).fit()
r3 = smf.ols("EarlierOption ~ EightDigitNum + Female + Lat_His_Span + White", data=df2).fit()
sg2 = Stargazer([r1, r2, r3])
# print(sg2.render_latex())
sg2

0,1,2,3
,,,
,Dependent variable:EarlierOption,Dependent variable:EarlierOption,Dependent variable:EarlierOption
,,,
,(1),(2),(3)
,,,
EightDigitNum,0.049,0.049,0.049
,(0.056),(0.056),(0.055)
Female,,-0.072,-0.057
,,(0.066),(0.071)
Intercept,0.176***,0.193***,0.330***
