In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from patsy.contrasts import Treatment
from scipy import stats
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel, wilcoxon
from scipy.stats import zscore
from scipy.stats.mstats import winsorize
from statsmodels.iolib.summary2 import summary_col

rng = np.random.default_rng()
#import warnings
#warnings.filterwarnings('ignore')

In [None]:
# Load the analysis dataset; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="transformed_data.csv")

In [None]:
# Summary statistics by treatment arm
# Variables to summarise
summary_vars = [
    'Age',
    'Gender',
    'High_wealth',
    'Own_asset',
    'Intere_inves',
    'Trade_frequently',
    'numcorrect_financialliteracyall',
    'Risk_taking',
    'Patient',
    'Share_willing',
    'Donate_charity',
    'Donation',
    'Sustainability_Value',
    'correct_Environ_Literacy',
    'High_CurrentSustainShare',
    'Invest_in_SRI',
    'correct_comprehension',
    'Stat_know',
    'Number_think',
    'Number_inter',
]

# Statistics computed for every variable, in output column order
summary_stats = ['count', 'mean', 'std', 'min', 'max']

# One row per treatment_num; columns form a (variable, statistic) MultiIndex
summary_table = df.groupby('treatment_num')[summary_vars].agg(summary_stats)

# Collapse the MultiIndex into flat "<variable>_<statistic>" column names
summary_table.columns = [
    f"{variable}_{statistic}".strip()
    for variable, statistic in summary_table.columns
]

In [None]:
# Display names for the numeric treatment codes
treatment_labels = dict(
    zip(
        (1, 2, 3, 4),
        ("Baseline", "Green", "Brown", "Green Low"),
    )
)

# Add a label column; codes without an entry in the mapping become NaN
df['treatment_label'] = df['treatment_num'].map(treatment_labels)

In [None]:
# T-tests: main effects on allocations to assets

def run_ttest(df, var, treat_col, group1, group2, labels=None):
    """Compare the mean of ``var`` between two treatment groups with Welch's t-test.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``var`` and ``treat_col``.
    var : str
        Column holding the outcome to compare.
    treat_col : str
        Column holding the treatment code.
    group1, group2 :
        Treatment codes to compare; ``group1`` is the reference group.
    labels : mapping, optional
        Maps treatment codes to display names. Defaults to the notebook-level
        ``treatment_labels``. Codes missing from the mapping are reported as-is
        instead of raising ``KeyError``.

    Returns
    -------
    dict
        Group labels, rounded group means, the difference
        (group 2 minus group 1), the t-statistic and the two-sided p-value.
    """
    if labels is None:
        labels = treatment_labels

    outcome = df[var]
    group1_data = outcome[df[treat_col] == group1].dropna()
    group2_data = outcome[df[treat_col] == group2].dropna()
    mean1 = group1_data.mean()
    mean2 = group2_data.mean()

    # group2 is passed first so the sign of t matches 'Difference';
    # equal_var=False selects Welch's unequal-variance test.
    tstat, pval = ttest_ind(group2_data, group1_data, equal_var=False)

    return {
        'Variable': var,
        'Group 1': labels.get(group1, group1),
        'Group 2': labels.get(group2, group2),
        'Mean Group 1': round(mean1, 2),
        'Mean Group 2': round(mean2, 2),
        'Difference': round(mean2 - mean1, 2),
        'T-Statistic': round(tstat, 2),
        'P-Value': round(pval, 4)
    }


In [None]:
# Allocation outcomes compared between the baseline arm (1) and each of arms 2 and 3
allocation_vars = [
    'HighCorr_investmentAsset_lowret',  # positive-correlation scenario
    'LowCorr_investmentAsset_lowret',   # negative-correlation scenario
]

# (variable, reference arm, comparison arm): baseline vs green first, then baseline vs brown
tests = [(var, 1, arm) for arm in (2, 3) for var in allocation_vars]

results = [
    run_ttest(df, outcome, 'treatment_num', reference, comparison)
    for outcome, reference, comparison in tests
]
results_df = pd.DataFrame(results)

In [None]:
# Show the allocation t-test results as the cell output
results_df

In [None]:
# Additional outcomes, each compared for baseline (1) vs arm 2 and baseline (1) vs arm 3
more_tests = [
    (var, 1, arm)
    for var in ('correlation_neglect', 'consider_corr')
    for arm in (2, 3)
]

# Allocation tests first, then the additional outcomes
all_tests = [*tests, *more_tests]

# Run every comparison and collect one row per test
all_results = [
    run_ttest(df, outcome, 'treatment_num', reference, comparison)
    for outcome, reference, comparison in all_tests
]
ttest_results_df = pd.DataFrame(all_results)


In [None]:
# Show the combined t-test results as the cell output
ttest_results_df

In [None]:
# T-tests - deviation from optimal allocation
# Benchmark ("optimal") allocations that the observed means are tested against
highcorr_optimal = 23.90
lowcorr_optimal = 45.60


def ttest_optimal(df, var, optimal_value, treatment, labels=None):
    """One-sample t-test of ``var`` against a benchmark within one treatment arm.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``var`` and a ``treatment_num`` column.
    var : str
        Column holding the outcome.
    optimal_value : float
        Benchmark value used as the null-hypothesis mean.
    treatment :
        Treatment code selecting the rows to test.
    labels : mapping, optional
        Maps treatment codes to display names. Defaults to the notebook-level
        ``treatment_labels``. Unmapped codes are reported as-is.

    Returns
    -------
    dict
        Treatment label, rounded sample mean, the benchmark, the difference
        (mean minus benchmark), the t-statistic and the two-sided p-value.
    """
    if labels is None:
        labels = treatment_labels

    data = df.loc[df['treatment_num'] == treatment, var].dropna()
    sample_mean = data.mean()
    tstat, pval = ttest_1samp(data, popmean=optimal_value)

    return {
        'Variable': var,
        'Treatment': labels.get(treatment, treatment),
        'Mean': round(sample_mean, 2),
        'Optimal': optimal_value,
        'Difference': round(sample_mean - optimal_value, 2),
        'T-Statistic': round(tstat, 2),
        'P-Value': round(pval, 4)
    }

# Outcome column paired with the benchmark it is tested against
optimal_by_outcome = [
    ('HighCorr_investmentAsset_lowret', highcorr_optimal),
    ('LowCorr_investmentAsset_lowret', lowcorr_optimal),
]

# HighCorr rows for arms 1-3 first, then LowCorr rows for arms 1-3
optimal_tests = []
for outcome, benchmark in optimal_by_outcome:
    for treatment in [1, 2, 3]:
        optimal_tests.append(ttest_optimal(df, outcome, benchmark, treatment))

optimal_tests_df = pd.DataFrame(optimal_tests)

In [None]:
# Show the deviation-from-benchmark t-test results as the cell output
optimal_tests_df

In [None]:
# Main effects on Share_lowreturn (t-tests, low correlation scenario)
# NOTE(review): 'Share_lowreturn' is also assigned in a later cell of this notebook;
# confirm the column already exists in the loaded data when running top to bottom.

# Two-arm subsamples: arm 1 against arm 2, and arm 1 against arm 3
in_arms_12 = df['treatment_num'].isin([1, 2])
in_arms_13 = df['treatment_num'].isin([1, 3])
df_t12 = df.loc[in_arms_12]
df_t13 = df.loc[in_arms_13]

# Arm 1 vs arm 2 (pooled-variance two-sample t-test)
t12_group1 = df_t12.loc[df_t12['treatment_num'] == 1, 'Share_lowreturn'].dropna()
t12_group2 = df_t12.loc[df_t12['treatment_num'] == 2, 'Share_lowreturn'].dropna()
result_t12 = ttest_ind(t12_group1, t12_group2, equal_var=True)
t_stat_t12, p_val_t12 = result_t12.statistic, result_t12.pvalue

# Arm 1 vs arm 3 (pooled-variance two-sample t-test)
t13_group1 = df_t13.loc[df_t13['treatment_num'] == 1, 'Share_lowreturn'].dropna()
t13_group2 = df_t13.loc[df_t13['treatment_num'] == 3, 'Share_lowreturn'].dropna()
result_t13 = ttest_ind(t13_group1, t13_group2, equal_var=True)
t_stat_t13, p_val_t13 = result_t13.statistic, result_t13.pvalue

print(f"Treatment 1 vs 2: t = {t_stat_t12:.2f}, p = {p_val_t12:.3f}")
print(f"Treatment 1 vs 3: t = {t_stat_t13:.2f}, p = {p_val_t13:.3f}")


In [None]:
# Plot main treatment effects (low correlation scenario)

# Per-arm mean and normal-approximation 95% half-width (1.96 * standard error)
grouped = df[df['treatment_num'].isin([1, 2, 3])].groupby('treatment_num')['Share_lowreturn']
means = grouped.mean()
stds = grouped.std()
counts = grouped.count()
cis = 1.96 * (stds / np.sqrt(counts))

# Treatment labels, listed in ascending treatment_num order to match the groupby output
labels = ['Baseline', 'Green', 'Brown']

# The bars are drawn with matplotlib directly because the error bars are
# pre-computed: seaborn.barplot merely forwards `yerr` to matplotlib and
# interprets `capsize` in its own bar-width units, so the combination is fragile.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    labels,
    means.values,
    yerr=cis.values,
    capsize=5,  # cap length in points
    color=sns.color_palette('gray', n_colors=len(labels)),
)

ax.set_ylabel("Share Allocated to Diversification Asset")
ax.set_title("Treatment Effects on Diversification Allocation (Low Corr)")
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
fig.savefig("treatment_effects_lowcorr.png", dpi=300)
plt.show()


In [None]:
# Main effects in the high correlation scenario:
# t-tests on HighCorr_investmentAsset_lowret (arm 1 vs 2 and arm 1 vs 3)
in_arms_12 = df['treatment_num'].isin([1, 2])
in_arms_13 = df['treatment_num'].isin([1, 3])
df_t12 = df.loc[in_arms_12]
df_t13 = df.loc[in_arms_13]

# Arm 1 vs arm 2 (pooled-variance two-sample t-test)
t12_group1 = df_t12.loc[df_t12['treatment_num'] == 1, 'HighCorr_investmentAsset_lowret'].dropna()
t12_group2 = df_t12.loc[df_t12['treatment_num'] == 2, 'HighCorr_investmentAsset_lowret'].dropna()
result_t12_pos = ttest_ind(t12_group1, t12_group2, equal_var=True)
t_stat_t12_pos, p_val_t12_pos = result_t12_pos.statistic, result_t12_pos.pvalue

# Arm 1 vs arm 3 (pooled-variance two-sample t-test)
t13_group1 = df_t13.loc[df_t13['treatment_num'] == 1, 'HighCorr_investmentAsset_lowret'].dropna()
t13_group2 = df_t13.loc[df_t13['treatment_num'] == 3, 'HighCorr_investmentAsset_lowret'].dropna()
result_t13_pos = ttest_ind(t13_group1, t13_group2, equal_var=True)
t_stat_t13_pos, p_val_t13_pos = result_t13_pos.statistic, result_t13_pos.pvalue

print(f"Treatment 1 vs 2 (High Corr): t = {t_stat_t12_pos:.2f}, p = {p_val_t12_pos:.3f}")
print(f"Treatment 1 vs 3 (High Corr): t = {t_stat_t13_pos:.2f}, p = {p_val_t13_pos:.3f}")


In [None]:
# Plot main treatment effects (high correlation scenario)
# Per-arm mean and normal-approximation 95% half-width (1.96 * standard error)
grouped_high = df[df['treatment_num'].isin([1, 2, 3])].groupby('treatment_num')['HighCorr_investmentAsset_lowret']
means_high = grouped_high.mean()
stds_high = grouped_high.std()
counts_high = grouped_high.count()
cis_high = 1.96 * (stds_high / np.sqrt(counts_high))

# The bars are drawn with matplotlib directly because the error bars are
# pre-computed: seaborn.barplot merely forwards `yerr` to matplotlib and
# interprets `capsize` in its own bar-width units, so the combination is fragile.
# `labels` is the arm-name list defined with the low-correlation plot.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    labels,
    means_high.values,
    yerr=cis_high.values,
    capsize=5,  # cap length in points
    color=sns.color_palette('gray', n_colors=len(labels)),
)

ax.set_ylabel("Share Allocated to Diversification Asset")
ax.set_title("Allocation in Positive Correlation Scenario")
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Treatment effects on correlation neglect
arm = df['treatment_num']
neglect = df['correlation_neglect']

# Arm 1 vs arm 2 (pooled-variance two-sample t-test)
t12_group1 = neglect[arm == 1].dropna()
t12_group2 = neglect[arm == 2].dropna()
result_cn_12 = ttest_ind(t12_group1, t12_group2, equal_var=True)
t_stat_cn_12, p_val_cn_12 = result_cn_12.statistic, result_cn_12.pvalue

# Arm 1 vs arm 3 (pooled-variance two-sample t-test)
t13_group1 = neglect[arm == 1].dropna()
t13_group2 = neglect[arm == 3].dropna()
result_cn_13 = ttest_ind(t13_group1, t13_group2, equal_var=True)
t_stat_cn_13, p_val_cn_13 = result_cn_13.statistic, result_cn_13.pvalue

print(f"Treatment 1 vs 2 (Corr Neglect): t = {t_stat_cn_12:.2f}, p = {p_val_cn_12:.3f}")
print(f"Treatment 1 vs 3 (Corr Neglect): t = {t_stat_cn_13:.2f}, p = {p_val_cn_13:.3f}")


In [None]:
# Correlation neglect by treatment arm
# Per-arm mean and normal-approximation 95% half-width (1.96 * standard error)
grouped_cn = df[df['treatment_num'].isin([1, 2, 3])].groupby('treatment_num')['correlation_neglect']
means_cn = grouped_cn.mean()
stds_cn = grouped_cn.std()
counts_cn = grouped_cn.count()
cis_cn = 1.96 * (stds_cn / np.sqrt(counts_cn))

# The bars are drawn with matplotlib directly because the error bars are
# pre-computed: seaborn.barplot merely forwards `yerr` to matplotlib and
# interprets `capsize` in its own bar-width units, so the combination is fragile.
# `labels` is the arm-name list defined with the low-correlation plot.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(
    labels,
    means_cn.values,
    yerr=cis_cn.values,
    capsize=5,  # cap length in points
    color=sns.color_palette('gray', n_colors=len(labels)),
)

ax.set_ylabel("Correlation Neglect")
ax.set_title("Correlation Neglect Across Treatments")
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.set_ylim(0, None)  # anchor the axis at zero, leave the top automatic
fig.tight_layout()
plt.show()


In [None]:
# Regression models: main effects on allocations
# Encode the treatment arm as an ordered categorical with levels 1, 2, 3.
# Codes outside these levels (e.g. 4, which treatment_labels names "Green Low") become NaN.
arm_dtype = pd.CategoricalDtype(categories=[1, 2, 3], ordered=True)
df['treatment_num'] = df['treatment_num'].astype(arm_dtype)

# Helper: OLS with standard errors clustered on id_col and arm 1 as the treatment baseline
def run_regression(df, dependent_var, independent_vars, treatment_col, id_col):
    """Fit ``dependent_var`` on controls plus treatment x High_Corr with clustered SEs.

    Parameters
    ----------
    df : pandas.DataFrame
        Estimation data; must contain a ``High_Corr`` column.
    dependent_var : str
        Outcome column.
    independent_vars : list of str
        Control terms added to the right-hand side of the formula.
    treatment_col : str
        Treatment column, entered as ``C(..., Treatment(reference=1))`` and
        interacted with ``High_Corr``.
    id_col : str
        Column identifying the clusters for the covariance estimator.

    Returns
    -------
    The fitted statsmodels results object with cluster-robust covariance.
    """
    rhs_controls = ' + '.join(independent_vars)
    formula = f"{dependent_var} ~ {rhs_controls} + C({treatment_col}, Treatment(reference=1)) * High_Corr"

    # The formula interface drops rows with missing values, but the cluster
    # groups are passed separately and would keep their original length.
    # Restrict to complete rows first so that both stay aligned.
    candidate_cols = dict.fromkeys(
        [dependent_var, *independent_vars, treatment_col, 'High_Corr', id_col]
    )
    # Terms that are not plain column names (e.g. transformations) are skipped here.
    present_cols = [col for col in candidate_cols if col in df.columns]
    sample = df.dropna(subset=present_cols)

    model = smf.ols(formula, data=sample).fit(
        cov_type='cluster', cov_kwds={'groups': sample[id_col]}
    )
    return model

# Control variables entering the main-effects regressions (order defines table row order)
independent_vars = [
    'Age',
    'Gender',
    'High_wealth',
    'Patient',
    'Risk_taking',
    'Social_Preferences',
    'Donation',
    'Sustainability_Value',
    'numcorrect_financialliteracyall',
    'correct_Environ_Literacy',
    'Stat_know',
    'Number_think',
    'Number_inter',
    'Own_asset',
    'Intere_inves',
    'Trade_frequently',
    'High_CurrentSustainShare',
]

# Create round indicators, High_Corr and Share_lowreturn
# Rows are flagged 0, 1, 0, 1, ... by position.
# NOTE(review): this assumes the rows alternate first round / second round - confirm the sort order.
# A position-based modulo is used so an odd row count does not raise a length mismatch.
df['dup'] = np.arange(len(df)) % 2
df['First_Round'] = (df['dup'] == 0).astype(int)
df['Second_Round'] = (df['dup'] == 1).astype(int)

# High_Corr is 1 when the row is a first round with P1_low_first == 0,
# or a non-first round with P1_low_first == 1; every other case is 0.
first_round_high = (df['First_Round'] == 1) & (df['P1_low_first'] == 0)
second_round_high = (df['First_Round'] == 0) & (df['P1_low_first'] == 1)
df['High_Corr'] = (first_round_high | second_round_high).astype(int)

# Take the P1 allocation on first-round rows and the P2 allocation otherwise
df['Share_lowreturn'] = np.where(
    df['First_Round'] == 1,
    df['P1_investmentAsset_lowreturn'],
    df['P2_investmentAsset_lowreturn']
)

def _baseline_vs(arm):
    """Return a copy of the rows in arm 1 or ``arm`` with the categorical limited to those two levels."""
    arms = [1, arm]
    subset = df[df['treatment_num'].isin(arms)].copy()
    subset['treatment_num'] = pd.Categorical(subset['treatment_num'], categories=arms, ordered=True)
    return subset


# Two-arm samples: green (2) vs baseline (1) and brown (3) vs baseline (1)
df_green = _baseline_vs(2)
df_brown = _baseline_vs(3)

# Fit the main-effects regression on each sample
model_green = run_regression(df_green, 'Share_lowreturn', independent_vars, 'treatment_num', 'id')
model_brown = run_regression(df_brown, 'Share_lowreturn', independent_vars, 'treatment_num', 'id')

# Footer rows for the regression table
def _format_r_squared(fit):
    """R-squared of a fitted model, formatted to two decimals."""
    return f"{fit.rsquared:.2f}"


def _format_nobs(fit):
    """Number of observations of a fitted model, as an integer string."""
    return f"{int(fit.nobs)}"


main_effects_models = [model_green, model_brown]
main_effects_names = ["Green vs Baseline", "Brown vs Baseline"]

# Side-by-side table of both models with significance stars
main_effects_table = summary_col(
    main_effects_models,
    stars=True,
    model_names=main_effects_names,
    info_dict={
        'R-squared': _format_r_squared,
        'N': _format_nobs,
    },
    float_format="%.2f"
)

# Render as LaTeX, echo it, and write it next to the notebook
latex_main = main_effects_table.as_latex()
print(latex_main)

with open("main_effects_regression_results.tex", "w") as f:
    f.write(latex_main)


In [None]:
# Control variables shared by all belief regressions
controls = [
    "Age",
    "Gender",
    "High_wealth",
    "Patient",
    "Risk_taking",
    "Social_Preferences",
    "Donation",
    "Sustainability_Value",
    "numcorrect_financialliteracyall",
    "correct_Environ_Literacy",
    "Stat_know",
    "Number_think",
    "Number_inter",
    "Own_asset",
    "Intere_inves",
    "Trade_frequently",
    "High_CurrentSustainShare",
    "correct_comprehension",
    "correct_dependence",
    "P1_high_return",
]

# Right-hand-side fragment: the controls joined as additive formula terms
plus = " + "
control_str = plus.join(controls)

# Build Expect_value, Lower_Bound and Upper_Bound:
# each takes the P1 column on first-round rows and the P2 column on all other rows
is_first_round = df["First_Round"] == 1
belief_sources = [
    ("Expect_value", "P1_Expect_value", "P2_Expect_value"),
    ("Lower_Bound", "P1_belief_lower", "P2_belief_lower"),
    ("Upper_Bound", "P1_belief_upper", "P2_belief_upper"),
]
for target_col, first_col, second_col in belief_sources:
    df[target_col] = df[first_col].where(is_first_round, df[second_col])


# Treatment x scenario and round x scenario interaction terms
treatment_interaction = "C(treatment_num)*High_Corr + First_Round*High_Corr"

# Formula builder
def make_formula(y_var):
    """Return the regression formula for outcome ``y_var``: interactions first, then the controls."""
    rhs = " + ".join([treatment_interaction, control_str])
    return y_var + " ~ " + rhs


In [None]:
# Filter for Green sample (arms 1 and 2)
df_green = df[df['treatment_num'].isin([1, 2])].copy()
# Limit the categorical to the two arms present. An unused level would
# otherwise produce all-zero dummy and interaction columns from C(treatment_num).
df_green['treatment_num'] = pd.Categorical(df_green['treatment_num'], categories=[1, 2], ordered=True)

# Columns used by every belief regression besides the outcome itself
green_model_cols = ['treatment_num', 'High_Corr', 'First_Round', 'id', *controls]

green_fits = {}
for outcome in ("Lower_Bound", "Upper_Bound", "Expect_value"):
    # The formula interface drops incomplete rows; filter them first so the
    # cluster groups have the same length as the estimation sample.
    sample = df_green.dropna(subset=[outcome, *green_model_cols])
    green_fits[outcome] = smf.ols(make_formula(outcome), data=sample).fit(
        cov_type='cluster', cov_kwds={'groups': sample["id"]}
    )

model_lower_green = green_fits["Lower_Bound"]
model_upper_green = green_fits["Upper_Bound"]
model_expect_green = green_fits["Expect_value"]


In [None]:
# Filter for Brown sample (arms 1 and 3)
df_brown = df[df['treatment_num'].isin([1, 3])].copy()
# Limit the categorical to the two arms present. An unused level would
# otherwise produce all-zero dummy and interaction columns from C(treatment_num).
df_brown['treatment_num'] = pd.Categorical(df_brown['treatment_num'], categories=[1, 3], ordered=True)

# Columns used by every belief regression besides the outcome itself
brown_model_cols = ['treatment_num', 'High_Corr', 'First_Round', 'id', *controls]

brown_fits = {}
for outcome in ("Lower_Bound", "Upper_Bound", "Expect_value"):
    # The formula interface drops incomplete rows; filter them first so the
    # cluster groups have the same length as the estimation sample.
    sample = df_brown.dropna(subset=[outcome, *brown_model_cols])
    brown_fits[outcome] = smf.ols(make_formula(outcome), data=sample).fit(
        cov_type='cluster', cov_kwds={'groups': sample["id"]}
    )

model_lower_brown = brown_fits["Lower_Bound"]
model_upper_brown = brown_fits["Upper_Bound"]
model_expect_brown = brown_fits["Expect_value"]


In [None]:
# Column title -> fitted model, in the order the columns appear in the belief table
belief_models = dict(
    [
        ("Lower_Bound_Green", model_lower_green),
        ("Upper_Bound_Green", model_upper_green),
        ("Expected_Value_Green", model_expect_green),
        ("Lower_Bound_Brown", model_lower_brown),
        ("Upper_Bound_Brown", model_upper_brown),
        ("Expected_Value_Brown", model_expect_brown),
    ]
)



In [None]:
# Footer rows for the belief regression table
def _belief_r_squared(fit):
    """R-squared of a fitted model, formatted to two decimals."""
    return f"{fit.rsquared:.2f}"


def _belief_nobs(fit):
    """Number of observations of a fitted model, as an integer string."""
    return f"{int(fit.nobs)}"


# Split the mapping into parallel lists of column titles and fitted models
belief_names = [name for name in belief_models]
belief_fits = [belief_models[name] for name in belief_names]

# Side-by-side table of all belief models with significance stars
belief_table = summary_col(
    belief_fits,
    stars=True,
    model_names=belief_names,
    info_dict={
        'R-squared': _belief_r_squared,
        'N': _belief_nobs,
    },
    float_format="%.2f"
)

# Render as LaTeX and echo it
latex_belief = belief_table.as_latex()
print(latex_belief)

# Write the LaTeX source next to the notebook
with open("beliefs_regression_results.tex", "w") as f:
    f.write(latex_belief)
