In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats.mstats import winsorize
from scipy.stats import zscore
from scipy import stats
from scipy.stats import ttest_rel, ttest_ind, wilcoxon
# NOTE(review): the generator is created without a seed, so any draws taken
# from it will differ from run to run.
rng = np.random.default_rng()
import statsmodels.formula.api as smf
#import warnings
#warnings.filterwarnings('ignore')

In [None]:
# Load the analysis dataset; the path is resolved relative to the working directory.
df = pd.read_csv("transformed_data.csv")

In [None]:
# Columns reported in the descriptive-statistics table, in display order
summary_vars = [
    "Gender",
    "Age",
    "High_Income",
    "SocialPreferences1_1",
    "SocialPreferenceDona_1",
    "RiskPreferences_1",
    "Timepreferences_1",
    "FL_all",
    "Awareness_SustainInv",
    "investor",
    "ShareSustainInvest",
]

# Row labels shown in the table (raw column name -> label)
var_labels = {
    "Age": "Age",
    "Gender": "Female",
    "High_Income": "High Income",
    "SocialPreferences1_1": "Social Preferences",
    "SocialPreferenceDona_1": "Hypothetical Donation",
    "RiskPreferences_1": "Risk Preferences",
    "Timepreferences_1": "Time Preferences",
    "FL_all": "Financial Literacy",
    "Awareness_SustainInv": "Awareness SRI",
    "investor": "Investor",
    "ShareSustainInvest": "Share SRI"
}

# describe() gives one column per variable; transpose so each variable is a row
selected = df[summary_vars]
summary_table = selected.describe().T

# Overwrite describe()'s count with the non-missing count per variable
summary_table["count"] = selected.count()

# Keep N / mean / SD / min / max, in that order, under their table headers
summary_table = summary_table[["count", "mean", "std", "min", "max"]]
summary_table.columns = ["N", "Mean", "SD", "Min", "Max"]

# Two decimals for Mean/SD, whole numbers for Min/Max
summary_table = summary_table.round({"Mean": 2, "SD": 2, "Min": 0, "Max": 0})

# Swap raw column names for their labels; unlabeled names are kept unchanged
summary_table.index = [var_labels.get(name, name) for name in summary_table.index]

In [None]:
# Render the summary table as LaTeX source
latex_options = dict(
    index=True,
    caption="Summary Statistics\\label{tabsummarystat}",  # caption carries the label
    column_format="lccccc",  # left-aligned labels, five centered columns
    escape=False,            # leave LaTeX special characters untouched
    bold_rows=True,          # bold variable names
    longtable=True,          # use the longtable environment
)
latex_table = summary_table.to_latex(**latex_options)

# Write the LaTeX source to disk
with open("sumtable.tex", mode="w", encoding="utf-8") as tex_file:
    tex_file.write(latex_table)

In [None]:
# ---------------------------------------
# Overestimation of severity in prior beliefs
# ---------------------------------------

# Two-sample t-test for the first question (stated belief vs. true value).
# nan_policy='omit' drops missing answers; with the default ('propagate') a
# single NaN in either column turns both the statistic and the p-value into NaN.
t_stat, p_value = stats.ttest_ind(
    df['Poverty_Line_w'],
    df['True_Poverty_Line'],
    nan_policy='omit',
)

print(p_value, t_stat)

In [None]:
# Paired t-test helper for two columns of one DataFrame
def perform_ttest(var1, var2, df):
    """Run a paired-samples t-test between two columns of ``df``.

    Missing values are omitted by the test (``nan_policy='omit'``).

    Parameters
    ----------
    var1, var2 : str
        Names of the two columns to compare.
    df : pandas.DataFrame
        Frame holding both columns.

    Returns
    -------
    dict
        ``"Variables"`` (the two column names as a list), ``"T-Statistic"``
        and ``"P-Value"``.
    """
    first, second = df[var1], df[var2]
    statistic, pvalue = ttest_rel(first, second, nan_policy='omit')
    return {
        "Variables": [var1, var2],
        "T-Statistic": statistic,
        "P-Value": pvalue,
    }

# (stated belief, true value) column pairs, one per question
pairs_overestimation_severity = [
    ("Poverty_Line_w", "True_Poverty_Line"),
    ("Poverty_Food_w", "True_Poverty_Food"),
    ("Q_Gender_Parliament_w", "True_Gender_Parliament"),
    ("Q_Gender_WageGap_w", "True_Gender_WageGap"),
    ("Q_Economic_Umemploy_w", "True_Economic_Umemploy"),
    ("Q_Economic_Informal_w", "True_Economic_Informal"),
    ("Q_Climate_Mortality_w", "True_Climate_Mortality"),
    ("Q_Climate_Emissions_w", "True_Climate_Emissions"),
]

# One paired t-test per pair, collected as one row each
ttest_results = pd.DataFrame(
    [perform_ttest(*pair, df) for pair in pairs_overestimation_severity]
)

In [None]:
#Comparing investment propensity across social issues in control group  

# Paired t-test and Wilcoxon signed-rank test within one condition
def perform_tests(var1, var2, df, condition_var, condition_value):
    """Compare two columns within the rows where a condition column equals a value.

    Rows where ``df[condition_var] == condition_value`` are kept, rows with a
    missing value in either column are dropped, and both a paired t-test and a
    Wilcoxon signed-rank test are run on what remains.

    Returns
    -------
    dict
        ``"Variable 1"``, ``"Variable 2"``, ``"T-Statistic"``,
        ``"T-Test P-Value"``, ``"Wilcoxon Statistic"``, ``"Wilcoxon P-Value"``.
    """
    in_condition = df[condition_var] == condition_value
    complete_pairs = df.loc[in_condition, [var1, var2]].dropna()
    first = complete_pairs[var1]
    second = complete_pairs[var2]

    t_value, t_pvalue = ttest_rel(first, second)
    w_value, w_pvalue = wilcoxon(first, second)

    outcome = {"Variable 1": var1, "Variable 2": var2}
    outcome["T-Statistic"] = t_value
    outcome["T-Test P-Value"] = t_pvalue
    outcome["Wilcoxon Statistic"] = w_value
    outcome["Wilcoxon P-Value"] = w_pvalue
    return outcome

# Every unordered pair of the four investment-propensity items: (1,2), (1,3), ... (3,4)
test_pairs = [
    (f"Q_Willingness_Invest_{first}", f"Q_Willingness_Invest_{second}")
    for first in range(1, 5)
    for second in range(first + 1, 5)
]

# Run both tests for each pair on the rows with Condition == 0
records = []
for var1, var2 in test_pairs:
    records.append(perform_tests(var1, var2, df, "Condition", 0))
test_results = pd.DataFrame(records)

In [None]:
def perform_test(var, df):
    """Independent-samples t-test of ``var`` between Condition 0 and Condition 1.

    Missing values of ``var`` are dropped within each group and equal
    variances are assumed (``equal_var=True``).

    Returns
    -------
    dict
        ``'Variable'``, ``'T-statistic'`` and ``'P-value'``.
    """
    samples = [
        df.loc[df['Condition'] == level, var].dropna()
        for level in (0, 1)
    ]
    statistic, pvalue = ttest_ind(samples[0], samples[1], equal_var=True)

    return {'Variable': var, 'T-statistic': statistic, 'P-value': pvalue}

# The four investment-propensity items
ttest_var = [f'Q_Willingness_Invest_{item}' for item in range(1, 5)]

# One row of test output per item
ttest_results = pd.DataFrame(list(map(lambda var: perform_test(var, df), ttest_var)))

In [None]:
# Treatment Effects: Effectiveness Impact Investments
# NOTE(review): this rebinds `perform_ttest`, which an earlier cell defines as a
# paired test taking (var1, var2, df). After this cell runs, that earlier call
# signature no longer works without re-running the earlier definition.
def perform_ttest(var, df):
    """Independent-samples t-test of ``var`` between Condition 0 and Condition 1.

    Missing values of ``var`` are dropped per group; equal variances are assumed.

    Returns
    -------
    dict
        ``"Variable"``, ``"T-Statistic"`` and ``"P-Value"``.
    """
    is_group_0 = df['Condition'] == 0
    is_group_1 = df['Condition'] == 1
    values_0 = df[var][is_group_0].dropna()
    values_1 = df[var][is_group_1].dropna()

    result = ttest_ind(values_0, values_1, equal_var=True)
    statistic, pvalue = result

    return {"Variable": var, "T-Statistic": statistic, "P-Value": pvalue}

# Posterior effectiveness items 1-4
ttest_vars = [f"Effectiveness_Post_{item}" for item in range(1, 5)]

# One independent t-test per item, one row each
rows = []
for var in ttest_vars:
    rows.append(perform_ttest(var, df))
ttest_results = pd.DataFrame(rows)

In [None]:
# Shared matplotlib styling applied through rcParams
plot_style = {
    'font.size': 10,
    'font.family': 'serif',
    'axes.edgecolor': 'black',
    'axes.linewidth': 1,
}
plt.rcParams.update(plot_style)

In [None]:
#Bar graphs to plot treatment effects in investment propensity across issues
# ---- Define CI function ----
def group_mean_ci(data, group_col, value_col, z=1.96):
    """Per-group mean and confidence-interval half-width of ``value_col``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``group_col`` and ``value_col``.
    group_col : str
        Column whose distinct values define the groups.
    value_col : str
        Numeric column to summarize; missing values are ignored.
    z : float, default 1.96
        Multiplier applied to the standard error (1.96 gives the 95%
        interval under a normal approximation).

    Returns
    -------
    pandas.DataFrame
        Columns ``[group_col, 'mean', 'ci']`` with one row per group, sorted
        by group value. Rows whose group label is missing are skipped.

    Notes
    -----
    Rows are returned in sorted group order rather than order of first
    appearance, so callers that pair rows with positional lists (bar colors,
    tick labels) get a stable mapping regardless of how the data is ordered.
    """
    # groupby sorts the keys and drops missing group labels by default;
    # 'std' uses ddof=1 and 'count' counts only non-missing values.
    summary = data.groupby(group_col)[value_col].agg(['mean', 'std', 'count'])
    summary['ci'] = z * summary['std'] / np.sqrt(summary['count'])
    return summary.reset_index()[[group_col, 'mean', 'ci']]

# ---- Outcome columns and their panel titles ----
variables = [
    ('Q_Willingness_Invest_1', 'Investment Propensity for\nPoverty Reduction'),
    ('Q_Willingness_Invest_2', 'Investment Propensity for\nDecent Work & Economic Growth'),
    ('Q_Willingness_Invest_3', 'Investment Propensity for\nGender Equality'),
    ('Q_Willingness_Invest_4', 'Investment Propensity for\nClimate Change'),
]

# ---- One panel per outcome on a 2x2 grid ----
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
axes = axes.flatten()

bar_colors = ['#b3b3b3', '#4d4d4d']

for i, (var, title) in enumerate(variables):
    ax = axes[i]
    grouped = group_mean_ci(df, 'Condition', var)

    # Bars at the Condition values with CI half-widths as error bars
    ax.bar(
        grouped['Condition'],
        grouped['mean'],
        yerr=grouped['ci'],
        color=bar_colors,
        edgecolor='black',
        capsize=5,
    )

    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Control', 'Treatment'])
    ax.set_ylabel("Investment Propensity")
    ax.set_title(title)

    # Panel letter (a), (b), ... placed above the top-left corner of the axes
    panel_letter = chr(ord('a') + i)
    ax.text(-0.3, 1.15, f"({panel_letter})", transform=ax.transAxes,
            fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Plotting Treatment Effects Future SRI Share:
# ---- Compute means, CIs, and t-test ----
def compute_group_stats(df, group_col, value_col):
    """Summarize ``value_col`` for the rows where ``group_col`` is 0 and 1.

    Group 0 is reported as "Control" and group 1 as "Treatment". Missing
    values of ``value_col`` are dropped within each group.

    Returns
    -------
    dict
        ``'Control Mean'``, ``'Treatment Mean'``: group means.
        ``'Control CI'``, ``'Treatment CI'``: 1.96 * standard error
        (normal-approximation 95% half-width).
        ``'Difference'``: treatment mean minus control mean.
        ``'t-stat'``, ``'p-value'``: equal-variance two-sample t-test.
    """
    group0 = df.loc[df[group_col] == 0, value_col].dropna()
    group1 = df.loc[df[group_col] == 1, value_col].dropna()

    def _half_width(sample):
        # 95% CI half-width under a normal approximation
        return 1.96 * sample.std(ddof=1) / np.sqrt(len(sample))

    mean0 = group0.mean()
    mean1 = group1.mean()

    # Treatment is passed first so the sign of the t-statistic matches
    # 'Difference' (treatment - control); the p-value does not depend on order.
    t_stat, p_val = stats.ttest_ind(group1, group0, equal_var=True)

    return {
        'Control Mean': mean0,
        'Treatment Mean': mean1,
        'Control CI': _half_width(group0),
        'Treatment CI': _half_width(group1),
        'Difference': mean1 - mean0,
        't-stat': t_stat,
        'p-value': p_val
    }

# ---- Group statistics for the future SRI share ----
stats_dict = compute_group_stats(df, 'Condition', 'Future_Share_SRI')

# ---- Bar chart of the two group means with CI error bars ----
group_labels = ['Control', 'Treatment']
means = [stats_dict[f'{group} Mean'] for group in group_labels]
cis = [stats_dict[f'{group} CI'] for group in group_labels]

fig, ax = plt.subplots(figsize=(5, 4))
ax.bar(
    group_labels,
    means,
    yerr=cis,
    capsize=5,
    color=['#b3b3b3', '#4d4d4d'],
    edgecolor='black',
)
ax.set_ylabel("Future ESG Share")
ax.set_title("Future ESG Share by Condition")

plt.tight_layout()
plt.show()

# ---- Show results in a table ----

def significance_stars(p):
    """Return the significance marker for a p-value.

    ``'***'`` below 0.01, ``'**'`` below 0.05, ``'*'`` below 0.1, otherwise
    an empty string.
    """
    cutoffs = ((0.01, '***'), (0.05, '**'), (0.1, '*'))
    for threshold, marker in cutoffs:
        if p < threshold:
            return marker
    return ''

# ---- Round the reported quantities to two decimals ----
control, treatment, diff, t_stat = (
    round(stats_dict[key], 2)
    for key in ('Control Mean', 'Treatment Mean', 'Difference', 't-stat')
)
stars = significance_stars(stats_dict['p-value'])

# ---- Print the results as a two-column text table ----
table_rows = [
    ('Control', f"{control:>10.2f}"),
    ('Treatment', f"{treatment:>10.2f}"),
    ('Difference', f"{diff:>10.2f}{stars}"),  # stars follow the difference
    ('t-statistic', f"{t_stat:>10.2f}"),
]
print(f"{'Group':<12} {'Value':>10}")
print("-" * 24)
for row_label, row_value in table_rows:
    print(f"{row_label:<12} {row_value}")

In [None]:
# Plotting Treatment Effects Policy Support:
# ---- Compute means, CIs, and t-test ----
# NOTE(review): this repeats the definition of compute_group_stats from the
# previous cell; running this cell rebinds the name.
def compute_group_stats(df, group_col, value_col):
    """Summarize ``value_col`` for the rows where ``group_col`` is 0 and 1.

    Group 0 is reported as "Control" and group 1 as "Treatment". Missing
    values of ``value_col`` are dropped within each group.

    Returns
    -------
    dict
        ``'Control Mean'``, ``'Treatment Mean'``: group means.
        ``'Control CI'``, ``'Treatment CI'``: 1.96 * standard error
        (normal-approximation 95% half-width).
        ``'Difference'``: treatment mean minus control mean.
        ``'t-stat'``, ``'p-value'``: equal-variance two-sample t-test.
    """
    group0 = df.loc[df[group_col] == 0, value_col].dropna()
    group1 = df.loc[df[group_col] == 1, value_col].dropna()

    def _half_width(sample):
        # 95% CI half-width under a normal approximation
        return 1.96 * sample.std(ddof=1) / np.sqrt(len(sample))

    mean0 = group0.mean()
    mean1 = group1.mean()

    # Treatment is passed first so the sign of the t-statistic matches
    # 'Difference' (treatment - control); the p-value does not depend on order.
    t_stat, p_val = stats.ttest_ind(group1, group0, equal_var=True)

    return {
        'Control Mean': mean0,
        'Treatment Mean': mean1,
        'Control CI': _half_width(group0),
        'Treatment CI': _half_width(group1),
        'Difference': mean1 - mean0,
        't-stat': t_stat,
        'p-value': p_val
    }

# ---- Group statistics for policy support ----
stats_dict = compute_group_stats(df, 'Condition', 'Q_Policy_Support_1')

# ---- Bar chart of the two group means with CI error bars ----
group_labels = ['Control', 'Treatment']
means = [stats_dict[f'{group} Mean'] for group in group_labels]
cis = [stats_dict[f'{group} CI'] for group in group_labels]

fig, ax = plt.subplots(figsize=(5, 4))
ax.bar(
    group_labels,
    means,
    yerr=cis,
    capsize=5,
    color=['#b3b3b3', '#4d4d4d'],
    edgecolor='black',
)
ax.set_ylabel(" ESG Policy Support")
ax.set_title("ESG Policy Support by Condition")

plt.tight_layout()
plt.show()

# ---- Show results in a table ----

def significance_stars(p):
    """Map a p-value to its star marker.

    Below 0.01 gives ``'***'``, below 0.05 gives ``'**'``, below 0.1 gives
    ``'*'``; anything else gives ``''``.
    """
    return (
        '***' if p < 0.01
        else '**' if p < 0.05
        else '*' if p < 0.1
        else ''
    )

# ---- Round the reported quantities to two decimals ----
control, treatment, diff, t_stat = (
    round(stats_dict[key], 2)
    for key in ('Control Mean', 'Treatment Mean', 'Difference', 't-stat')
)
stars = significance_stars(stats_dict['p-value'])

# ---- Print the results as a two-column text table ----
table_rows = [
    ('Control', f"{control:>10.2f}"),
    ('Treatment', f"{treatment:>10.2f}"),
    ('Difference', f"{diff:>10.2f}{stars}"),  # stars follow the difference
    ('t-statistic', f"{t_stat:>10.2f}"),
]
print(f"{'Group':<12} {'Value':>10}")
print("-" * 24)
for row_label, row_value in table_rows:
    print(f"{row_label:<12} {row_value}")


In [None]:
# Regression models for Investment propensity and ESG variables
# ---- Controls shared by every specification ----
base_controls = (
    "Condition + Gender + Age + High_Income + Social_Preferences_std + "
    "RiskPreferences_1_std + TimePreferences_1_std + FL_all + investor + "
    "Awareness_SustainInv"
)

# ---- Issue-specific pieces: (model title, item number, deviation column) ----
issues = [
    ("Poverty Reduction", 1, "Dev_Poverty_avg"),
    ("Decent Work & Economic Growth", 2, "Dev_Work_avg"),
    ("Gender Equality", 3, "Dev_Gender_avg"),
    ("Climate Change", 4, "Dev_Climate_avg"),
]
issue_controls = {
    item: f"Q_Prior_InvestImpact_{item}_std + {dev_col}"
    for _, item, dev_col in issues
}

# ---- Propensity models: shared controls plus the matching issue's controls ----
formulas = {
    title: f"Q_Willingness_Invest_{item}_std ~ {base_controls} + {issue_controls[item]}"
    for title, item, _ in issues
}

# ---- ESG models: shared controls plus the controls of all four issues ----
all_issue_controls = " + ".join(issue_controls[item] for _, item, _ in issues)
formulas['Future SRI Share'] = f"Future_Share_SRI ~ {base_controls} + {all_issue_controls}"
formulas['ESG Policy Support'] = f"Q_Policy_Support_std ~ {base_controls} + {all_issue_controls}"

# ---- Fit each model by OLS with HC1 covariance ----
results = {
    name: smf.ols(formula, data=df).fit(cov_type='HC1')
    for name, formula in formulas.items()
}
def extract_summary(model):
    """Build a two-column coefficient table from a fitted model.

    Parameters
    ----------
    model : object
        Anything exposing ``params``, ``bse`` and ``pvalues`` as pandas Series
        sharing one index (as the fitted results used in this notebook do).

    Returns
    -------
    pandas.DataFrame
        ``'coef_star'``: coefficient formatted with exactly two decimals
        followed by its significance stars (``***`` p<0.01, ``**`` p<0.05,
        ``*`` p<0.1). ``'se'``: standard error rounded to two decimals.
    """
    def _stars(p):
        if p < 0.01:
            return '***'
        if p < 0.05:
            return '**'
        if p < 0.1:
            return '*'
        return ''

    # Fixed-width formatting keeps trailing zeros ("0.10", not "0.1"), so the
    # coefficient column lines up with the two-decimal standard errors.
    coef_text = model.params.map(lambda value: f"{value:.2f}")
    star_text = model.pvalues.map(_stars)

    summary_df = pd.DataFrame({
        'coef_star': coef_text + star_text,
        'se': model.bse.round(2),
    })
    return summary_df[['coef_star', 'se']]

# Group 1: the four investment-propensity models
propensity_titles = [
    "Poverty Reduction",
    "Decent Work & Economic Growth",
    "Gender Equality",
    "Climate Change",
]
propensity_results = dict((title, results[title]) for title in propensity_titles)

# Group 2: the two ESG outcome models
esg_titles = ['Future SRI Share', 'ESG Policy Support']
esg_results = dict((title, results[title]) for title in esg_titles)


In [None]:
def _export_regression_table(models, path, caption, label):
    """Combine per-model summaries side by side and write them as a LaTeX table.

    ``models`` maps a model title to a fitted model; each title becomes the
    top level of the column MultiIndex produced by ``pd.concat``. The combined
    table is returned so it can also be displayed.
    """
    table = pd.concat(
        {name: extract_summary(model) for name, model in models.items()},
        axis=1
    )
    table.to_latex(
        path,
        index=True,
        multirow=True,
        float_format="%.2f",
        caption=caption,
        label=label,
        bold_rows=True,
        # Headers and row names contain '&' and '_'; escape them explicitly
        # instead of relying on the pandas-version-dependent default.
        escape=True,
        # The captions contain a non-ASCII dash; fix the file encoding.
        encoding="utf-8",
    )
    return table


# Combine and export Propensity Table
prop_table = _export_regression_table(
    propensity_results,
    "Propensity_FL_table.tex",
    caption="Investment Propensity for Different Social Issues – Financial Literacy Subsample",
    label="tab:propensity_fl",
)

# Combine and export ESG Table
esg_table = _export_regression_table(
    esg_results,
    "ESG_FL_table.tex",
    caption="Future ESG Share and Policy Support – Financial Literacy Subsample",
    label="tab:esg_fl",
)


In [None]:
from IPython.display import display
# Render the combined investment-propensity regression table in the notebook.
display(prop_table)

In [None]:
# Render the combined ESG-outcome regression table in the notebook.
display(esg_table)

In [None]:
#Treatment Effects Effectiveness Impact Investments Beliefs
# Row labels, in item order 1-4
belief_issues = ['Poverty Reduction', 'Decent Work', 'Gender Equality', 'Climate Change']

# Label -> prior-belief column
prior_vars = {
    issue: f'Q_Prior_InvestImpact_{item}'
    for item, issue in enumerate(belief_issues, start=1)
}

# Label -> posterior-belief column
posterior_vars = {
    issue: f'Effectiveness_Post_{item}'
    for item, issue in enumerate(belief_issues, start=1)
}

def compute_ttest_stats(df, var, group_col='Condition'):
    """Group means, their difference and a Welch t-test for one variable.

    Rows with ``group_col == 0`` are reported as "Control" and rows with
    ``group_col == 1`` as "Treatment"; missing values of ``var`` are dropped
    within each group.

    Returns
    -------
    dict
        ``'Control'``, ``'Treatment'``, ``'Pooled'``: means rounded to two
        decimals (pooled = mean over both groups together).
        ``'Difference'``: treatment minus control as a string with exactly two
        decimals, followed by stars (``***`` p<0.01, ``**`` p<0.05, ``*`` p<0.1).
        ``'t-statistic'``: unequal-variance t-statistic (treatment vs. control),
        rounded to two decimals.
    """
    group0 = df.loc[df[group_col] == 0, var].dropna()
    group1 = df.loc[df[group_col] == 1, var].dropna()

    control_mean = group0.mean()
    treatment_mean = group1.mean()
    pooled_mean = pd.concat([group0, group1]).mean()
    diff = treatment_mean - control_mean

    # Treatment first, so the statistic has the same sign as `diff`
    t_stat, p_val = ttest_ind(group1, group0, equal_var=False)

    if p_val < 0.01:
        stars = '***'
    elif p_val < 0.05:
        stars = '**'
    elif p_val < 0.1:
        stars = '*'
    else:
        stars = ''

    return {
        'Control': round(control_mean, 2),
        'Treatment': round(treatment_mean, 2),
        'Pooled': round(pooled_mean, 2),
        # Fixed two-decimal formatting keeps trailing zeros ("1.00", not "1.0")
        'Difference': f"{diff:.2f}{stars}",
        't-statistic': round(t_stat, 2)
    }

# One dictionary of statistics per belief item, keyed by the item's label
prior_results = {}
for label, var in prior_vars.items():
    prior_results[label] = compute_ttest_stats(df, var)

posterior_results = {}
for label, var in posterior_vars.items():
    posterior_results[label] = compute_ttest_stats(df, var)

# Transpose so each belief item is a row and each statistic a column
prior_df = pd.DataFrame(prior_results).transpose()
posterior_df = pd.DataFrame(posterior_results).transpose()


In [None]:
# Create LaTeX string
def df_to_latex_with_panel(df, panel_title):
    """Render one panel of the beliefs table as LaTeX table rows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per table line, with columns ``'Control'``, ``'Treatment'``,
        ``'Pooled'``, ``'Difference'`` and ``'t-statistic'``; the index value
        is used as the row label.
    panel_title : str
        Text for the bold panel heading, which spans all six columns.

    Returns
    -------
    str
        The heading line followed by one ``a & b & ... \\\\`` line per row.
        Numeric cells are written with exactly two decimals; cells that are
        already strings (such as a difference carrying stars) are written
        unchanged.
    """
    columns = ['Control', 'Treatment', 'Pooled', 'Difference', 't-statistic']

    def _cell(value):
        # Strings are pre-formatted upstream; numbers get a fixed two decimals
        return value if isinstance(value, str) else f"{value:.2f}"

    lines = [f"\\multicolumn{{6}}{{l}}{{\\textbf{{{panel_title}}}}} \\\\\n"]
    for index, row in df.iterrows():
        cells = " & ".join(_cell(row[column]) for column in columns)
        lines.append(f"{index} & {cells} \\\\\n")
    return "".join(lines)

# Table preamble: float, caption, label, column spec and header row
header = (
    "\\begin{table}[!htbp] \\centering\n"
    "\\caption{Beliefs Effectiveness Impact Investments}\n"
    "\\label{tab:beliefs_effectiveness}\n"
    "\\begin{tabular}{lccccc}\n"
    "\\toprule\n"
    " & Control & Treatment & Pooled & Difference & t-statistic \\\\\n"
    "\\midrule\n"
)

# One block of rows per panel
panel_a = df_to_latex_with_panel(prior_df, "Panel A: Prior Beliefs")
panel_b = df_to_latex_with_panel(posterior_df, "Panel B: Posterior Beliefs")

footer = "\\bottomrule\n\\end{tabular}\n\\end{table}"

# Panels are separated by extra vertical space
latex_table = header + panel_a + "\\addlinespace\n" + panel_b + footer

# Explicit UTF-8 so the output does not depend on the platform's default
# encoding (matches how sumtable.tex is written earlier in the notebook).
with open("Beliefs_Impact_Investment.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)