In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.stats.anova as anova

# Load the word-property data; the path is relative to the notebook's working directory.
merged_df = pd.read_excel('Cox_Word_Properties.xlsx')

# Both outcomes are regressed on the same six word properties.
# Q('...') quotes the outcome column names, which contain a space.
new_formula_cue = "Q('Cue Memorability') ~ CD + Concreteness + Morphemes + ON_Size + PN_Size + BOI"
new_formula_target = "Q('Target Memorability') ~ CD + Concreteness + Morphemes + ON_Size + PN_Size + BOI"

# Fit one OLS model per outcome on the same data frame.
new_cue_model = sm.formula.ols(formula=new_formula_cue, data=merged_df).fit()
new_target_model = sm.formula.ols(formula=new_formula_target, data=merged_df).fit()

# --- Function to extract detailed stats for table ---
def get_regression_stats_for_table(model_results, data_df):
    """
    Collect, per predictor, the numbers shown in the combined regression table.

    Every term in ``model_results.params`` except 'Intercept' gets an entry with:
      * 'beta'       - coefficient multiplied by SD(x) / SD(y)
      * 'lower_beta' / 'upper_beta' - the bounds from ``conf_int()`` multiplied
        by the same SD(x) / SD(y) factor
      * 'p_value'    - the term's p-value
      * 'stars'      - "***", "**", "*" or "" for p < 0.001 / 0.01 / 0.05 / otherwise
      * 'sr2'        - Type II sum of squares divided by the centered total sum
        of squares (NaN when it could not be computed)

    ``data_df`` is accepted for interface compatibility but is not read.
    Returns a dictionary keyed by variable name.
    """
    fitted_model = model_results.model

    # Spread of the outcome and a labelled copy of the design matrix.
    outcome_sd = np.std(fitted_model.endog)
    design = pd.DataFrame(fitted_model.exog, columns=fitted_model.exog_names)

    # Confidence bounds on the unstandardized coefficients.
    raw_bounds = model_results.conf_int()

    # sr² per term; best effort - any failure is reported and leaves sr² as NaN.
    unique_variance = {}
    try:
        type2_table = anova.anova_lm(model_results, typ=2)
        total_ss = model_results.centered_tss
        if total_ss != 0 and not np.isnan(total_ss):
            for term in type2_table.index.drop(['Intercept', 'Residual'], errors='ignore'):
                if term not in model_results.pvalues.index:
                    continue  # ANOVA term without a matching coefficient row
                unique_variance[term] = type2_table['sum_sq'][term] / total_ss
    except Exception as e:
        print(f"Warning: Could not calculate sr² for model. Error: {e}")

    # Cut-offs are checked from strictest to loosest; the first hit wins.
    star_cutoffs = ((0.001, "***"), (0.01, "**"), (0.05, "*"))

    table_stats = {}
    for name, coef in model_results.params.items():
        if name == 'Intercept':
            continue  # the intercept is not reported in the table

        p = model_results.pvalues[name]
        ci_low, ci_high = raw_bounds.loc[name]
        sr2_value = unique_variance.get(name, np.nan)

        if name in design.columns and outcome_sd != 0:
            # The ratio is non-negative, so the bounds keep their order.
            ratio = np.std(design[name]) / outcome_sd
            std_coef, std_low, std_high = coef * ratio, ci_low * ratio, ci_high * ratio
        else:
            std_coef = std_low = std_high = np.nan
            print(f"Warning: Could not find SD or scale for variable '{name}'")

        marker = next((symbol for cutoff, symbol in star_cutoffs if p < cutoff), "")

        table_stats[name] = {
            'beta': std_coef,
            'lower_beta': std_low,
            'upper_beta': std_high,
            'p_value': p,
            'stars': marker,
            'sr2': sr2_value
        }
    return table_stats

# --- Run the extractor once per fitted model (cue first, then target) ---
cue_stats, target_stats = (
    get_regression_stats_for_table(fitted, merged_df)
    for fitted in (new_cue_model, new_target_model)
)

In [5]:
def generate_markdown_table_std_ci(cue_stats, target_stats, predictor_order, cue_adj_r2, target_adj_r2):
    """Generates the Markdown table string with standardized beta CI.

    Parameters
    ----------
    cue_stats, target_stats : dict
        Mapping of predictor name -> dict with the keys 'beta', 'lower_beta',
        'upper_beta', 'p_value', 'stars' and 'sr2'. A predictor or key that is
        missing (or NaN) is rendered as "N/A".
    predictor_order : iterable of str
        Predictor names in the order the rows should appear. Underscores in a
        name are shown as spaces.
    cue_adj_r2, target_adj_r2 : float
        Adjusted R² of the two models, written into the final table row.

    Returns
    -------
    str
        Title, table (header, one row per predictor, "Adj. R²" row) and note.
    """

    # --- Header ---
    header = """
**Table : OLS Regression of Word Properties Predicting Cue and Target Memorability**

| Word Property | Cue: β | Cue: CI 95% (β) | Cue: p-value | Cue: sr² | Target: β | Target: CI 95% (β) | Target: p-value | Target: sr² |
| :------------ | :----: | :-------------: | :----------: | :------: | :-------: | :----------------: | :-------------: | :---------: |
"""

    # --- Adjusted R² row ---
    # This row must follow the last predictor row directly: a blank line ends a
    # Markdown table, and a second dashed separator line would be rendered as a
    # literal row of dashes, so neither is emitted here.
    adj_r2_row = (
        "| Adj. R²       |        |                 |              | {:.3f}   |"
        "           |                    |                 |    {:.3f}    |\n"
    ).format(cue_adj_r2, target_adj_r2)

    # --- Note (separated from the table by one blank line) ---
    note = "\n*Note.* β = Standardized Beta coefficient. CI 95% (β) = 95% Confidence Interval for the standardized coefficient β. sr² = Squared semi-partial correlation (unique variance explained). p-values < 0.001 are shown as <0.001. Significance codes based on p-value: * p < 0.05, ** p < 0.01, *** p < 0.001. CI values are calculated by scaling the unstandardized CIs. sr² values are derived from Type II ANOVA sums of squares.\n"

    # --- Cell formatting helpers ---
    def format_number(value):
        """Three-decimal number, or "N/A" for NaN."""
        return "N/A" if np.isnan(value) else f"{value:.3f}"

    def format_ci(stat):
        """"(lower, upper)" with three decimals, or "(N/A, N/A)" when the lower bound is NaN."""
        lower = stat.get('lower_beta', np.nan)
        upper = stat.get('upper_beta', np.nan)
        if np.isnan(lower):
            return "(N/A, N/A)"
        return f"({lower:.3f}, {upper:.3f})"

    def format_p_markdown(p, stars):
        """p-value with its significance stars; values below 0.001 are shown as "<0.001"."""
        p_val = float(p)  # Ensure p is float
        if np.isnan(p_val):
            return "N/A"
        if p_val < 0.001:
            return "<0.001" + stars
        return f"{p_val:.3f}" + stars

    def format_model_cells(stat):
        """The four cells (β, CI, p-value, sr²) for one model's entry."""
        return (
            format_number(stat.get('beta', np.nan)),
            format_ci(stat),
            format_p_markdown(stat.get('p_value', np.nan), stat.get('stars', '')),
            format_number(stat.get('sr2', np.nan)),
        )

    # --- Populate table rows ---
    rows = []
    for var in predictor_order:
        cue_beta_str, cue_ci_str, cue_p_str, cue_sr2_str = format_model_cells(cue_stats.get(var, {}))
        tgt_beta_str, tgt_ci_str, tgt_p_str, tgt_sr2_str = format_model_cells(target_stats.get(var, {}))
        rows.append(
            f"| {var.replace('_',' '):<13} | {cue_beta_str:>6} | {cue_ci_str:>15} | {cue_p_str:>12} | {cue_sr2_str:>8} | {tgt_beta_str:>9} | {tgt_ci_str:>18} | {tgt_p_str:>15} | {tgt_sr2_str:>11} |\n"
        )
    table_body = "".join(rows)

    return header + table_body + adj_r2_row + note



# --- Row order for the table ---
predictor_order = ['CD', 'Concreteness', 'Morphemes', 'ON_Size', 'PN_Size', 'BOI']

# --- Adjusted R-squared of each fitted model ---
cue_adj_r2_val, target_adj_r2_val = new_cue_model.rsquared_adj, new_target_model.rsquared_adj

# --- Build the Markdown table, then print it followed by a divider line ---
markdown_table_string = generate_markdown_table_std_ci(
    cue_stats=cue_stats,
    target_stats=target_stats,
    predictor_order=predictor_order,
    cue_adj_r2=cue_adj_r2_val,
    target_adj_r2=target_adj_r2_val,
)
print(markdown_table_string, "\n" + "="*80 + "\n", sep="\n")




**Table : OLS Regression of Word Properties Predicting Cue and Target Memorability**

| Word Property | Cue: β | Cue: CI 95% (β) | Cue: p-value | Cue: sr² | Target: β | Target: CI 95% (β) | Target: p-value | Target: sr² |
| :------------ | :----: | :-------------: | :----------: | :------: | :-------: | :----------------: | :-------------: | :---------: |
| CD            |  0.115 |  (0.026, 0.205) |       0.012* |    0.011 |     0.204 |     (0.102, 0.307) |       <0.001*** |       0.035 |
| Concreteness  |  0.422 |  (0.285, 0.559) |    <0.001*** |    0.065 |     0.045 |    (-0.111, 0.200) |           0.571 |       0.001 |
| Morphemes     | -0.061 | (-0.171, 0.049) |        0.274 |    0.002 |    -0.005 |    (-0.130, 0.120) |           0.942 |       0.000 |
| ON Size       | -0.225 | (-0.315, -0.135) |    <0.001*** |    0.043 |    -0.099 |    (-0.201, 0.003) |           0.056 |       0.008 |
| PN Size       |  0.013 | (-0.096, 0.123) |        0.812 |    0.000 |     0.011 |    (-0.113,