In [2]:
import pandas as pd 
import numpy as np
from semopy import Model, calc_stats
loadings_df = pd.read_csv("loadings.csv")
print(loadings_df.head())

  Unnamed: 0   Factor1   Factor2   Factor3   Factor4
0      BFI_O  0.367498  0.077352 -0.365540  0.172630
1      BFI_C -0.177268  0.814139  0.089723 -0.097728
2      BFI_E  0.143003  0.621649  0.040725  0.099741
3      BFI_A  0.218809  0.736865 -0.063858 -0.036205
4      BFI_N  0.022131  0.558109 -0.255283  0.047528


In [3]:


# Load the wide-format loadings
loadings_df = pd.read_csv("loadings.csv", index_col=0)

# Melt to long format
long_loadings = loadings_df.reset_index().melt(id_vars='index', var_name='latent', value_name='loading')
long_loadings = long_loadings.rename(columns={'index': 'indicator'})

# Build measurement model lines (fixed loadings)
measurement_lines = []
for latent in long_loadings['latent'].unique():
    subset = long_loadings[long_loadings['latent'] == latent]
    # Only include nonzero loadings (optional, but usually preferred)
    indicators = [f"{row['loading']}*{row['indicator']}" for _, row in subset.iterrows() if abs(row['loading']) > 1e-6]
    line = f"{latent} =~ " + " + ".join(indicators)
    measurement_lines.append(line)

# Print the measurement model syntax
semopy_line = ""
for line in measurement_lines:
    semopy_line += line + "\n"

print(semopy_line)

Factor1 =~ 0.367498099335114*BFI_O + -0.1772675374185255*BFI_C + 0.1430031278388357*BFI_E + 0.218809075030115*BFI_A + 0.0221305639172322*BFI_N + 0.0448045914668764*IUS_Total + 0.0104546718816105*NFC_Total + 0.88290969611021*IRI_PT + 0.9022117748801*IRI_FS + 0.7946583305150291*IRI_EC + 0.4492009801242347*IRI_PD + 0.1238427799609504*PANAS_PA + -0.0485774522783508*PANAS_NA + 0.2979312847626743*SVS_Openness + 0.097240854317276*SVS_Conservation
Factor2 =~ 0.0773517389475304*BFI_O + 0.8141394046443691*BFI_C + 0.6216487611226236*BFI_E + 0.736864506243802*BFI_A + 0.5581093690649195*BFI_N + -0.1420683651601575*IUS_Total + 0.0477739565192317*NFC_Total + -0.1573273459921017*IRI_PT + 0.0785302693693554*IRI_FS + 0.0900561339878711*IRI_EC + 0.1883091983992059*IRI_PD + 0.3270445110214159*PANAS_PA + -0.0880577211864588*PANAS_NA + 0.0190637734459821*SVS_Openness + 0.3321475930329605*SVS_Conservation
Factor3 =~ -0.3655398016209181*BFI_O + 0.0897227664889323*BFI_C + 0.0407252735749797*BFI_E + -0.06385839

In [4]:


# ------------------------------------------------------------------------------------
# 0) CONFIG — EDIT THESE TO MATCH YOUR CSV
# ------------------------------------------------------------------------------------
CSV_PATH = "final_final_allsubj_personality_coded.csv"   # <-- set your file path

# Subscale / construct columns already in your CSV:
CONSTRUCTS = [
    "BFI_O","BFI_C","BFI_E","BFI_A","BFI_N",
    "NFC_Total",
    "IUS_Total",  # Originally IUS_Pros + IUS_Inhib, combined into IUS Total (change later?)
    "IRI_PT","IRI_FS","IRI_EC","IRI_PD",
    "PANAS_PA","PANAS_NA",
    "SVS_Openness","SVS_Conservation"
]

# Outcomes (must be present)
Y_OPPOSE  = "delta_oppose" #delta_oppose
Y_SUPPORT = "delta_support" #delta_support
Y_COMBINED = "Delta_all"     # will be created below

# Flags
STANDARDIZE_CONSTRUCTS = True
USE_ABS_UPDATES = False      # True => |Delta_oppose|, |Delta_support| individually
COMBINED_MODE = "mean_abs_components"  # "mean_abs_components" or abs_of_mean

# ------------------------------------------------------------------------------------
# 1) LOAD & PREP
# ------------------------------------------------------------------------------------
df = pd.read_csv(CSV_PATH)
# rename columns to fit expected names
df = df.rename(columns={'opposing_update': 'delta_oppose', 'supporting_update': 'delta_support'})

# Ensure outcomes exist
for y in [Y_OPPOSE, Y_SUPPORT]:
    if y not in df.columns:
        raise ValueError(f"Outcome column '{y}' is missing from the CSV.")

# Optionally convert each update to absolute magnitude
#if USE_ABS_UPDATES:
#    df[Y_OPPOSE]  = df[Y_OPPOSE].abs()
#    df[Y_SUPPORT] = df[Y_SUPPORT].abs()
#

# Create combined outcome
if COMBINED_MODE == "mean_abs_components":
    # Recommended: average of magnitudes
    df[Y_COMBINED] = (df[Y_OPPOSE].abs() + df[Y_SUPPORT].abs()) / 2.0
elif COMBINED_MODE == "abs_of_mean":
    # Alternative: absolute of the mean
    df[Y_COMBINED] = ((df[Y_OPPOSE] + df[Y_SUPPORT]) / 2.0).abs()
else:
    raise ValueError("COMBINED_MODE must be 'mean_abs_components' or 'abs_of_mean'.")

# Keep only columns we need; drop rows with missing values
available_constructs = [c for c in CONSTRUCTS if c in df.columns]
if not available_constructs:
    raise ValueError("None of the specified CONSTRUCTS were found in the CSV.")

use_cols = available_constructs + [Y_OPPOSE, Y_SUPPORT, Y_COMBINED]
data = df[use_cols].dropna().copy()

# Standardize constructs (not outcomes) for interpretability of standardized paths
if STANDARDIZE_CONSTRUCTS:
    for c in available_constructs:
        s = data[c].std(ddof=0)
        if pd.notnull(s) and s > 0:
            data[c] = (data[c] - data[c].mean()) / s

print(f"n (after dropna) = {len(data)}")
print("Predictors used:", available_constructs)
print("Combined mode:", COMBINED_MODE, "| USE_ABS_UPDATES:", USE_ABS_UPDATES)

# ------------------------------------------------------------------------------------
# 2) Helper: fit one SEM path model for a single outcome
# ------------------------------------------------------------------------------------
def fit_sem_path(data, constructs, outcome):
    """Fits a path model: outcome ~ all constructs (observed)."""
    rhs = " + ".join(constructs)
    structural = f"{outcome} ~ Factor1 + Factor2 + Factor3 + Factor4\n"
    full_model = semopy_line + structural
    m = Model(full_model)
    m.fit(data)

    stats = calc_stats(m)
    est = m.inspect(std_est=True)

    
    # Extract and sort structural paths by absolute standardized estimate
    betas = est[(est["op"]=="~") & (est["lval"]==outcome)].copy()
    betas["abs_std"] = betas["Est. Std"].abs()
    betas = betas.sort_values("abs_std", ascending=False)

    # Print summary
    print(f"\n=== SEM Path for {outcome} ===")
    #print({k: stats[k] for k in ["CFI","TLI","RMSEA","SRMR","AIC","BIC"]})
    #no latent variables like SRMR, so we'll just print what we have
    fit_indices = ["CFI","TLI","RMSEA","SRMR","AIC","BIC"]
    available_stats = {k: stats[k] for k in fit_indices if k in stats}

    #rmsea = float(stats["RMSEA"])
    #print(f"RMSEA = {rmsea:.3f}")
    print(f"stats: {available_stats}")
    #r2 = stats["r2"].get(outcome, np.nan)

    r2_dict = stats.get("r2", {})      # returns {} if 'r2' key is missing
    r2 = r2_dict.get(outcome, np.nan)  # returns np.nan if outcome not present
    
    
    print(f"R^2({outcome}) = {r2:.3f}")
    print("\nStandardized paths (sorted by |Std. Estimate|):")
    print(betas[["rval","Estimate","Std. Err","z-value","p-value","Est. Std"]])

    return {"model": m, "stats": stats, "betas": betas, "r2": r2, "model_syntax": full_model}

# ------------------------------------------------------------------------------------
# 3) Fit THREE separate models
# ------------------------------------------------------------------------------------
res_opp = fit_sem_path(data, available_constructs, Y_OPPOSE)
res_sup = fit_sem_path(data, available_constructs, Y_SUPPORT)
res_all = fit_sem_path(data, available_constructs, Y_COMBINED)

# ------------------------------------------------------------------------------------
# 4) Save coefficient tables (optional)
# ------------------------------------------------------------------------------------
res_opp["betas"].to_csv("sem_paths_Delta_oppose.csv", index=False)
res_sup["betas"].to_csv("sem_paths_Delta_support.csv", index=False)
res_all["betas"].to_csv("sem_paths_Delta_all.csv", index=False)
print("\nSaved: sem_paths_Delta_oppose.csv / sem_paths_Delta_support.csv / sem_paths_Delta_all.csv")


n (after dropna) = 87
Predictors used: ['BFI_O', 'BFI_C', 'BFI_E', 'BFI_A', 'BFI_N', 'NFC_Total', 'IUS_Total', 'IRI_PT', 'IRI_FS', 'IRI_EC', 'IRI_PD', 'PANAS_PA', 'PANAS_NA', 'SVS_Openness', 'SVS_Conservation']
Combined mode: mean_abs_components | USE_ABS_UPDATES: False

=== SEM Path for delta_oppose ===
stats: {'CFI': Value    0.967441
Name: CFI, dtype: float64, 'TLI': Value    0.963141
Name: TLI, dtype: float64, 'RMSEA': Value    0.043848
Name: RMSEA, dtype: float64, 'AIC': Value    57.1603
Name: AIC, dtype: float64, 'BIC': Value    131.137544
Name: BIC, dtype: float64}
R^2(delta_oppose) = nan

Standardized paths (sorted by |Std. Estimate|):
       rval  Estimate  Std. Err   z-value   p-value  Est. Std
62  Factor3 -0.963561  0.355813 -2.708058  0.006768 -0.282643
63  Factor4  0.713638  0.365168  1.954272  0.050669  0.209348
61  Factor2 -0.300680  0.412896 -0.728222  0.466478 -0.088000
60  Factor1 -0.190514  0.395057 -0.482243  0.629633 -0.055744

=== SEM Path for delta_support ===
st