# Testing the equality of variances between placebo induction strategies

In [6]:
import numpy as np
import pandas as pd
import nibabel as nib
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from scipy.stats import levene

In [7]:
base_dir = '../data'

# Load the signature table and derive the analysis columns in one chained step.
# Each lambda receives the frame as read from disk (plus any columns assigned
# before it in this call); all five derived columns are independent of each other.
pheno = pd.read_csv(f"{base_dir}/signatures.csv").assign(
    # Integer code for each distinct 'Conditioning' label (categorical codes).
    plc_induction=lambda df: df['Conditioning'].astype('category').cat.codes,
    # Flip the sign so that a higher score means a stronger placebo effect.
    Rating_diff=lambda df: -1 * df['Rating_diff'],
    # Mean-center the explanatory variables.
    Rating_pain_ctr=lambda df: df['Rating_pain_ctr'] - df['Rating_pain_ctr'].mean(),
    Age=lambda df: df['Age'] - df['Age'].mean(),
    Male=lambda df: df['Male'] - df['Male'].mean(),
)

In [11]:
# Purpose of this cell: fit a random-intercept mixed model of the outcome on the
# covariates, then run Levene's test on the model residuals to check whether
# residual variance differs between the levels of `grouping_var`.

# --- Configuration ---
outcome_var = 'Rating_diff'                      # dependent variable of the model
grouping_var = 'Conditioning'                    # groups compared by Levene's test
random_effect_var = 'Study_ID'                   # random-intercept grouping factor
covariates = ['Male', 'Age', 'Rating_pain_ctr']  # fixed-effect covariates
alpha = 0.05                                     # significance threshold for Levene's test

# --- Fit Mixed-Effects Model ---
# Cast the random-effect and grouping columns to object dtype so they are
# treated as labels rather than numbers. Note: this modifies `pheno` in place.
pheno[random_effect_var] = pheno[random_effect_var].astype(object)
pheno[grouping_var] = pheno[grouping_var].astype(object)

# Fixed-effects formula built from the covariates only.
# NOTE(review): `grouping_var` is intentionally(?) not a fixed effect here; it is
# only used afterwards to split the residuals -- confirm this is the intended design.
formula = f"{outcome_var} ~ {' + '.join(covariates)}"

# Drop rows with a missing value in any column the model or the test needs,
# so the residuals and the grouping labels come from the same set of rows.
columns_for_model = [outcome_var, grouping_var, random_effect_var] + covariates
model_data = pheno[columns_for_model].dropna()

print(f"Fitting mixed model: {formula} + (1 | {random_effect_var})")
# The broad `except` below is kept as a best-effort guard: any failure is
# reported as a message instead of a traceback. If it triggers, names assigned
# inside the `try` (e.g. `result`, `residuals_df`) may be unset or stale.
try:
    # Random intercept per level of `random_effect_var`.
    model = smf.mixedlm(formula, data=model_data, groups=model_data[random_effect_var])
    result = model.fit()
    print(result.summary())

    residuals = result.resid

    # Pair every residual with its group label; the labels are taken from
    # `model_data` (the frame the model was fitted on) so rows correspond.
    residuals_df = pd.DataFrame({
        'residuals': residuals,
        grouping_var: model_data[grouping_var],
    })

    # --- Perform Levene's Test on Residuals ---
    groups = residuals_df[grouping_var].unique()

    # Collect one residual sample per group, skipping groups that are too
    # small to contribute (fewer than 2 observations).
    residual_groups = []
    valid_groups = []
    for group in groups:
        group_residuals = residuals_df.loc[residuals_df[grouping_var] == group, 'residuals']
        if len(group_residuals) > 1:
            residual_groups.append(group_residuals)
            valid_groups.append(group)
        else:
            print(f"Warning: Group '{group}' has insufficient data ({len(group_residuals)} sample(s)) and will be excluded from Levene's test.")

    # Fixed: the original used "\\n", which printed a literal backslash-n
    # instead of a blank line before the header.
    print("\n--- Levene's Test on Model Residuals ---")
    if len(valid_groups) < 2:
        print("Error: Levene's test requires at least two groups with sufficient data.")
    else:
        # Median-centered variant of Levene's test (more robust).
        stat, p_value = levene(*residual_groups, center='median')

        print(f"Groups compared: {list(valid_groups)}")
        print(f"Levene's test statistic: {stat:.4f}")
        print(f"P-value: {p_value:.12f}")

        if p_value < alpha:
            print(f"Result: Reject the null hypothesis. Evidence suggests variances of residuals are unequal across {grouping_var} groups.")
        else:
            print(f"Result: Fail to reject the null hypothesis. No significant evidence of unequal residual variances across {grouping_var} groups.")

except Exception as e:
    print(f"An error occurred during model fitting or Levene's test: {e}")
    print("Please check your data, column names, and ensure sufficient data per group/study.")


Fitting mixed model: Rating_diff ~ Male + Age + Rating_pain_ctr + (1 | Study_ID)
          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: Rating_diff
No. Observations:  409     Method:             REML       
No. Groups:        16      Scale:              246.0204   
Min. group size:   10      Log-Likelihood:     -1712.2135 
Max. group size:   40      Converged:          Yes        
Mean group size:   25.6                                   
----------------------------------------------------------
                Coef.  Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------
Intercept       11.052    1.267  8.726 0.000  8.570 13.535
Male             2.310    1.832  1.261 0.207 -1.281  5.901
Age             -0.301    0.140 -2.146 0.032 -0.576 -0.026
Rating_pain_ctr  0.273    0.041  6.728 0.000  0.193  0.352
Group Var       15.044    0.653                           

\n--- Levene's Test on Model Residuals ---
G