In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta
from concurrent.futures import ProcessPoolExecutor, as_completed

# --- Load GLM results ---
with open("glm_binomial_topk_results.pkl", "rb") as f:
    results_real = pickle.load(f)

with open("glm_results_topk_shuffled.pkl", "rb") as f:
    results_shuffled = pickle.load(f)

# --- Parallel-safe function to extract Cox–Snell pseudo-R² ---
def get_pseudo_r2_cs_wrapper(result):
    try:
        if result is not None:
            dev = result.deviance
            null_dev = result.null_deviance
            r2_cs = 1 - (dev / null_dev)
            if r2_cs > 0:
                return r2_cs
    except Exception:
        pass
    return None

def get_pseudo_r2_cs_summary_wrapper(result):
    try:
        if result is not None:
            res_sum = result.summary()  # force resolution of lazy computation
            dev = result.deviance
            null_dev = result.null_deviance
            r2_cs = 1 - (dev / null_dev)
            if r2_cs > 0:
                return r2_cs
    except Exception:
        pass
    return None

# --- Use multiprocessing to compute R² in parallel ---
def parallel_map(fn, iterable, desc="Processing", max_workers=None):
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fn, item) for item in iterable]
        for fut in as_completed(futures):
            results.append(fut.result())
    return results

print("🧠 Computing R² values for real data...")
r2_real = parallel_map(get_pseudo_r2_cs_wrapper, results_real)

print("🧠 Computing R² values for shuffled data...")
r2_shuf = parallel_map(get_pseudo_r2_cs_summary_wrapper, results_shuffled)

# --- Collect valid pairs ---
valid_pairs = [
    (r, s) for r, s in zip(r2_real, r2_shuf)
    if r is not None and s is not None
]

if len(valid_pairs) == 0:
    raise RuntimeError("❌ No valid R² pairs found. Check model fits and data!")

# --- Unpack and compute differences ---
r2_real_valid, r2_shuf_valid = map(np.array, zip(*valid_pairs))
r2_diff = r2_real_valid - r2_shuf_valid

# --- Bayesian test: P(real > shuffled) ---
k = np.sum(r2_diff > 0)
n = len(r2_diff)
posterior = beta(0.5 + k, 0.5 + (n - k))
p_real_better = posterior.mean()

# --- Print summary ---
print(f"✅ Valid neurons: {n}")
print(f"📊 Fraction with real > shuffled: {k}/{n} = {k/n:.3f}")
print(f"📈 Bayesian P(real > shuffled): {p_real_better:.3f}")

# --- Plot ---
plt.figure(figsize=(8, 4))
plt.hist(r2_diff, bins=30, color='darkcyan', edgecolor='black')
plt.axvline(0, color='red', linestyle='--', label='no difference')
plt.xlabel("Δ Cox–Snell R² (real - shuffled)")
plt.ylabel("Neuron count")
plt.title("Difference in pseudo-R² (real vs shuffled)")
plt.legend()
plt.tight_layout()
plt.show()


🧠 Computing R² values for real data...


: 

In [None]:
# Extract summary statistics from each result object
glm_summary_data = []

for i, res in enumerate(results_shuffled):
    if res is not None:
        glm_summary_data.append({
            'neuron': i,
            'params': res.params,              # coefficients (including intercept)
            'pvalues': res.pvalues,            # p-values
            'deviance': res.deviance,
            'aic': res.aic,
            'bic': res.bic,
            'converged': res.converged         # use .converged instead of .mle_retvals
        })
    else:
        glm_summary_data.append({
            'neuron': i,
            'error': True
        })

In [2]:
import pickle
with open("glm_results_topk_shuffled.pkl", "rb") as f:
    results_shuffled = pickle.load(f)

In [3]:
print(results_shuffled[0].summary().deviance)

AttributeError: 'Summary' object has no attribute 'deviance'

In [8]:
results_real

[{'neuron': 0,
  'params': array([-2.82780890e+00, -2.78895371e+02,  1.21792663e+00, -1.94746328e+00,
         -2.12845257e+00, -7.41477035e+00,  1.16068682e+02, -1.83815608e+00,
         -2.15304424e+00, -1.62798189e+00, -1.88511801e+00,  5.41598336e+00,
          1.36049669e+00, -1.39975978e+02, -1.44318540e+00, -2.02111072e+01,
         -5.32672667e+00, -4.51832002e+00, -1.20107844e+00, -3.76355996e-01,
          1.08162990e+00, -1.10694675e+00,  4.05679802e+00, -1.06391037e+00,
         -3.30428652e-01, -1.03849012e+01, -5.46804407e+00, -3.18061688e-01,
         -3.47719897e-01, -3.17180954e-01, -1.96045637e-01, -2.17539297e-01,
          7.48206820e-01, -7.77662729e+00, -5.16621336e-01, -8.23589252e-01,
          8.97629774e-02,  1.11673904e-01,  4.84499710e-01,  1.08969885e-01,
         -3.86316255e+00,  6.33020924e-01,  8.73400725e-01, -5.53530441e+00,
         -1.29090509e-01,  5.04340983e+00,  1.40922487e-01, -8.55057553e-01,
          2.58401222e-01,  8.63819122e-02,  5.73719