<a href="https://colab.research.google.com/github/martin-rosenfeld7/QNC_martin-rosenfeld/blob/main/QNC_Presentation_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Regenerate the simulated figure with reduced spread for all "- ARS" conditions.
# We'll keep +ARS groups at CV=0.25 (L3000+ARS at 0.30), and set "- ARS" groups to CV=0.15.
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from caas_jupyter_tools import display_dataframe_to_user

# ---- Simulation design ------------------------------------------------------
# One row per condition, listed in plotting order:
#   (condition label, target mean, target coefficient of variation)
# "- ARS" groups use the tighter CV of 0.15; "+ ARS" groups keep 0.25,
# except "L3000 + ARS" at 0.30 (slightly noisier stressed control).
_design = [
    ("L3000 - ARS", 1.00, 0.15),
    ("KPNA3 - ARS", 0.98, 0.15),
    ("KPNA4 - ARS", 0.96, 0.15),
    ("KPNB1 - ARS", 0.95, 0.15),
    ("L3000 + ARS", 0.50, 0.30),
    ("KPNA3 + ARS", 0.70, 0.25),
    ("KPNA4 + ARS", 0.85, 0.25),
    ("KPNB1 + ARS", 0.95, 0.25),
]

# Derived lookups; all three share the row order of the table above.
order = [label for label, _mean, _spread in _design]
expected_mean = {label: mean for label, mean, _spread in _design}
cv = {label: spread for label, _mean, spread in _design}

# Number of simulated values drawn for each condition.
N_per_group = 400

# Seeded generator so the simulated values are the same on every run.
rng = np.random.default_rng(2025)

def lognormal_params(target_mean, target_cv):
    """Return the (mu, sigma) of a log-normal with the requested mean and CV.

    ``mu`` and ``sigma`` are the mean and standard deviation of the underlying
    normal distribution, i.e. the values expected by ``rng.lognormal``. They
    follow from ``mean = exp(mu + sigma**2 / 2)`` and
    ``CV**2 = exp(sigma**2) - 1``.

    Parameters
    ----------
    target_mean : float or array-like
        Desired arithmetic mean of the log-normal variable; must be finite
        and strictly positive.
    target_cv : float or array-like
        Desired coefficient of variation (SD / mean); must be finite and
        non-negative.

    Returns
    -------
    mu, sigma
        Parameters of the underlying normal distribution.

    Raises
    ------
    ValueError
        If ``target_mean`` is not finite and > 0, or ``target_cv`` is not
        finite and >= 0.
    """
    mean_arr = np.asarray(target_mean, dtype=float)
    cv_arr = np.asarray(target_cv, dtype=float)

    # Fail loudly instead of letting log() of a non-positive mean produce
    # NaN / -inf parameters that would silently poison the simulated samples.
    if not np.all(np.isfinite(mean_arr) & (mean_arr > 0)):
        raise ValueError(f"target_mean must be finite and > 0, got {target_mean!r}")
    # A negative CV would be silently squared away below; reject it explicitly.
    if not np.all(np.isfinite(cv_arr) & (cv_arr >= 0)):
        raise ValueError(f"target_cv must be finite and >= 0, got {target_cv!r}")

    sigma2 = np.log(cv_arr**2 + 1.0)
    sigma = np.sqrt(sigma2)
    mu = np.log(mean_arr) - 0.5 * sigma2
    return mu, sigma

# ---- Simulate ---------------------------------------------------------------
# Draw N_per_group log-normal values for each condition, iterating in `order`
# so the seeded generator is consumed in the same sequence on every run.
data_by_cond = []
for cond in order:
    mu, sigma = lognormal_params(expected_mean[cond], cv[cond])
    data_by_cond.append(rng.lognormal(mean=mu, sigma=sigma, size=N_per_group))

# Long-format table (one row per simulated value), assembled in one shot
# rather than one Python dict per value.
sim_df = pd.DataFrame({
    "condition": np.repeat(order, [len(samples) for samples in data_by_cond]),
    "normalized_nuclear_TDP43": np.concatenate(data_by_cond),
})

# ---- Summarise --------------------------------------------------------------
# Per-condition descriptive statistics, re-ordered to the plotting order
# (groupby sorts its keys alphabetically). "std" and "sem" both use ddof=1.
summary = (
    sim_df.groupby("condition")["normalized_nuclear_TDP43"]
    .agg(n="count", mean="mean", median="median", std="std", sem="sem")
    .reindex(order)
    .reset_index()
)

display_dataframe_to_user("Simulated summary (reduced spread in - ARS)", summary)

# ---- Plot -------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(10, 7))

# Violin bodies only; the mean/SEM markers are drawn explicitly below.
ax.violinplot(data_by_cond, showmeans=False, showmedians=False, showextrema=False)

# Overlay the individual values with a small horizontal jitter around each
# violin position (positions are 1-based, matching violinplot's default).
for i, y in enumerate(data_by_cond, start=1):
    x = rng.normal(loc=i, scale=0.05, size=len(y))
    ax.plot(x, y, 'o', alpha=0.5, markersize=2)

# Mean ± SEM markers come straight from the summary table so the figure and
# the displayed table cannot drift apart.
positions = range(1, len(order) + 1)
means = summary["mean"].to_numpy()
sems = summary["sem"].to_numpy()
ax.errorbar(positions, means, yerr=sems, fmt='s', capsize=4)

ax.set_xticks(list(positions))
ax.set_xticklabels(order, rotation=30, ha='right')
ax.set_ylabel("Normalized Nuclear TDP-43 Intensity")
ax.set_title("Nuclear_TDP-43_Intensity (Simulated Expected Results; tighter - ARS)")
fig.tight_layout()

# ---- Save -------------------------------------------------------------------
png_path = "/mnt/data/Simulated_Nuclear_TDP43_Expected_tighter_minusARS.png"
csv_path = "/mnt/data/Simulated_Nuclear_TDP43_Expected_tighter_minusARS.csv"

# Neither savefig nor to_csv creates missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
for out_path in (png_path, csv_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

fig.savefig(png_path, dpi=300, bbox_inches="tight")
plt.show()

sim_df.to_csv(csv_path, index=False)

png_path, csv_path

In [None]:
# Regenerate bar graph using mean ± standard deviation (SD) instead of SEM
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read back the simulated dataset written by the previous cell.
sim_df = pd.read_csv("/mnt/data/Simulated_Nuclear_TDP43_Expected_tighter_minusARS.csv")

# Conditions in the order they should appear on the x-axis.
order = [
    "L3000 - ARS", "KPNA3 - ARS", "KPNA4 - ARS", "KPNB1 - ARS",
    "L3000 + ARS", "KPNA3 + ARS", "KPNA4 + ARS", "KPNB1 + ARS"
]

# Per-condition mean and standard deviation. The groupby result is indexed by
# condition, so .loc[order] puts the rows in plotting order (and raises if a
# condition is missing) before the index is turned back into a column.
summary = (
    sim_df.groupby("condition")["normalized_nuclear_TDP43"]
    .agg(mean="mean", sd="std")
    .loc[order]
    .reset_index()
)

# One bar colour per condition (colours similar to the example figure).
colors = ["#0066FF", "#00CC66", "#FF9933", "#996633",
          "#FF3333", "#9933FF", "#000000", "#000066"]

# Bar chart of the means with ± SD error bars.
fig, ax = plt.subplots(figsize=(8, 6))
x = np.arange(len(summary))
bars = ax.bar(x, summary["mean"], yerr=summary["sd"], capsize=5, color=colors)

ax.set_xticks(x)
ax.set_xticklabels(summary["condition"], rotation=30, ha="right")
ax.set_ylabel("Normalized Nuclear TDP-43 Intensity")
ax.set_title("Nuclear_TDP-43_Intensity (Mean ± SD)")
ax.set_ylim(0, 1.6)
fig.tight_layout()

# Export a high-resolution, opaque-background PNG, then render inline.
ppt_png_sd = "/mnt/data/Simulated_Nuclear_TDP43_BarGraph_MeanSD_PPT.png"
fig.savefig(ppt_png_sd, dpi=600, bbox_inches="tight", transparent=False)
plt.show()

ppt_png_sd
