# Visualize conditional dependence results
Load the saved toy dataset and plot how the contamination becomes visible only after conditioning on the context variable Z.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# The path is relative to the working directory: it resolves to a data/ folder
# one level above wherever the notebook is launched.
# NOTE(review): the original note expects a launch from experiments/ — confirm
# against the repo layout.
dataset_path = "../data/toy_data.npz"

# np.load on an .npz archive returns a file-backed NpzFile; the context manager
# closes the underlying file once the three arrays have been read into memory.
with np.load(dataset_path) as data:
    x, y, z = data["X"], data["Y"], data["Z"]

print(f"Loaded dataset from {dataset_path} with shapes: X={x.shape}, Y={y.shape}, Z={z.shape}")


## Global view: X and Y appear independent
A quick scatter plot and the Pearson correlation across the full dataset should show X and Y as nearly independent when we ignore the contextual variable Z.

In [None]:
# Scatter the full sample of X against Y, ignoring the context variable Z.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(x, y, s=4, alpha=0.25, color="#4e79a7")
ax.set_xlabel("Candidate systematic X")
ax.set_ylabel("Shear Y")
ax.set_title("Global view: near-independence without conditioning")
fig.tight_layout()
plt.show()

# Entry [0, 1] of the correlation matrix is the X-Y coefficient
# (assumes x and y are 1-D arrays — TODO confirm).
pearson_matrix = np.corrcoef(x, y)
corr = pearson_matrix[0, 1]
print(f"Global Pearson r ≈ {corr:.3f}")


## Conditional slice: dependence activates when Z > 0
When we focus on positive contexts, the contamination term turns on and X→Y dependence becomes visible.

In [None]:
# Split the sample by the sign of the context: Z > 0 versus everything else.
mask_positive = z > 0
mask_rest = ~mask_positive

fig, ax = plt.subplots(figsize=(6, 4))
# Draw the Z > 0 points first so legend and draw order stay as before.
ax.scatter(
    x[mask_positive], y[mask_positive],
    s=6, alpha=0.3, color="#f28e2b", label="Z > 0",
)
ax.scatter(
    x[mask_rest], y[mask_rest],
    s=4, alpha=0.1, color="#9ea3a6", label="Z ≤ 0",
)
ax.set_xlabel("Candidate systematic X")
ax.set_ylabel("Shear Y")
ax.set_title("Conditional dependence emerges for Z > 0")
ax.legend(frameon=False)
fig.tight_layout()
plt.show()

# Pearson coefficient of X and Y within each of the two context slices.
corr_positive = np.corrcoef(x[mask_positive], y[mask_positive])[0, 1]
corr_negative = np.corrcoef(x[mask_rest], y[mask_rest])[0, 1]
print(f"Pearson r | Z>0 ≈ {corr_positive:.3f}")
print(f"Pearson r | Z≤0 ≈ {corr_negative:.3f}")


## Dependence strength across context bins
Binning by Z shows a smooth transition from near-zero correlation to strong positive correlation as the contamination activates.

In [None]:
# Estimate Pearson r(X, Y) inside equal-width bins of the context Z and plot
# the per-bin coefficient against the bin centers.
num_bins = 6
bins = np.linspace(z.min(), z.max(), num_bins + 1)
# Digitize against the interior edges only, so every sample receives an index
# in 0..num_bins-1. Digitizing against all edges and subtracting one would give
# the sample(s) equal to z.max() the out-of-range index num_bins, silently
# dropping them from the last bin.
bin_indices = np.digitize(z, bins[1:-1])
bin_centers = 0.5 * (bins[:-1] + bins[1:])
correlations = []

for i in range(num_bins):
    mask = bin_indices == i
    # A correlation needs at least two points; record NaN for sparser bins so
    # the list stays aligned with bin_centers.
    if mask.sum() < 2:
        correlations.append(np.nan)
        continue
    correlations.append(np.corrcoef(x[mask], y[mask])[0, 1])

plt.figure(figsize=(6, 4))
plt.plot(bin_centers, correlations, marker="o", color="#e15759")
plt.xlabel("Context Z (bin centers)")
plt.ylabel("Pearson r(X, Y) within bin")
plt.title("Conditional dependence strength as Z varies")
# Dashed zero line as a visual reference for "no linear correlation".
plt.axhline(0, color="black", linewidth=0.8, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.show()


## Context vs. shear: contamination gating
Plotting Y directly against the context Z highlights the activation of contamination as Z increases.

In [None]:
# Scatter Y against the context Z directly.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(z, y, s=4, alpha=0.3, color="#59a14f")
ax.set_xlabel("Context Z")
ax.set_ylabel("Shear Y")
ax.set_title("Conditional activation of contamination by context")
fig.tight_layout()
plt.show()
