# HCR-NN end-to-end demo (uses **hcr_nn** package)

This notebook demonstrates a full pipeline **using your real library**:

- empirical CDF normalization via `hcr_nn.layers.CDFNorm`,
- conditional model `hcr_nn.models.HCRCond2D` (via `build_hcr_cond2d`),
- basis functions from `hcr_nn.basis`,
- conditional densities and expectations from `hcr_nn.density`,
- (mini demo) `hcr_nn.neuron.HCRNeuron`.

We train `HCRCond2D` by minimizing MSE between the predicted **E[u1|u2]** and the empirical **u1** (both in [0,1]).

**Run from the repo root or from `examples/`** – the path setup in the first code cell makes `import hcr_nn` work either way.

In [None]:
# --- Imports & path setup -------------------------------------------------
import os, sys, math, random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from typing import Callable

# Seed NumPy, Python's `random` and PyTorch with the same fixed value.
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)

# Locate the repository root: when the working directory is named
# `examples`, the root is its parent; otherwise the working directory itself
# is used as the root.
CWD = os.path.abspath(os.getcwd())
if os.path.basename(CWD) == 'examples':
    REPO_ROOT = os.path.abspath(os.path.join(CWD, os.pardir))
else:
    REPO_ROOT = CWD

# Put the root at the front of sys.path (only once) so `import hcr_nn`
# resolves against this checkout.
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

from hcr_nn.layers import CDFNorm
from hcr_nn.models import build_hcr_cond2d, HCRCond2D
from hcr_nn.basis import select_basis
from hcr_nn.density import conditional_density, expected_u1_given_u2
from hcr_nn.neuron import HCRNeuron

print('hcr_nn imported. Repo root =', REPO_ROOT)

## Load or generate data (x,y), then normalize to [0,1] with empirical CDF

We try to load any CSV under `data/` that has columns `x,y`. If none is found, we generate a small correlated dataset and save it so the notebook is reproducible.

In [None]:
# Datasets are looked up in (and, if generated, written to) <repo root>/data.
# Create the directory up front so listing it never fails on a fresh checkout.
DATA_DIR = os.path.join(REPO_ROOT, 'data')
os.makedirs(DATA_DIR, exist_ok=True)

def find_xy_csv(dirpath):
    """Return the path of the first CSV in ``dirpath`` with ``x`` and ``y`` columns.

    Files are examined in sorted filename order so the selected dataset is
    deterministic across runs and platforms (``os.listdir`` yields entries in
    arbitrary order). Column names are compared after stripping surrounding
    whitespace and lower-casing, so headers such as ``X`` or `` y `` match too.

    Args:
        dirpath: Directory to scan (non-recursively) for ``*.csv`` files.

    Returns:
        The joined path of the first matching file, or ``None`` when no
        readable CSV containing both columns is found.
    """
    for fn in sorted(os.listdir(dirpath)):
        if not fn.lower().endswith('.csv'):
            continue
        path = os.path.join(dirpath, fn)
        try:
            candidate = pd.read_csv(path)
        except (OSError, ValueError):
            # Best-effort scan: unreadable, empty or malformed files are
            # skipped. pandas' EmptyDataError/ParserError and decoding errors
            # all derive from ValueError; filesystem problems from OSError.
            continue
        names = {str(c).strip().lower() for c in candidate.columns}
        if {'x', 'y'} <= names:
            return path
    return None

csv_path = find_xy_csv(DATA_DIR)
if csv_path is None:
    # No usable dataset on disk: draw a small linearly correlated sample and
    # persist it so subsequent runs load the same file instead of resampling.
    n = 1000
    x = np.random.normal(0, 1, n)
    y = 0.6 * x + 0.4 * np.random.normal(0, 1, n)
    df = pd.DataFrame({'x': x, 'y': y})
    csv_path = os.path.join(DATA_DIR, '_generated_end_to_end.csv')
    df.to_csv(csv_path, index=False)
    print('[INFO] generated dataset at', os.path.relpath(csv_path, REPO_ROOT))
else:
    df = pd.read_csv(csv_path)
    # find_xy_csv accepts headers case- and whitespace-insensitively (e.g.
    # 'X', ' y '), so normalise the names the same way before selecting;
    # indexing the raw header with ['x', 'y'] would raise KeyError for them.
    df.columns = [str(c).strip().lower() for c in df.columns]
    # If normalisation made two headers collide, keep the first occurrence so
    # df['x'] / df['y'] stay one-dimensional.
    df = df.loc[:, ~df.columns.duplicated()][['x', 'y']]
    print('[OK] using dataset', os.path.relpath(csv_path, REPO_ROOT))

# Last expression of the cell: the notebook renders the first rows.
df.head()

Use **empirical CDF** from `hcr_nn.layers.CDFNorm(method='empirical')` to get quantiles `u∈[0,1]` for each column independently.

In [None]:
# One shared normaliser instance, configured for the empirical method with no
# affine parameters and no running statistics.
cdf = CDFNorm(method='empirical', affine=False, track_running_stats=False)

with torch.no_grad():
    # Pull each column out as a float32 tensor ...
    x_t, y_t = (
        torch.tensor(df[col].to_numpy(), dtype=torch.float32)
        for col in ('x', 'y')
    )
    # ... and normalise the columns one at a time (x first, then y).
    u1 = cdf(x_t)
    u2 = cdf(y_t)

# Column 0 holds u1 (from x), column 1 holds u2 (from y).
u = torch.stack((u1, u2), dim=1)
# Displayed as a sanity check: first rows plus the overall min and max.
u[:5], u.min().item(), u.max().item()

## Build the conditional model (`HCRCond2D`) with polynomial basis

We pass a **quantile function** so the model can consume **raw** inputs \(x,y\) and internally map them to \(u\in[0,1]\). Here the quantile function just calls `CDFNorm` column-wise (empirical CDF).

In [None]:
def quantile_fn_raw_to_u(x_raw: torch.Tensor) -> torch.Tensor:
    """Normalise the first two columns of ``x_raw`` with the shared ``cdf``.

    Args:
        x_raw: Tensor of raw values read as ``x_raw[:, 0]`` and
            ``x_raw[:, 1]``; any further columns are ignored.

    Returns:
        The two normalised columns stacked along ``dim=1``, in the same
        column order as the input.
    """
    # Each column is passed through the module-level `cdf` on its own,
    # column 0 first.
    # NOTE(review): if CDFNorm's empirical mode derives the CDF from the
    # tensor it is given, a sub-batch will be normalised differently from the
    # full dataset — confirm against hcr_nn.layers.CDFNorm.
    normalised = [cdf(x_raw[:, j]) for j in (0, 1)]
    return torch.stack(normalised, dim=1)

# Build the model
# All settings are passed as keyword arguments to `build_hcr_cond2d`; their
# precise semantics are defined in `hcr_nn.models` (not visible here).
model: HCRCond2D = build_hcr_cond2d(
    degree=8,
    basis='polynomial',
    grid_size=256,
    coeff_init='xavier',
    # Per the notebook text, this callback lets the model take raw (x, y)
    # inputs and map them to u internally.
    quantile_fn=quantile_fn_raw_to_u,
    dtype=torch.float32,
    device=None,
)
# Last expression of the cell: the notebook displays the model's repr.
model

## Train: minimize MSE between predicted **E[u1|u2]** and empirical **u1**

We'll train only the coefficient matrix `a` inside `HCRCond2D` (parameter `model.coeffs`). Input is raw `[x,y]`, target is `u1` (empirical CDF of `x`).

In [None]:
# Model input: the raw (x, y) pairs. Regression target: u1, i.e. column 0 of
# the normalised tensor `u`.
X_raw = torch.tensor(df[['x','y']].to_numpy(), dtype=torch.float32)
y_target = u[:, 0]

# Random 80/20 train/validation split over row indices.
n = X_raw.shape[0]
n_train = int(0.8 * n)
idx = torch.randperm(n)
tr = idx[:n_train]
va = idx[n_train:]
Xtr, ytr = X_raw[tr], y_target[tr]
Xva, yva = X_raw[va], y_target[va]

# Only the coefficient tensor is handed to the optimiser.
opt = optim.Adam([model.coeffs], lr=1e-2)
loss_fn = nn.MSELoss()

EPOCHS = 200
for ep in range(1, EPOCHS + 1):
    # One full-batch gradient step per epoch.
    model.train()
    opt.zero_grad()
    pred = model(Xtr).reshape(-1)
    loss = loss_fn(pred, ytr)
    loss.backward()
    opt.step()

    # Report on the first epoch and on every 40th; skip the rest.
    if ep != 1 and ep % 40 != 0:
        continue
    model.eval()
    with torch.no_grad():
        val = model(Xva).reshape(-1)
        vloss = loss_fn(val, yva).item()
    print(f"epoch {ep:3d} | train MSE = {loss.item():.5f} | val MSE = {vloss:.5f}")

print('done.')

## Visualize **E[u1|u2]** curve vs. normalized data (u2,u1)

We compute the conditional curve using `model.conditional_curve(u2_grid)` and overlay it on the scatter of `(u2,u1)`.

In [None]:
# Evaluate the model's conditional curve on 201 evenly spaced u2 values.
with torch.no_grad():
    u2_grid = torch.linspace(0, 1, 201)
    curve = model.conditional_curve(u2_grid)

# Scatter of the normalised data (u2 on the horizontal axis, u1 on the
# vertical) with the model curve drawn on top.
plt.figure(figsize=(6.0, 4.0))
plt.scatter(
    u[:, 1].numpy(),
    u[:, 0].numpy(),
    s=8,
    alpha=0.3,
    label='data (u2 vs u1)',
)
plt.plot(u2_grid.numpy(), curve.numpy(), linewidth=2, label='E[u1|u2] (model)')
plt.xlabel('u2')
plt.ylabel('u1')
plt.title('Conditional expectation E[u1|u2]')
plt.legend()
plt.tight_layout()
plt.show()

## Cross-check with `hcr_nn.density`
We independently compute conditional densities using your **density utilities** and compare a few expected values with the model curve.

In [None]:
with torch.no_grad():
    # Reuse the model's own basis object and trained coefficients so both
    # computations describe the same fitted function.
    basis = model.basis
    deg = model.basis_dim - 1
    coeffs = model.coeffs.detach()
    # NOTE(review): `grid` is not read anywhere in this cell; it is kept
    # because later cells may rely on it.
    grid = torch.linspace(0, 1, 201)
    picks = [0.1, 0.3, 0.5, 0.7, 0.9]

    # Expectation from the standalone density utility, one value per pick.
    approx_e = np.array([
        expected_u1_given_u2(u2s, coeffs, basis, deg, grid_size=201)
        for u2s in picks
    ])

    # The model's own conditional curve, evaluated at the same u2 values.
    picks_t = torch.tensor(picks, dtype=torch.float32)
    curve_e = model.conditional_curve(picks_t).numpy()

# Side-by-side table; last expression, so the notebook renders it.
pd.DataFrame({'u2': picks, 'E_u1_density_py': approx_e, 'E_u1_model_curve': curve_e})

Small differences between the two columns are expected due to grid resolution and other numerical details, but the values should broadly agree.

## (Mini) Using `HCRNeuron` directly
A quick taste of your `neuron.HCRNeuron` with a 1D basis (e.g. polynomial with degree=4). We apply it to `u2` just as a demonstration of feature expansion + linear map.

In [None]:
from hcr_nn.basis import PolynomialBasis

# Stand-alone neuron demo: a degree-4 polynomial basis wrapped in an HCRNeuron
# with two outputs. Nothing is trained in this cell, so the outputs reflect
# the neuron's initial parameters.
basis1d = PolynomialBasis(degree=4)
neuron = HCRNeuron(basis=basis1d, out_features=2)
with torch.no_grad():
    demo_out = neuron(u[:,1])  # feed u2
# Last expression: the notebook shows the output shape and the first three rows.
demo_out.shape, demo_out[:3]

## Save artifacts (for CI or to inspect later)
We store the trained coefficient matrix (`hcr_cond2d_coeffs.npy`) and a small text file with the validation MSE (`hcr_cond2d_metrics.txt`).

In [None]:
# Artifacts go to <repo root>/paper_models, created on demand.
ART_DIR = os.path.join(REPO_ROOT, 'paper_models')
os.makedirs(ART_DIR, exist_ok=True)

# Trained coefficients, detached and moved to CPU, saved as a NumPy array.
np.save(os.path.join(ART_DIR, 'hcr_cond2d_coeffs.npy'), model.coeffs.detach().cpu().numpy())

# Recompute the validation MSE. Switch to eval mode explicitly, as the
# validation step of the training loop does, instead of depending on whichever
# mode the last training epoch happened to leave the model in.
model.eval()
with torch.no_grad():
    pred_va = model(Xva).reshape(-1)
    val_mse = nn.functional.mse_loss(pred_va, yva).item()

# One `key=value` line; explicit encoding keeps the file platform-independent.
with open(os.path.join(ART_DIR, 'hcr_cond2d_metrics.txt'), 'w', encoding='utf-8') as f:
    f.write(f'val_mse={val_mse:.6f}\n')
print('Saved: paper_models/hcr_cond2d_coeffs.npy and hcr_cond2d_metrics.txt')