# In [None]:
import os
import time
from sklearn.model_selection import StratifiedKFold
from sksurv.metrics import concordance_index_ipcw
from sksurv.util import Surv
from sksurv.linear_model import CoxnetSurvivalAnalysis
from lifelines import CoxPHFitter
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Plain aliases (no copy) of the prepared frames; `df_ready_train` and
# `df_ready_test` are not defined in this cell.
df_ready_train_long = df_ready_train
df_ready_test_long  = df_ready_test

N_RUNS     = 5        # number of pipeline runs; the run index feeds the seeds
BEST_SCORE = -np.inf  # highest score seen so far across runs
BEST_PATH  = ""       # CSV path written by the best-scoring run
TAU = 7               # passed as `tau` to concordance_index_ipcw

def zscore_fit_transform(train_scores, test_scores):
    """Standardise two score vectors using statistics of the training one.

    The mean and standard deviation are estimated on ``train_scores`` only
    and applied to both inputs, so the test scores never influence the
    scaling.

    Args:
        train_scores: Object exposing ``.mean()``, ``.std()`` and element-wise
            arithmetic (e.g. ``numpy.ndarray`` or ``pandas.Series``).
        test_scores: Scores to transform with the training statistics.

    Returns:
        Tuple ``(train_z, test_z)`` of standardised scores.
    """
    mu = train_scores.mean()
    sd = train_scores.std()
    # Fall back to unit scale when the spread is degenerate. A NaN std (for
    # instance a one-element pandas Series) must be tested explicitly because
    # ``nan <= 1e-12`` is False and would otherwise turn every output into NaN.
    if not np.isfinite(sd) or sd <= 1e-12:
        sd = 1.0
    return (train_scores - mu) / sd, (test_scores - mu) / sd

# Each iteration is one full pipeline run; the run index `i` is used below to
# derive the random seeds and to name the output file.
for i in range(1, N_RUNS + 1):
    print(f"\n====== RUN {i} ======\n")

    def is_binary(s):
        """Return True when every non-missing value of ``s`` is a 0/1 flag.

        Accepted spellings are the integers 0 and 1, the strings '0' and '1'
        and the booleans. A series with no non-missing value also qualifies.
        """
        allowed = {0, 1, '0', '1', True, False}
        observed = set(s.dropna().unique())
        return observed.issubset(allowed)

    def encode(df):
        """Return a copy of ``df`` with object/bool columns turned into integers.

        The 'ID' column is left untouched. Flag-like columns (see
        ``is_binary``) become int8 with unparsable or missing entries set
        to 0; every other object/bool column is replaced by its int16
        category code, with missing values mapped to 0.

        NOTE(review): category codes are derived from the values present in
        the frame being encoded, so two frames encoded separately may assign
        different codes to the same label; missing values also share code 0
        with the first category.
        """
        out = df.copy()
        candidates = out.select_dtypes(['object', 'bool']).columns
        for name in candidates:
            if name == 'ID':
                continue
            column = out[name]
            if is_binary(column):
                as_number = pd.to_numeric(column, errors='coerce')
                out[name] = as_number.fillna(0).astype('int8')
                continue
            codes = column.astype('category').cat.codes
            out[name] = codes.replace(-1, 0).astype('int16')
        return out

    # Encode both frames independently (encode() also copies its input).
    train_long = encode(df_ready_train_long.copy())
    test_long  = encode(df_ready_test_long.copy())

    # Training rows without a usable survival time or status are discarded;
    # infinities are treated as missing first.
    train_long = train_long.replace([np.inf, -np.inf], np.nan) \
                           .dropna(subset=['OS_YEARS', 'OS_STATUS'])

    # Every training column other than the identifier and the two targets is
    # a candidate feature.
    meta     = ['ID', 'OS_YEARS', 'OS_STATUS']
    features = [c for c in train_long.columns if c not in meta]

    # Features absent from the test frame are added there as all-zero columns.
    for col in features:
        if col not in test_long.columns:
            test_long[col] = 0

    # Coerce features to float; unparsable, infinite and missing values all
    # become 0.
    for df in (train_long, test_long):
        df[features] = df[features].apply(pd.to_numeric, errors='coerce') \
                                   .replace([np.inf, -np.inf], np.nan) \
                                   .fillna(0).astype(float)

    # Drop near-constant features (training variance not above 1e-4).
    features = [c for c in features if train_long[c].var() > 1e-4]

    # Absolute pairwise correlations between the remaining features.
    corr = train_long[features].corr().abs()
    # Keep only the strict upper triangle so each pair is listed once, then
    # retain the pairs whose absolute correlation exceeds 0.8.
    mask = np.triu(np.ones_like(corr, bool), k=1)
    pairs = corr.where(mask).stack().loc[lambda s: s > .8]

    # For each such pair with both members still kept, drop the member whose
    # mean absolute correlation with all features is larger (f2 on ties).
    drop = set()
    mc   = corr.mean()
    for f1, f2, _ in pairs.reset_index().values:
        if f1 not in drop and f2 not in drop:
            drop.add(f1 if mc[f1] > mc[f2] else f2)
    features = [c for c in features if c not in drop]

    # Univariate screening: fit a one-covariate Cox model per feature and keep
    # the features whose coefficient has p < .05.
    sel, cph = [], CoxPHFitter()
    for col in features:
        tmp = train_long[[col, 'OS_YEARS', 'OS_STATUS']] \
              .rename(columns={'OS_YEARS': 'T', 'OS_STATUS': 'E'})
        try:
            cph.fit(tmp, 'T', 'E', show_progress=False)
            if cph.summary.loc[col, 'p'] < .05:
                sel.append(col)
        except Exception:
            # Best-effort screening: a feature whose fit fails is skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop the run instead of being swallowed.
            continue

    print(f"{len(sel)} features kept")

    # Fail early with the real cause instead of an obscure scaler error when
    # the univariate screen kept nothing.
    if not sel:
        raise ValueError("no feature passed the univariate Cox screen (p < .05)")

    # Winsorise both frames at the 1st / 99th percentiles of the training
    # data; the bounds are computed once and reused for train and test.
    clip_lo = train_long[sel].quantile(0.01)
    clip_hi = train_long[sel].quantile(0.99)
    X_raw_tr = train_long[sel].clip(clip_lo, clip_hi, axis=1)
    X_raw_te = test_long[sel].clip(clip_lo, clip_hi, axis=1)

    # Standardise with statistics fitted on the training matrix only.
    scaler  = StandardScaler().fit(X_raw_tr)
    X_train = scaler.transform(X_raw_tr)
    X_test  = scaler.transform(X_raw_te)

    # Survival targets as plain arrays and as the structured array built by
    # Surv.from_arrays.
    y_time   = train_long['OS_YEARS'].astype(float).values
    y_event  = train_long['OS_STATUS'].astype(int).values
    surv_all = Surv.from_arrays(event=y_event, time=y_time)

    # Run-specific generator: each run gets its own reproducible initialisation.
    rng = np.random.default_rng(42 + i)

    def relu(x):
        """Element-wise ``max(x, 0)``."""
        return np.maximum(x, 0.0)

    def drelu(x):
        """Indicator of ``x > 0`` cast to the dtype of ``x``."""
        return (x > 0).astype(x.dtype)

    # Layer widths: input -> 64 -> 32 -> 1.
    D_in = X_train.shape[1]
    H1 = 64
    H2 = 32

    # Weights are drawn from N(0, 2 / fan_in) in the order W1, W2, Wo (the
    # order matters for reproducibility); biases start at zero.
    W1 = rng.normal(0, np.sqrt(2.0 / D_in), size=(D_in, H1))
    b1 = np.zeros(H1)
    W2 = rng.normal(0, np.sqrt(2.0 / H1), size=(H1, H2))
    b2 = np.zeros(H2)
    Wo = rng.normal(0, np.sqrt(2.0 / H2), size=(H2, 1))
    bo = np.zeros(1)

    def zeros_like(x):
        """Zero array with the shape and dtype of ``x``."""
        return np.zeros_like(x)

    # Adam first-moment (m*) and second-moment (v*) buffers, one pair per
    # parameter array.
    mW1 = zeros_like(W1)
    vW1 = zeros_like(W1)
    mW2 = zeros_like(W2)
    vW2 = zeros_like(W2)
    mWo = zeros_like(Wo)
    vWo = zeros_like(Wo)
    mb1 = zeros_like(b1)
    vb1 = zeros_like(b1)
    mb2 = zeros_like(b2)
    vb2 = zeros_like(b2)
    mbo = zeros_like(bo)
    vbo = zeros_like(bo)

    # Optimiser settings; `lam` is the L2 weight added to the weight
    # gradients in backward().
    lr = 1e-3
    beta1 = 0.9
    beta2 = 0.999
    eps = 1e-8
    lam = 1e-4

    # `order` sorts samples by decreasing time; `invord` is its inverse
    # permutation and `e_sorted` the event indicators in sorted order.
    order = np.argsort(-y_time)
    invord = np.empty_like(order)
    invord[order] = np.arange(len(order))
    e_sorted = y_event[order].astype(float)

    def cox_negloglik_and_grad_r(r):
        """Negative Cox partial log-likelihood and its gradient w.r.t. ``r``.

        Reads ``order``, ``invord`` and ``e_sorted`` from the enclosing scope.
        ``order`` sorts samples by *decreasing* time, so the risk set of the
        sample at sorted position ``k`` (everyone with time >= its time) is
        positions ``0..k``.

        NOTE: tied times get no special treatment; a tied sample that is
        sorted after position ``k`` is not counted in ``k``'s risk set.

        Args:
            r: 1-D array of risk scores in original sample order.

        Returns:
            Tuple ``(loss, grad)``; ``grad`` has the shape of ``r`` and is in
            original sample order.
        """
        r_s = r[order]      # fancy indexing copies, so `r` itself is untouched
        r_s -= r_s.max()    # stabilise exp(); loss and gradient are shift-invariant
        exp_r = np.exp(r_s)
        # Risk-set sums. With times descending, the subjects still at risk at
        # position k are positions 0..k, hence a forward cumulative sum. (A
        # reverse cumsum here would sum over subjects with *shorter* times and
        # optimise a time-reversed likelihood.)
        S = np.cumsum(exp_r)
        is_event = e_sorted == 1
        loss = np.log(S[is_event]).sum() - r_s[is_event].sum()
        inv_S = np.zeros_like(S)
        inv_S[is_event] = 1.0 / S[is_event]
        # Sample j belongs to the risk set of every event at position k >= j,
        # so its gradient accumulates 1/S_k from the end backwards.
        grad = exp_r * np.cumsum(inv_S[::-1])[::-1] - e_sorted
        return loss, grad[invord]

    def forward(X):
        """Run the two-hidden-layer network on ``X``.

        Uses the weights ``W1, b1, W2, b2, Wo, bo`` from the enclosing scope.

        Returns:
            Tuple ``(r, cache)`` where ``r`` is the flattened output score per
            row and ``cache`` is ``(X, a1, a2)`` — the input and both hidden
            activations, as consumed by ``backward``.
        """
        hidden1 = relu(X @ W1 + b1)
        hidden2 = relu(hidden1 @ W2 + b2)
        scores = hidden2 @ Wo + bo
        return scores.ravel(), (X, hidden1, hidden2)

    def backward(cache, grad_r):
        """Back-propagate ``grad_r`` (d loss / d score per row) through the net.

        Args:
            cache: ``(X, a1, a2)`` as returned by ``forward``.
            grad_r: 1-D gradient of the loss with respect to each output score.

        Returns:
            Gradients ``(gW1, gb1, gW2, gb2, gWo, gbo)``. Weight gradients
            include the L2 term ``lam * W``; bias gradients do not.
        """
        inputs, h1, h2 = cache
        upstream = grad_r[:, None]  # column vector, one row per sample

        # Output layer.
        d_Wo = h2.T @ upstream + lam * Wo
        d_bo = grad_r.sum()

        # Second hidden layer; the ReLU derivative is the mask of active units.
        d_z2 = (upstream @ Wo.T) * (h2 > 0)
        d_W2 = h1.T @ d_z2 + lam * W2
        d_b2 = d_z2.sum(axis=0)

        # First hidden layer.
        d_z1 = (d_z2 @ W2.T) * (h1 > 0)
        d_W1 = inputs.T @ d_z1 + lam * W1
        d_b1 = d_z1.sum(axis=0)

        return d_W1, d_b1, d_W2, d_b2, d_Wo, d_bo

    def adam_update(W, gW, mW, vW, t):
        """Apply one bias-corrected Adam step to ``W``.

        ``W`` is modified in place (and also returned). ``lr``, ``beta1``,
        ``beta2`` and ``eps`` come from the enclosing scope.

        Args:
            W: Parameter array to update.
            gW: Gradient for ``W``.
            mW: Running first-moment estimate.
            vW: Running second-moment estimate.
            t: 1-based step count used for bias correction.

        Returns:
            Tuple ``(W, new_mW, new_vW)``.
        """
        first = beta1 * mW + (1 - beta1) * gW
        second = beta2 * vW + (1 - beta2) * (gW * gW)
        first_hat = first / (1 - beta1 ** t)
        second_hat = second / (1 - beta2 ** t)
        W -= lr * first_hat / (np.sqrt(second_hat) + eps)
        return W, first, second

    # Full-batch training: every epoch runs one forward/backward pass over the
    # whole training matrix followed by one Adam step per parameter array.
    EPOCHS = 140
    best_loss, bad, t_adam = np.inf, 0, 0

    for _ in range(EPOCHS):
        r, cache = forward(X_train)
        loss, grad_r = cox_negloglik_and_grad_r(r)
        t_adam += 1  # Adam step counter, used for bias correction

        gW1, gb1, gW2, gb2, gWo, gbo = backward(cache, grad_r)
        W1, mW1, vW1 = adam_update(W1, gW1, mW1, vW1, t_adam)
        W2, mW2, vW2 = adam_update(W2, gW2, mW2, vW2, t_adam)
        Wo, mWo, vWo = adam_update(Wo, gWo, mWo, vWo, t_adam)
        b1, mb1, vb1 = adam_update(b1, gb1, mb1, vb1, t_adam)
        b2, mb2, vb2 = adam_update(b2, gb2, mb2, vb2, t_adam)
        bo, mbo, vbo = adam_update(bo, gbo, mbo, vbo, t_adam)

        # Early stopping on the training loss: stop after 20 consecutive
        # epochs without an improvement larger than 1e-4.
        # NOTE(review): `loss` is the value measured before this epoch's
        # update, and the weights of the best epoch are not restored.
        if loss < best_loss - 1e-4:
            best_loss, bad = loss, 0
        else:
            bad += 1
            if bad >= 20:
                break

    # Network risk scores computed with the final weights.
    risk_tr_dl = forward(X_train)[0]
    risk_te_dl = forward(X_test)[0]

    # CoxNet on top of DL score: the network score, standardised with its
    # training statistics, is appended to the features as one extra column.
    risk_tr_z, risk_te_z = zscore_fit_transform(risk_tr_dl, risk_te_dl)
    X_train_aug = np.column_stack([X_train, risk_tr_z])
    X_test_aug  = np.column_stack([X_test,  risk_te_z])

    # Search grid: 30 log-spaced alphas in [1e-2, 1] crossed with 35 l1_ratio
    # values in [0.001, 0.1]; folds are stratified on the event indicator and
    # shuffled with a run-specific seed.
    alpha_grid = np.logspace(-2, 0, 30)
    l1_grid    = np.linspace(0.001, 0.1, 35)
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123+i)

    best_a = best_l1 = None
    best_c = -np.inf

    # Keep the (alpha, l1_ratio) pair with the highest mean validation score.
    # NOTE(review): the network score column was fitted on all training rows,
    # including each validation fold, so these CV scores are likely optimistic.
    for l1 in l1_grid:
        for a in alpha_grid:
            scores = []
            model = CoxnetSurvivalAnalysis(alphas=[a], l1_ratio=l1, max_iter=100_000)
            for tr, va in cv.split(X_train_aug, y_event):
                model.fit(X_train_aug[tr], surv_all[tr])
                risk = model.predict(X_train_aug[va])
                scores.append(concordance_index_ipcw(surv_all[tr], surv_all[va], risk, tau=TAU)[0])
            if np.mean(scores) > best_c:
                best_c, best_a, best_l1 = np.mean(scores), a, l1

    # Refit on the full training set with the selected hyper-parameters.
    enet = CoxnetSurvivalAnalysis(alphas=[best_a], l1_ratio=best_l1, max_iter=100_000)
    enet.fit(X_train_aug, surv_all)

    risk_tr_final = enet.predict(X_train_aug)
    risk_te_final = enet.predict(X_test_aug)

    test_long['risk_row'] = risk_te_final

    # One score per ID: the mean of its row-level risks.
    submission = test_long.groupby('ID', as_index=False).agg(risk_score=('risk_row', 'mean'))

    # Min-max scale to [0, 1]. When every aggregated risk is identical the
    # range is zero and the division would fill the column with NaN, so that
    # case is written as a constant 0.0 instead. A NaN range (NaN predictions)
    # deliberately falls through to the formula so the problem stays visible.
    risk_min = submission['risk_score'].min()
    risk_span = submission['risk_score'].max() - risk_min
    if risk_span == 0:
        submission['risk_score'] = 0.0
    else:
        submission['risk_score'] = (submission['risk_score'] - risk_min) / risk_span

    # The score is computed on the training data itself (same structured
    # array passed as both train and test argument).
    score = concordance_index_ipcw(surv_all, surv_all, risk_tr_final, tau=TAU)[0]
    print(f" Train C-index @7y = {score:.5f}")

    out_path = f"submission_run_{i}_score_{score:.5f}.csv"
    submission.to_csv(out_path, index=False)

    # Remember the run with the highest training score.
    if score > BEST_SCORE:
        BEST_SCORE, BEST_PATH = score, out_path

# Summary across runs; "best" refers to the training-set C-index computed in
# each run.
print(f"\nBEST SCORE : {BEST_SCORE:.5f}")
print(f"SUBMISSION : {BEST_PATH}")
