In [1]:
# Minimal submission swapper: copy a precomputed blend to submission.csv and verify
import os, shutil, pandas as pd
src = 'submission_blend_w8_B.csv'
dst = 'submission.csv'
assert os.path.exists(src), f'Missing {src}; ensure the blended files exist.'
shutil.copyfile(src, dst)
size = os.path.getsize(dst)
print(f'[Swap] {dst} <- {src} ({size} bytes)')
df = pd.read_csv(dst)
print('[submission.csv] head:')
print(df.head())
print('[submission.csv] shape:', df.shape)

[Swap] submission.csv <- submission_blend_w8_B.csv (5237 bytes)
[submission.csv] head:
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 12 15 7 14 11 19 6 10 1...
1  301  12 2 10 1 5 4 20 6 11 15 13 7 19 9 8 18 14 3 1...
2  302  17 16 12 5 9 13 19 7 20 18 11 3 4 6 15 8 1 14 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 8 16 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 10 5 6 17 ...
[submission.csv] shape: (95, 2)


In [2]:
# Apply permutation rescue to a chosen precomputed submission and write to submission.csv
import os, pandas as pd
src = 'submission_blend_w7_A.csv'  # per expert advice: top candidate
assert os.path.exists(src), f'Missing {src}'

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w7_A.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 7 13 12 15 14 11 19 6 10 1...
1  301  12 2 11 10 1 5 4 20 6 15 13 7 19 9 8 18 14 3 1...
2  302  17 16 12 5 7 1 9 13 19 20 18 11 3 4 6 15 8 14 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 16 8 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 5 10 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [3]:
# Apply permutation rescue to w6_A per expert order, then write to submission.csv
import os, pandas as pd
src = 'submission_blend_w6_A.csv'
assert os.path.exists(src), f'Missing {src}'

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w6_A.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 7 12 15 14 11 19 6 10 1...
1  301  12 2 11 10 1 5 4 20 6 15 13 7 19 9 8 18 14 3 1...
2  302  17 16 12 5 7 1 9 13 19 20 18 11 3 4 6 15 8 14 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 16 8 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 5 10 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [4]:
# Apply permutation rescue to w7_B (third candidate), then write to submission.csv
import os, pandas as pd
src = 'submission_blend_w7_B.csv'
assert os.path.exists(src), f'Missing {src}'

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w7_B.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 12 15 7 14 11 19 6 10 1...
1  301  12 2 10 1 5 4 20 6 11 15 13 7 19 9 8 18 14 3 1...
2  302  17 16 12 5 9 13 19 7 20 18 11 3 4 6 15 8 1 14 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 8 16 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 10 5 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [5]:
# Apply permutation rescue to w6_B (hedge) and write to submission.csv
import os, pandas as pd
src = 'submission_blend_w6_B.csv'
assert os.path.exists(src), f'Missing {src}'

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w6_B.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 12 15 7 14 11 19 6 10 1...
1  301  12 2 10 1 5 4 20 6 11 15 7 13 19 9 8 18 14 3 1...
2  302  17 16 12 5 1 9 13 19 7 20 18 11 3 4 6 15 8 10 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 8 16 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 10 5 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [6]:
# Apply permutation rescue to w6_C (conservative) and write to submission.csv
import os, pandas as pd
src = 'submission_blend_w6_C.csv'
assert os.path.exists(src), f'Missing {src}'

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w6_C.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 12 15 7 14 11 19 6 10 1...
1  301  12 2 20 10 1 5 4 6 11 15 7 13 19 9 8 18 14 3 1...
2  302  17 16 12 5 1 9 13 19 7 20 18 11 3 4 6 15 8 10 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 8 16 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 10 5 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [18]:
# Parameterized rescue runner: set src to any precomputed CSV, rescue to permutation-20, write submission.csv
import os, pandas as pd

def rescue_perm20(seq_str: str) -> str:
    arr = [int(x) for x in str(seq_str).split()]
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen:
            dup_idx.append(i)
        else:
            seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing):
            arr[i] = missing[j]
    return ' '.join(map(str, arr))

# Choose next candidate to try (change this line for future iterations):
src = 'submission_blend_w8_C.csv'
assert os.path.exists(src), f'Missing {src}'
df = pd.read_csv(src)
df['Sequence'] = df['Sequence'].map(rescue_perm20)
dst = 'submission.csv'
df.to_csv(dst, index=False)
print(f'[Rescue+Swap] {dst} <- {src} ({os.path.getsize(dst)} bytes)')
print(df.head())
print('All rows length/unique check (first 3):', [ (len(s.split()), len(set(s.split()))) for s in df['Sequence'].head(3) ])

[Rescue+Swap] submission.csv <- submission_blend_w8_C.csv (5237 bytes)
    Id                                           Sequence
0  300  5 9 1 2 18 3 8 4 20 13 12 15 7 14 11 19 6 10 1...
1  301  12 2 20 10 1 5 4 6 11 15 13 7 19 9 8 18 14 3 1...
2  302  17 16 12 5 9 13 19 7 20 18 11 3 4 6 15 8 1 14 ...
3  303  18 13 4 15 17 12 10 5 19 20 1 11 8 16 9 7 3 6 ...
4  304  8 1 12 14 18 13 9 7 2 11 3 15 20 19 10 5 6 17 ...
All rows length/unique check (first 3): [(20, 20), (20, 20), (20, 20)]


In [12]:
# Calibrated exact-20 + Hungarian decode using existing test probs (fast, no retrain)
import os, glob, numpy as np, pandas as pd
from scipy.special import softmax, logit
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

CFG = dict(
    test_dirs = ['cache_probs', 'cache_probs_v15'],  # [v16, v15]
    oof_dirs = ['oof_probs_v16'],
    use_v15 = True, w16=0.7, w15=0.3,
    alpha = 0.85, bg_bias = 0.20, smooth_win = 3,
    temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32),
    eps = 1e-6,
)

def _find_npz(d, vid):
    # Try multiple name patterns: raw id, zero-padded, any substring match
    cands = [
        os.path.join(d, f'{vid}.npz'),
        os.path.join(d, f'{vid:05d}.npz'),
        os.path.join(d, f'test_{vid:05d}.npz'),
        os.path.join(d, f'{vid}_probs.npz'),
    ]
    for fn in cands:
        if os.path.exists(fn):
            return fn
    hits = glob.glob(os.path.join(d, f'*{vid:05d}*.npz'))
    if hits:
        return hits[0]
    hits = glob.glob(os.path.join(d, f'*{vid}*.npz'))
    return hits[0] if hits else None

def load_probs_dir(d, ids):
    out = {}
    for vid in ids:
        fn = _find_npz(d, vid)
        if not fn:
            continue
        z = np.load(fn, allow_pickle=True)
        if 'probs' in z:
            P = z['probs'].astype(np.float32)
        elif 'P' in z:
            P = z['P'].astype(np.float32)
        elif 'logits' in z:
            L = z['logits'].astype(np.float32)
            P = softmax(L, axis=1).astype(np.float32)
        else:
            continue
        out[vid] = P
    return out

def blend_probs(map16, map15, ids, w16, w15, use_v15):
    res = {}
    for vid in ids:
        P16 = map16.get(vid)
        if P16 is None:
            continue
        if use_v15 and (vid in map15):
            P = (w16*P16 + w15*map15[vid]).astype(np.float32)
            P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
        else:
            P = P16
        res[vid] = P
    return res

def load_oof_frames(oof_dirs):
    X, Y = [], []
    for d in oof_dirs:
        for fn in glob.glob(os.path.join(d, '*.npz')):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z:
                P = z['probs'].astype(np.float32)
            elif 'P' in z:
                P = z['P'].astype(np.float32)
            else:
                continue
            if 'y' in z:
                y = z['y'].astype(np.int32)
            elif 'labels' in z:
                y = z['labels'].astype(np.int32)
            else:
                continue
            if P.ndim == 2 and len(y) == P.shape[0]:
                X.append(P); Y.append(y)
    if not X:
        return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    if P_oof is None or y_oof is None:
        return np.ones(21, dtype=np.float32)
    C = P_oof.shape[1]
    Tcls = np.ones(C, dtype=np.float32)
    y = y_oof
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6)
        z = logit(pc)
        yc = (y == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            # Balanced NLL proxy
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best:
                best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logits(P, Tcls, bg_bias, smooth_win):
    P = np.clip(P, CFG['eps'], 1-CFG['eps']).astype(np.float32)
    Z = logit(P).astype(np.float32)
    Z = Z / Tcls.reshape(1, -1)
    Z[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        Z = np.stack([convolve(Z[:, i], k, mode='same') for i in range(Z.shape[1])], 1).astype(np.float32)
    return Z

def segment_exact20(margin, K, min_len):
    T = int(len(margin))
    min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1:
        min_len -= 1
    if K*min_len > T:
        K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32)
    bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            # Linear scan; T ~ 1-3k so still fast on 95 vids
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv:
                    bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []
    k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse()
    return bounds

def decode_video(Z, alpha):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        C[j, :] = -Z[s:e, 1:21].mean(0)
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

test_ids = pd.read_csv('test.csv')['Id'].tolist()
map16 = load_probs_dir(CFG['test_dirs'][0], test_ids)
map15 = load_probs_dir(CFG['test_dirs'][1], test_ids) if CFG['use_v15'] else {}
P_oof, y_oof = load_oof_frames(CFG['oof_dirs'])
Tcls = fit_per_class_temperature(P_oof, y_oof, CFG['temp_grid'])
rows = []
blend_map = blend_probs(map16, map15, test_ids, CFG['w16'], CFG['w15'], CFG['use_v15'])
for vid in sorted(blend_map.keys()):
    P = blend_map[vid]
    Z = to_calibrated_logits(P, Tcls, CFG['bg_bias'], CFG['smooth_win'])
    seq = decode_video(Z, CFG['alpha'])
    rows.append((vid, ' '.join(map(str, seq))))
sub = pd.DataFrame(rows, columns=['Id', 'Sequence']).sort_values('Id')
sub.to_csv('submission.csv', index=False)
print('Wrote submission.csv', sub.shape)

Wrote submission.csv (0, 2)


In [13]:
# Inspect cache prob directories to determine filename patterns
import os, glob, itertools
dirs = ['cache_probs', 'cache_probs_v15', 'oof_probs_v16', 'oof_probs_v15']
for d in dirs:
    if not os.path.isdir(d):
        print(f'[Missing dir] {d}');
        continue
    files = sorted(glob.glob(os.path.join(d, '*.npz')))
    print(f'[{d}] count={len(files)}')
    for fn in files[:5]:
        print('  ', os.path.basename(fn))
    for fn in files[-5:]:
        print('  ', os.path.basename(fn))
    # Try to extract an example test id from filenames
    sample = files[:10]
    print(f'[{d}] sample patterns:')
    for fn in sample:
        base = os.path.basename(fn)
        print('   -', base)

# Also list any files matching likely test id patterns
for pat in ['*003*.npz','*test_*.npz','*Sample*.npz','*train_*.npz','*val*.npz','*id*.npz']:
    hits = sorted(glob.glob(os.path.join('cache_probs', pat)))
    print(f'[cache_probs] pattern {pat}: {len(hits)} hits')
    for h in hits[:5]:
        print('   ', os.path.basename(h))
    hits15 = sorted(glob.glob(os.path.join('cache_probs_v15', pat)))
    print(f'[cache_probs_v15] pattern {pat}: {len(hits15)} hits')
    for h in hits15[:5]:
        print('   ', os.path.basename(h))

[cache_probs] count=277
   valprobs_00410.npz
   valprobs_00411.npz
   valprobs_00412.npz
   valprobs_00413.npz
   valprobs_00414.npz
   valprobs_00706.npz
   valprobs_00707.npz
   valprobs_00708.npz
   valprobs_00709.npz
   valprobs_00710.npz
[cache_probs] sample patterns:
   - valprobs_00410.npz
   - valprobs_00411.npz
   - valprobs_00412.npz
   - valprobs_00413.npz
   - valprobs_00414.npz
   - valprobs_00415.npz
   - valprobs_00416.npz
   - valprobs_00417.npz
   - valprobs_00418.npz
   - valprobs_00420.npz
[cache_probs_v15] count=277
   valprobs_00410.npz
   valprobs_00411.npz
   valprobs_00412.npz
   valprobs_00413.npz
   valprobs_00414.npz
   valprobs_00706.npz
   valprobs_00707.npz
   valprobs_00708.npz
   valprobs_00709.npz
   valprobs_00710.npz
[cache_probs_v15] sample patterns:
   - valprobs_00410.npz
   - valprobs_00411.npz
   - valprobs_00412.npz
   - valprobs_00413.npz
   - valprobs_00414.npz
   - valprobs_00415.npz
   - valprobs_00416.npz
   - valprobs_00417.npz
   - valpr

In [19]:
# Auto-discovered weighted rank-ensemble over all precomputed submission_*.csv (excluding current submission.csv)
import pandas as pd, numpy as np, os, glob

cand = sorted([f for f in glob.glob('submission*.csv') if os.path.basename(f) != 'submission.csv'])
assert cand, 'No candidate submission_*.csv files found'
dfs = []
kept = []
for f in cand:
    try:
        df = pd.read_csv(f).sort_values('Id')
        if df.shape[0] == 95 and set(df.columns) == {'Id','Sequence'}:
            dfs.append(df); kept.append(f)
    except Exception:
        pass
assert dfs, 'No valid candidate CSVs with 95 rows'

def weight_for(name: str) -> float:
    # Heuristic weights by filename pattern
    n = name.lower()
    w = 1.0
    if 'w8_b' in n: w = 0.70
    elif 'w8_' in n: w = 0.80
    elif 'w7_c' in n or 'w6_c' in n: w = 0.90
    elif 'w7_b' in n or 'w6_b' in n: w = 0.95
    elif 'blend_w7_a' in n or 'blend_w6_a' in n: w = 1.00
    elif 'blend_a' in n: w = 0.95
    elif name == 'submission_A.csv': w = 0.85
    elif name == 'submission_B.csv': w = 0.88
    elif name == 'submission_C.csv': w = 0.88
    else:
        w = 0.90
    return float(w)

weights = np.array([weight_for(os.path.basename(f)) for f in kept], dtype=np.float32)
ids = dfs[0]['Id'].tolist()
K = 20
seqs = []
for i in range(len(ids)):
    rank_sum = np.zeros(K+1, dtype=np.float32)  # classes 1..20
    for j, df in enumerate(dfs):
        arr = [int(x) for x in str(df.iloc[i].Sequence).split()]
        for pos, cls in enumerate(arr):
            if 1 <= cls <= 20:
                rank_sum[cls] += weights[j] * (pos + 1)
    order = np.argsort(rank_sum[1:]) + 1  # best (lowest) rank first
    seqs.append(' '.join(map(str, order)))
out = pd.DataFrame({'Id': ids, 'Sequence': seqs})
out.to_csv('submission.csv', index=False)
print('[RankEnsemble-Auto] Wrote submission.csv', out.shape, 'from', kept, 'weights', weights.tolist())

[RankEnsemble-Extended] Wrote submission.csv (95, 2) from ['submission_blend_w7_A.csv', 'submission_blend_w6_A.csv', 'submission_blend_w7_B.csv', 'submission_blend_A.csv', 'submission_A.csv', 'submission_B.csv', 'submission_C.csv', 'submission_blend_w8_A.csv', 'submission_blend_w8_C.csv'] weights [1.0, 0.949999988079071, 0.8999999761581421, 0.75, 0.6000000238418579, 0.6499999761581421, 0.6499999761581421, 0.550000011920929, 0.550000011920929]


In [21]:
# Build test probs from cached features + saved XGB models, then calibrated exact-20 + Hungarian decode
import os, numpy as np, pandas as pd, xgboost as xgb

test_ids = pd.read_csv('test.csv')['Id'].tolist()
map16, map15 = {}, {}

# Try to load both boosters and infer their expected feature counts to map to cache_v16 (193) vs cache_v15 (120)
bst_a = bst_b = None
if os.path.exists('xgb_train.model'):
    try:
        bst_a = xgb.Booster(); bst_a.load_model('xgb_train.model')
    except Exception as e:
        print('[Warn] Failed load xgb_train.model:', e); bst_a=None
if os.path.exists('xgb_train_v15.model'):
    try:
        bst_b = xgb.Booster(); bst_b.load_model('xgb_train_v15.model')
    except Exception as e:
        print('[Warn] Failed load xgb_train_v15.model:', e); bst_b=None

def booster_nfeat(bst):
    try:
        return int(bst.num_features())
    except Exception:
        # Fallback: try predict on dummy
        return None

pairs = []  # list of (bst, cache_dir, tag)
for bst, tag in ((bst_a,'A'), (bst_b,'B')):
    if bst is None: continue
    nf = booster_nfeat(bst)
    if nf is None:
        # Guess by trying v16 first
        nf = -1
    if nf == 193:
        pairs.append((bst, 'cache_v16', f'{tag}:v16(193)'))
    elif nf == 120:
        pairs.append((bst, 'cache_v15', f'{tag}:v15(120)'))
    else:
        # Try to detect by checking one file's feature count
        probe_id = test_ids[0]
        for cdir, expect, label in (('cache_v16', 193, 'v16'), ('cache_v15', 120, 'v15')):
            fn = os.path.join(cdir, f'test_{probe_id:05d}.npz')
            if os.path.exists(fn):
                X = np.load(fn, allow_pickle=False)['X']
                if X.shape[1] == expect:
                    pairs.append((bst, cdir, f'{tag}:{label}({expect})'))
                    break

print('[Models] Using pairs:', [p[2] for p in pairs])

def load_test_feats(cache_dir, sid):
    fn = os.path.join(cache_dir, f'test_{sid:05d}.npz')
    if not os.path.exists(fn): return None
    z = np.load(fn, allow_pickle=False); return z['X']

# Predict for each booster/cache pair
for bst, cdir, tag in pairs:
    is_v16 = ('cache_v16' in cdir)
    for i, sid in enumerate(test_ids, 1):
        X = load_test_feats(cdir, sid)
        if X is None: continue
        dm = xgb.DMatrix(X)
        try:
            P = bst.predict(dm)
        except Exception as e:
            print(f'[ErrPredict][{tag}] sid={sid}:', e); continue
        if is_v16:
            map16[sid] = P.astype(np.float32)
        else:
            map15[sid] = P.astype(np.float32)
        if i % 20 == 0:
            print(f'[Predict][{tag}] {i}/{len(test_ids)}')

# Use helper fns from cell 7: load_oof_frames, fit_per_class_temperature, to_calibrated_logits, decode_video, CFG
use_v15 = (len(map15) > 0)
P_oof16, y_oof16 = load_oof_frames(['oof_probs_v16'])
Tcls16 = fit_per_class_temperature(P_oof16, y_oof16, CFG['temp_grid'])
if use_v15:
    P_oof15, y_oof15 = load_oof_frames(['oof_probs_v15'])
    Tcls15 = fit_per_class_temperature(P_oof15, y_oof15, CFG['temp_grid'])
else:
    Tcls15 = Tcls16

rows = []
for sid in test_ids:
    if sid not in map16 and sid not in map15:
        continue
    Z = None
    if sid in map16:
        Z16 = to_calibrated_logits(map16[sid], Tcls16, CFG['bg_bias'], CFG['smooth_win'])
        Z = Z16 if Z is None else Z
    if use_v15 and (sid in map15):
        Z15 = to_calibrated_logits(map15[sid], Tcls15, CFG['bg_bias'], CFG['smooth_win'])
        if Z is None:
            Z = Z15
        else:
            Z = (CFG['w16']*Z + CFG['w15']*Z15).astype(np.float32)
    if Z is None: continue
    seq = decode_video(Z, CFG['alpha'])
    rows.append((sid, ' '.join(map(str, seq))))

sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
sub.to_csv('submission.csv', index=False)
print('[CalibratedDecode] Wrote submission.csv', sub.shape, 'use_v15=', use_v15)

[Models] Using pairs: ['A:v15(120)', 'B:v15(120)']


[Predict][A:v15(120)] 20/95


[Predict][A:v15(120)] 40/95


[Predict][A:v15(120)] 60/95


[Predict][A:v15(120)] 80/95


[Predict][B:v15(120)] 20/95


[Predict][B:v15(120)] 40/95


[Predict][B:v15(120)] 60/95


[Predict][B:v15(120)] 80/95


[CalibratedDecode] Wrote submission.csv (95, 2) use_v15= True


In [22]:
# Train XGBoost on v16 features (193 cols) and write per-video test probs to test_probs_v16
import os, time, glob, numpy as np, pandas as pd, xgboost as xgb, random
from sklearn.model_selection import train_test_split

random.seed(42); np.random.seed(42)
train_meta = pd.read_csv('training.csv')
train_ids = train_meta['Id'].astype(int).tolist()
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

def load_train_video(vid):
    fn = os.path.join('cache_v16', f'train_{vid:05d}.npz')
    if not os.path.exists(fn):
        return None, None
    z = np.load(fn, allow_pickle=False)
    X = z['X'].astype(np.float32)
    y = z['y'].astype(np.int32) if 'y' in z else (z['labels'].astype(np.int32) if 'labels' in z else None)
    return X, y

X_list_tr, y_list_tr, vids_avail = [], [], []
t0 = time.time()
for i, vid in enumerate(train_ids, 1):
    Xv, yv = load_train_video(vid)
    if Xv is None or yv is None:
        continue
    if Xv.shape[0] != yv.shape[0]:
        continue
    X_list_tr.append(Xv); y_list_tr.append(yv); vids_avail.append(vid)
    if i % 20 == 0:
        print(f'[LoadTrain] {i}/{len(train_ids)} vids processed; kept={len(vids_avail)}', flush=True)
print(f'[LoadTrain] Done. vids_kept={len(vids_avail)} elapsed={time.time()-t0:.1f}s')

assert X_list_tr and y_list_tr, 'No training videos loaded from cache_v16'
X_all = np.concatenate(X_list_tr, axis=0)
y_all = np.concatenate(y_list_tr, axis=0)
n_feat = X_all.shape[1]
print('[TrainData] X_all', X_all.shape, 'y_all', y_all.shape, 'n_feat', n_feat)
assert n_feat == 193 or n_feat > 150, 'Unexpected feature count; expected v16 ~193 features'

# 10% video-level holdout
vids_arr = np.array(vids_avail, dtype=np.int32)
vids_tr, vids_va = train_test_split(vids_arr, test_size=0.10, random_state=42, shuffle=True)
vid_to_split = {int(v): 'train' for v in vids_tr}
for v in vids_va: vid_to_split[int(v)] = 'valid'

# Build frame-level indices for split
idx_tr, idx_va = [], []
offset = 0
for vid, Xv, yv in zip(vids_avail, X_list_tr, y_list_tr):
    n = Xv.shape[0]
    if vid_to_split.get(int(vid), 'train') == 'train':
        idx_tr.extend(range(offset, offset+n))
    else:
        idx_va.extend(range(offset, offset+n))
    offset += n
idx_tr = np.array(idx_tr, dtype=np.int64); idx_va = np.array(idx_va, dtype=np.int64)
print('[Split] frames train:', idx_tr.size, 'valid:', idx_va.size, 'videos valid:', len(vids_va))

dtrain = xgb.DMatrix(X_all[idx_tr], label=y_all[idx_tr])
dvalid = xgb.DMatrix(X_all[idx_va], label=y_all[idx_va])

params = {
    'objective': 'multi:softprob',
    'num_class': 21,
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
    'max_bin': 256,
    'max_depth': 8,
    'eta': 0.05,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 3,
    'gamma': 0.0,
    'reg_lambda': 1.0,
    'reg_alpha': 0.0,
    'eval_metric': 'mlogloss'
}
print('[XGB] Params:', params)
evals = [(dtrain, 'train'), (dvalid, 'valid')]
t1 = time.time()
bst = xgb.train(
    params,
    dtrain,
    num_boost_round=1500,
    evals=evals,
    early_stopping_rounds=80,
    verbose_eval=50
)
print('[XGB] Training done in {:.1f}s; best_iter={} valid_mlogloss={:.5f}'.format(time.time()-t1, bst.best_iteration, bst.best_score if hasattr(bst, 'best_score') else float('nan')))
bst.save_model('xgb_train_v16.model')
print('[XGB] Saved model -> xgb_train_v16.model')

# Predict test videos and save as test_probs_v16/test_{Id:05d}.npz with key probs
os.makedirs('test_probs_v16', exist_ok=True)
def load_test_feats(sid):
    fn = os.path.join('cache_v16', f'test_{sid:05d}.npz')
    if not os.path.exists(fn):
        return None
    z = np.load(fn, allow_pickle=False);
    return z['X'].astype(np.float32)

t2 = time.time()
cnt = 0
for i, sid in enumerate(test_ids, 1):
    X = load_test_feats(sid)
    if X is None:
        continue
    dm = xgb.DMatrix(X)
    P = bst.predict(dm, iteration_range=(0, bst.best_iteration+1)) if hasattr(bst, 'best_iteration') else bst.predict(dm)
    P = P.astype(np.float32)
    # Sanity
    if not np.all(np.isfinite(P)):
        P = np.nan_to_num(P, nan=1.0/21, posinf=1.0/21, neginf=1.0/21).astype(np.float32)
    # Normalize rows to sum 1
    P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
    out_fn = os.path.join('test_probs_v16', f'test_{sid:05d}.npz')
    np.savez_compressed(out_fn, probs=P)
    cnt += 1
    if i % 10 == 0:
        print(f'[Predict][v16] {i}/{len(test_ids)} wrote {cnt} files', flush=True)
print(f'[Predict][v16] Done. wrote={cnt} elapsed={time.time()-t2:.1f}s')

print('[Next] Run decoder sweep with v16+v15 blend, pointing CFG.test_dirs to ["test_probs_v16", "<v15_dir_if_ready>"] and try S1/S4/S5 settings.')

[LoadTrain] 20/297 vids processed; kept=20


[LoadTrain] 40/297 vids processed; kept=40


[LoadTrain] 60/297 vids processed; kept=60


[LoadTrain] 80/297 vids processed; kept=80


[LoadTrain] 100/297 vids processed; kept=100


[LoadTrain] 120/297 vids processed; kept=120


[LoadTrain] 140/297 vids processed; kept=140


[LoadTrain] 160/297 vids processed; kept=160


[LoadTrain] 180/297 vids processed; kept=180


[LoadTrain] 200/297 vids processed; kept=200


[LoadTrain] 220/297 vids processed; kept=220


[LoadTrain] 240/297 vids processed; kept=240


[LoadTrain] 260/297 vids processed; kept=260


[LoadTrain] 280/297 vids processed; kept=280


[LoadTrain] Done. vids_kept=297 elapsed=0.8s


[TrainData] X_all (187296, 193) y_all (187296,) n_feat 193
[Split] frames train: 167968 valid: 19328 videos valid: 30


[XGB] Params: {'objective': 'multi:softprob', 'num_class': 21, 'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', 'max_bin': 256, 'max_depth': 8, 'eta': 0.05, 'subsample': 0.8, 'colsample_bytree': 0.8, 'min_child_weight': 3, 'gamma': 0.0, 'reg_lambda': 1.0, 'reg_alpha': 0.0, 'eval_metric': 'mlogloss'}



    E.g. tree_method = "hist", device = "cuda"

Parameters: { "predictor" } are not used.



[0]	train-mlogloss:2.85951	valid-mlogloss:2.87358


[50]	train-mlogloss:1.25022	valid-mlogloss:1.57548


[100]	train-mlogloss:0.86216	valid-mlogloss:1.39493


[150]	train-mlogloss:0.64347	valid-mlogloss:1.32724


[200]	train-mlogloss:0.48896	valid-mlogloss:1.29030


[250]	train-mlogloss:0.38071	valid-mlogloss:1.26785


[300]	train-mlogloss:0.30071	valid-mlogloss:1.25274


[350]	train-mlogloss:0.24270	valid-mlogloss:1.24435


[400]	train-mlogloss:0.20023	valid-mlogloss:1.23738


[450]	train-mlogloss:0.16897	valid-mlogloss:1.23703


[500]	train-mlogloss:0.14541	valid-mlogloss:1.23578


[550]	train-mlogloss:0.12746	valid-mlogloss:1.23745


[578]	train-mlogloss:0.11962	valid-mlogloss:1.23840


[XGB] Training done in 136.5s; best_iter=499 valid_mlogloss=1.23563



    E.g. tree_method = "hist", device = "cuda"



[XGB] Saved model -> xgb_train_v16.model


[Predict][v16] 10/95 wrote 10 files


[Predict][v16] 20/95 wrote 20 files


[Predict][v16] 30/95 wrote 30 files


[Predict][v16] 40/95 wrote 40 files


[Predict][v16] 50/95 wrote 50 files


[Predict][v16] 60/95 wrote 60 files


[Predict][v16] 70/95 wrote 70 files


[Predict][v16] 80/95 wrote 80 files


[Predict][v16] 90/95 wrote 90 files


[Predict][v16] Done. wrote=95 elapsed=14.7s
[Next] Run decoder sweep with v16+v15 blend, pointing CFG.test_dirs to ["test_probs_v16", "<v15_dir_if_ready>"] and try S1/S4/S5 settings.


In [23]:
# Decode sweep S1 (v16-only): alpha=0.85, bg_bias=0.20, smooth_win=1, cost=mean
import os, glob, numpy as np, pandas as pd

# Reuse helpers from cell 7: to_calibrated_logits, decode_video, fit_per_class_temperature, load_oof_frames

def _find_npz_simple(d, vid):
    cands = [
        os.path.join(d, f'{vid}.npz'),
        os.path.join(d, f'{vid:05d}.npz'),
        os.path.join(d, f'test_{vid:05d}.npz'),
    ]
    for fn in cands:
        if os.path.exists(fn):
            return fn
    hits = glob.glob(os.path.join(d, f'*{vid:05d}*.npz'))
    return hits[0] if hits else None

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        fn = _find_npz_simple(d, vid)
        if not fn:
            continue
        z = np.load(fn, allow_pickle=True)
        if 'probs' in z:
            P = z['probs'].astype(np.float32)
        elif 'P' in z:
            P = z['P'].astype(np.float32)
        else:
            continue
        out[vid] = P
    return out

# S1 config (v16-only) per expert plan
S1 = dict(
    test_dir='test_probs_v16',
    oof_dirs=['oof_probs_v16'],
    alpha=0.85,
    bg_bias=0.20,
    smooth_win=1,
)

test_ids = pd.read_csv('test.csv')['Id'].tolist()
P_oof, y_oof = load_oof_frames(S1['oof_dirs'])
Tcls = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))
m = load_probs_dir_simple(S1['test_dir'], test_ids)
rows = []
for vid in sorted(m.keys()):
    P = m[vid]
    Z = to_calibrated_logits(P, Tcls, S1['bg_bias'], S1['smooth_win'])
    seq = decode_video(Z, S1['alpha'])
    rows.append((vid, ' '.join(map(str, seq))))
sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
sub.to_csv('submission_v16_S1.csv', index=False)
sub.to_csv('submission.csv', index=False)
print('[S1] Wrote submission_v16_S1.csv and submission.csv', sub.shape)

[S1] Wrote submission_v16_S1.csv and submission.csv (95, 2)


In [24]:
# Decoder sweep S4 and S5 (v16-only): median and trimmed mean costs
import os, glob, numpy as np, pandas as pd

# Assumes helper funcs from cell 7 exist: load_oof_frames, fit_per_class_temperature, to_calibrated_logits
from scipy.optimize import linear_sum_assignment

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

def segment_exact20_simple(margin, K, min_len):
    T = int(len(margin))
    min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1:
        min_len -= 1
    if K*min_len > T:
        K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32)
    bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv:
                    bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []
    k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse()
    return bounds

def decode_with_cost(Z, alpha, cost_mode='median'):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20_simple(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = Z[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            L = seg.shape[0]; lo = int(0.1*L); hi = max(lo+1, int(0.9*L))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:  # mean
            agg = seg.mean(0)
        C[j, :] = -agg
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

def run_decode_variant(name, alpha, bg_bias, smooth_win, cost_mode):
    test_ids = pd.read_csv('test.csv')['Id'].tolist()
    P_oof, y_oof = load_oof_frames(['oof_probs_v16'])
    Tcls = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))
    mp = load_probs_dir_simple('test_probs_v16', test_ids)
    rows = []
    for vid in sorted(mp.keys()):
        P = mp[vid]
        Z = to_calibrated_logits(P, Tcls, bg_bias, smooth_win)
        seq = decode_with_cost(Z, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out_fn = f'submission_v16_{name}.csv'
    sub.to_csv(out_fn, index=False)
    sub.to_csv('submission.csv', index=False)
    print(f'[{name}] Wrote {out_fn} and submission.csv', sub.shape)

# S4: alpha=0.85, bg_bias=0.25, smooth=3, cost=median
run_decode_variant('S4', alpha=0.85, bg_bias=0.25, smooth_win=3, cost_mode='median')
# S5: alpha=0.85, bg_bias=0.20, smooth=3, cost=trimmed_mean_20
run_decode_variant('S5', alpha=0.85, bg_bias=0.20, smooth_win=3, cost_mode='trimmed_mean_20')

[S4] Wrote submission_v16_S4.csv and submission.csv (95, 2)


[S5] Wrote submission_v16_S5.csv and submission.csv (95, 2)


In [25]:
# v16+v15 blended calibrated decode: generate S1/S4/S5 with w16 in {0.65,0.70,0.75}
import os, glob, time, numpy as np, pandas as pd, xgboost as xgb
from scipy.optimize import linear_sum_assignment

test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

# Load v16 per-frame probs from disk
def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

map16 = load_probs_dir_simple('test_probs_v16', test_ids)
print('[Blend] Loaded v16 test probs:', len(map16))

# Predict v15 per-frame probs on-the-fly using available v15 boosters
bstA = bstB = None
try:
    if os.path.exists('xgb_train.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train.model')
        if int(tmp.num_features()) == 120: bstA = tmp
except Exception as e:
    print('[v15] skip A:', e)
try:
    if os.path.exists('xgb_train_v15.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train_v15.model')
        if int(tmp.num_features()) == 120: bstB = tmp
except Exception as e:
    print('[v15] skip B:', e)
assert (bstA is not None) or (bstB is not None), 'No v15 booster available'

def load_test_v15_X(sid):
    fn = os.path.join('cache_v15', f'test_{sid:05d}.npz')
    if not os.path.exists(fn): return None
    z = np.load(fn, allow_pickle=False);
    X = z['X'].astype(np.float32)
    return X

map15 = {}
t0 = time.time()
for i, sid in enumerate(test_ids, 1):
    X = load_test_v15_X(sid)
    if X is None: continue
    dm = xgb.DMatrix(X)
    Ps = []
    if bstA is not None:
        Ps.append(bstA.predict(dm))
    if bstB is not None:
        Ps.append(bstB.predict(dm))
    if Ps:
        P = np.mean(Ps, axis=0).astype(np.float32)
        P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
        map15[sid] = P
    if i % 20 == 0:
        print(f'[v15 Predict] {i}/{len(test_ids)}')
print('[Blend] Built v15 test probs:', len(map15), 'elapsed', f'{time.time()-t0:.1f}s')

# Helpers from earlier cells (fallbacks if not in scope)
def load_oof_frames(oof_dirs):
    X, Y = [], []
    for d in oof_dirs:
        for fn in glob.glob(os.path.join(d, '*.npz')):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z:
                P = z['probs'].astype(np.float32)
            elif 'P' in z:
                P = z['P'].astype(np.float32)
            else:
                continue
            if 'y' in z: y = z['y'].astype(np.int32)
            elif 'labels' in z: y = z['labels'].astype(np.int32)
            else: continue
            if P.ndim == 2 and len(y) == P.shape[0]:
                X.append(P); Y.append(y)
    if not X: return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

from scipy.special import logit
from scipy.signal import convolve
def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    if P_oof is None or y_oof is None:
        return np.ones(21, dtype=np.float32)
    C = P_oof.shape[1]; Tcls = np.ones(C, dtype=np.float32)
    y = y_oof
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logits(P, Tcls, bg_bias, smooth_win):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32)
    Z = Z / Tcls.reshape(1, -1)
    Z[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        Z = np.stack([convolve(Z[:, i], k, mode='same') for i in range(Z.shape[1])], 1).astype(np.float32)
    return Z

def segment_exact20_simple(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost(Z, alpha, cost_mode='mean'):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20_simple(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = Z[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            L = seg.shape[0]; lo = int(0.1*L); hi = max(lo+1, int(0.9*L))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:
            agg = seg.mean(0)
        C[j, :] = -agg
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

# Calibrate per-class temperatures on OOF (separately for v16 and v15)
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
P_oof16, y_oof16 = load_oof_frames(['oof_probs_v16'])
P_oof15, y_oof15 = load_oof_frames(['oof_probs_v15'])
Tcls16 = fit_per_class_temperature(P_oof16, y_oof16, temp_grid)
Tcls15 = fit_per_class_temperature(P_oof15, y_oof15, temp_grid)

def run_blend_variant(name, alpha, bg_bias, smooth_win, cost_mode, w16):
    rows = []
    for vid in sorted(test_ids):
        P16 = map16.get(vid)
        P15 = map15.get(vid)
        if P16 is None and P15 is None: continue
        Z_mix = None
        if P16 is not None:
            Z16 = to_calibrated_logits(P16, Tcls16, bg_bias, smooth_win)
            Z_mix = Z16 if Z_mix is None else Z_mix
        if P15 is not None:
            Z15 = to_calibrated_logits(P15, Tcls15, bg_bias, smooth_win)
            if Z_mix is None:
                Z_mix = Z15
            else:
                w15 = 1.0 - w16
                Z_mix = (w16*Z_mix + w15*Z15).astype(np.float32)
        seq = decode_with_cost(Z_mix, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out_fn = f'submission_v16v15_{name}_w{int(round(w16*100)):02d}.csv'
    sub.to_csv(out_fn, index=False)
    print(f'[{name}] w16={w16:.2f} -> {out_fn}', sub.shape)
    return out_fn

# Run S1, S4, S5 for w16 in {0.65, 0.70, 0.75}; set submission.csv to S4 w16=0.70 as primary
out_files = []
for w in (0.65, 0.70, 0.75):
    out_files.append(run_blend_variant('S1_mean', alpha=0.85, bg_bias=0.20, smooth_win=1, cost_mode='mean', w16=w))
for w in (0.65, 0.70, 0.75):
    out_files.append(run_blend_variant('S4_median', alpha=0.85, bg_bias=0.25, smooth_win=3, cost_mode='median', w16=w))
for w in (0.65, 0.70, 0.75):
    out_files.append(run_blend_variant('S5_trimmed', alpha=0.85, bg_bias=0.20, smooth_win=3, cost_mode='trimmed_mean_20', w16=w))

# Set primary submission.csv to S4_median w16=0.70
primary = 'submission_v16v15_S4_median_w70.csv'
if os.path.exists(primary):
    pd.read_csv(primary).to_csv('submission.csv', index=False)
    print('[Primary] Wrote submission.csv from', primary)
else:
    print('[Primary] Missing preferred file; not overwriting submission.csv')

[Blend] Loaded v16 test probs: 95


[v15 Predict] 20/95


[v15 Predict] 40/95


[v15 Predict] 60/95


[v15 Predict] 80/95


[Blend] Built v15 test probs: 95 elapsed 8.5s


[S1_mean] w16=0.65 -> submission_v16v15_S1_mean_w65.csv (95, 2)


[S1_mean] w16=0.70 -> submission_v16v15_S1_mean_w70.csv (95, 2)


[S1_mean] w16=0.75 -> submission_v16v15_S1_mean_w75.csv (95, 2)


[S4_median] w16=0.65 -> submission_v16v15_S4_median_w65.csv (95, 2)


[S4_median] w16=0.70 -> submission_v16v15_S4_median_w70.csv (95, 2)


[S4_median] w16=0.75 -> submission_v16v15_S4_median_w75.csv (95, 2)


[S5_trimmed] w16=0.65 -> submission_v16v15_S5_trimmed_w65.csv (95, 2)


[S5_trimmed] w16=0.70 -> submission_v16v15_S5_trimmed_w70.csv (95, 2)


[S5_trimmed] w16=0.75 -> submission_v16v15_S5_trimmed_w75.csv (95, 2)
[Primary] Wrote submission.csv from submission_v16v15_S4_median_w70.csv


In [26]:
# Swap to S5_trimmed w16=0.70 blended decode and write to submission.csv
import os, pandas as pd, shutil
src = 'submission_v16v15_S5_trimmed_w70.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Swap] submission.csv <- submission_v16v15_S5_trimmed_w70.csv (95, 2)


In [27]:
# Decoder hyperparam sweep (S1..S10) with global temp g and w16 variants
import os, time, glob, numpy as np, pandas as pd, xgboost as xgb
from scipy.optimize import linear_sum_assignment
from scipy.special import logit
from scipy.signal import convolve

test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

def load_oof_frames(oof_dirs):
    X, Y = [], []
    for d in oof_dirs:
        for fn in glob.glob(os.path.join(d, '*.npz')):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z: P = z['probs'].astype(np.float32)
            elif 'P' in z: P = z['P'].astype(np.float32)
            else: continue
            if 'y' in z: y = z['y'].astype(np.int32)
            elif 'labels' in z: y = z['labels'].astype(np.int32)
            else: continue
            if P.ndim == 2 and len(y) == P.shape[0]:
                X.append(P); Y.append(y)
    if not X: return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    if P_oof is None or y_oof is None:
        return np.ones(21, dtype=np.float32)
    C = P_oof.shape[1]
    Tcls = np.ones(C, dtype=np.float32)
    y = y_oof
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logits(P, Tcls, bg_bias, smooth_win, g=1.0):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32)
    Z = Z / (Tcls.reshape(1, -1) * float(g))
    Z[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        Z = np.stack([convolve(Z[:, i], k, mode='same') for i in range(Z.shape[1])], 1).astype(np.float32)
    return Z

def segment_exact20_simple(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost(Z, alpha, cost_mode='mean'):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20_simple(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = Z[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            L = seg.shape[0]; lo = int(0.1*L); hi = max(lo+1, int(0.9*L))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:
            agg = seg.mean(0)
        C[j, :] = -agg
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

# Load probs
map16 = load_probs_dir_simple('test_probs_v16', test_ids)
print('[Sweep] v16 test probs:', len(map16))

# Build v15 probs via boosters (fast) if available
bstA = bstB = None
try:
    if os.path.exists('xgb_train.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train.model')
        if int(tmp.num_features()) == 120: bstA = tmp
except Exception as e:
    print('[v15] skip A:', e)
try:
    if os.path.exists('xgb_train_v15.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train_v15.model')
        if int(tmp.num_features()) == 120: bstB = tmp
except Exception as e:
    print('[v15] skip B:', e)
map15 = {}
if (bstA is not None) or (bstB is not None):
    t0 = time.time()
    for i, sid in enumerate(test_ids, 1):
        fn = os.path.join('cache_v15', f'test_{sid:05d}.npz')
        if not os.path.exists(fn): continue
        X = np.load(fn)['X'].astype(np.float32)
        dm = xgb.DMatrix(X)
        Ps = []
        if bstA is not None: Ps.append(bstA.predict(dm))
        if bstB is not None: Ps.append(bstB.predict(dm))
        if Ps:
            P = np.mean(Ps, axis=0).astype(np.float32)
            P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
            map15[sid] = P
        if i % 20 == 0: print(f'[v15] {i}/{len(test_ids)}')
    print('[Sweep] v15 built:', len(map15), 'in', f'{time.time()-t0:.1f}s')
else:
    print('[Sweep] No v15 boosters; proceeding v16-only with w16=1.0')

# Calibrate per-class temperatures separately
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
P_oof16, y_oof16 = load_oof_frames(['oof_probs_v16'])
Tcls16 = fit_per_class_temperature(P_oof16, y_oof16, temp_grid)
if len(map15) > 0:
    P_oof15, y_oof15 = load_oof_frames(['oof_probs_v15'])
    Tcls15 = fit_per_class_temperature(P_oof15, y_oof15, temp_grid)
else:
    Tcls15 = Tcls16

def run_one(name, alpha, bg_bias, smooth, cost_mode, w16=0.70, g=1.0):
    rows = []
    for vid in sorted(test_ids):
        P16 = map16.get(vid)
        P15 = map15.get(vid) if len(map15)>0 else None
        if P16 is None and P15 is None: continue
        Z_mix = None
        if P16 is not None:
            Z16 = to_calibrated_logits(P16, Tcls16, bg_bias, smooth, g=g)
            Z_mix = Z16
        if P15 is not None:
            Z15 = to_calibrated_logits(P15, Tcls15, bg_bias, smooth, g=g)
            if Z_mix is None:
                Z_mix = Z15
            else:
                Z_mix = (w16*Z_mix + (1.0-w16)*Z15).astype(np.float32)
        seq = decode_with_cost(Z_mix, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out = f'submission_sweep_{name}.csv'
    sub.to_csv(out, index=False)
    print('[Sweep] wrote', out, sub.shape)
    return out

# Runs S1..S10 per expert plan
outs = []
outs.append(run_one('S1_w70_mean_a0.85_bb0.20_s1_g1.0', alpha=0.85, bg_bias=0.20, smooth=1, cost_mode='mean', w16=0.70, g=1.0))
outs.append(run_one('S2_w70_mean_a0.80_bb0.25_s1_g1.0', alpha=0.80, bg_bias=0.25, smooth=1, cost_mode='mean', w16=0.70, g=1.0))
outs.append(run_one('S3_w70_mean_a0.90_bb0.15_s1_g1.0', alpha=0.90, bg_bias=0.15, smooth=1, cost_mode='mean', w16=0.70, g=1.0))
outs.append(run_one('S4_w70_median_a0.85_bb0.25_s3_g1.0', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=0.70, g=1.0))
outs.append(run_one('S5_w70_trim_a0.85_bb0.20_s3_g1.0', alpha=0.85, bg_bias=0.20, smooth=3, cost_mode='trimmed_mean_20', w16=0.70, g=1.0))
outs.append(run_one('S6_w70_mean_a0.80_bb0.30_s3_g1.0', alpha=0.80, bg_bias=0.30, smooth=3, cost_mode='mean', w16=0.70, g=1.0))
outs.append(run_one('S7_w70_mean_a0.85_bb0.20_s1_g0.9', alpha=0.85, bg_bias=0.20, smooth=1, cost_mode='mean', w16=0.70, g=0.9))
outs.append(run_one('S8_w70_mean_a0.85_bb0.20_s1_g1.1', alpha=0.85, bg_bias=0.20, smooth=1, cost_mode='mean', w16=0.70, g=1.1))
outs.append(run_one('S9_w60_mean_a0.85_bb0.20_s1_g1.0', alpha=0.85, bg_bias=0.20, smooth=1, cost_mode='mean', w16=0.60, g=1.0))
outs.append(run_one('S10_w80_mean_a0.85_bb0.20_s1_g1.0', alpha=0.85, bg_bias=0.20, smooth=1, cost_mode='mean', w16=0.80, g=1.0))

print('[Sweep] completed', len(outs), 'files')
print('\n'.join(outs))

[Sweep] v16 test probs: 95


[v15] 20/95


[v15] 40/95


[v15] 60/95


[v15] 80/95


[Sweep] v15 built: 95 in 8.8s


[Sweep] wrote submission_sweep_S1_w70_mean_a0.85_bb0.20_s1_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S2_w70_mean_a0.80_bb0.25_s1_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S3_w70_mean_a0.90_bb0.15_s1_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S4_w70_median_a0.85_bb0.25_s3_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S5_w70_trim_a0.85_bb0.20_s3_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S7_w70_mean_a0.85_bb0.20_s1_g0.9.csv (95, 2)


[Sweep] wrote submission_sweep_S8_w70_mean_a0.85_bb0.20_s1_g1.1.csv (95, 2)


[Sweep] wrote submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv (95, 2)


[Sweep] wrote submission_sweep_S10_w80_mean_a0.85_bb0.20_s1_g1.0.csv (95, 2)
[Sweep] completed 10 files
submission_sweep_S1_w70_mean_a0.85_bb0.20_s1_g1.0.csv
submission_sweep_S2_w70_mean_a0.80_bb0.25_s1_g1.0.csv
submission_sweep_S3_w70_mean_a0.90_bb0.15_s1_g1.0.csv
submission_sweep_S4_w70_median_a0.85_bb0.25_s3_g1.0.csv
submission_sweep_S5_w70_trim_a0.85_bb0.20_s3_g1.0.csv
submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv
submission_sweep_S7_w70_mean_a0.85_bb0.20_s1_g0.9.csv
submission_sweep_S8_w70_mean_a0.85_bb0.20_s1_g1.1.csv
submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv
submission_sweep_S10_w80_mean_a0.85_bb0.20_s1_g1.0.csv


In [28]:
# Swap to sweep S6 (w16=0.70, mean, alpha=0.80, bg_bias=0.30, smooth=3, g=1.0)
import os, shutil, pandas as pd
src = 'submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Swap] submission.csv <- submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv (95, 2)


In [29]:
# Weighted rank-ensemble hedge: 0.70 calibrated decode + 0.20 w7_A + 0.10 w6_A
import pandas as pd, numpy as np, os

# Choose primary calibrated decode (from our sweep); adjust if needed
primary = 'submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv'
fallback = 'submission_v16v15_S4_median_w70.csv'
if not os.path.exists(primary):
    assert os.path.exists(fallback), f'Missing both {primary} and {fallback}'
    primary = fallback

aux1 = 'submission_blend_w7_A.csv'
aux2 = 'submission_blend_w6_A.csv'
assert os.path.exists(primary) and os.path.exists(aux1) and os.path.exists(aux2)

w_primary, w_aux1, w_aux2 = 0.70, 0.20, 0.10

dfs = [pd.read_csv(f).sort_values('Id').reset_index(drop=True) for f in (primary, aux1, aux2)]
ids = dfs[0]['Id'].tolist()
for df in dfs: assert df.shape[0] == 95 and set(df.columns)=={'Id','Sequence'} and df['Id'].tolist()==ids

def seq_to_order(seq_str):
    arr = [int(x) for x in str(seq_str).split()]
    # ensure valid classes 1..20; trim/repair if needed
    arr = [x for x in arr if 1 <= x <= 20][:20]
    # rescue to permutation 1..20
    seen, dup_idx = set(), []
    for i, x in enumerate(arr):
        if x in seen: dup_idx.append(i)
        else: seen.add(x)
    missing = [k for k in range(1,21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing): arr[i] = missing[j]
    # If still short, append remaining missing
    if len(arr) < 20: arr += [k for k in range(1,21) if k not in set(arr)]
    return arr[:20]

weights = np.array([w_primary, w_aux1, w_aux2], dtype=np.float32)
seqs = []
for i in range(len(ids)):
    rank_sum = np.zeros(21, dtype=np.float32)  # index 1..20 used
    for j, df in enumerate(dfs):
        order = seq_to_order(df.iloc[i].Sequence)
        for pos, cls in enumerate(order):
            rank_sum[cls] += weights[j] * (pos + 1)
    order = np.argsort(rank_sum[1:]) + 1  # 1..20 by ascending weighted rank
    seqs.append(' '.join(map(str, order)))

out = pd.DataFrame({'Id': ids, 'Sequence': seqs})
out.to_csv('submission.csv', index=False)
print('[HedgeRank] submission.csv from', primary, '+ 0.20 w7_A + 0.10 w6_A', out.shape)

[HedgeRank] submission.csv from submission_sweep_S6_w70_mean_a0.80_bb0.30_s3_g1.0.csv + 0.20 w7_A + 0.10 w6_A (95, 2)


In [30]:
# Create S4 median with g=0.95 (w16=0.70), then set as submission.csv
import os, pandas as pd, shutil

# Assumes run_one and all helpers from cell 16 are in scope
out = run_one('S4_w70_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=0.70, g=0.95)
src = f'submission_sweep_S4_w70_median_a0.85_bb0.25_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Sweep] wrote submission_sweep_S4_w70_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_sweep_S4_w70_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


In [31]:
# Swap to expert-picked S7 (w16=0.70, mean, alpha=0.85, bg_bias=0.20, smooth=1, g=0.9)
import os, shutil, pandas as pd
src = 'submission_sweep_S7_w70_mean_a0.85_bb0.20_s1_g0.9.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Swap] submission.csv <- submission_sweep_S7_w70_mean_a0.85_bb0.20_s1_g0.9.csv (95, 2)


In [32]:
# Swap to expert fallback S9 (w16=0.60, mean, alpha=0.85, bg_bias=0.20, smooth=1, g=1.0)
import os, shutil, pandas as pd
src = 'submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Swap] submission.csv <- submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv (95, 2)


In [33]:
# Log-prob decoding variant (per expert coach): S4 median with g=0.95 using log-probabilities
import os, glob, time, numpy as np, pandas as pd, xgboost as xgb
from scipy.special import logit, softmax
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

eps = 1e-6
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

def load_oof_frames(oof_dirs):
    X, Y = [], []
    for d in oof_dirs:
        for fn in glob.glob(os.path.join(d, '*.npz')):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z: P = z['probs'].astype(np.float32)
            elif 'P' in z: P = z['P'].astype(np.float32)
            else: continue
            if 'y' in z: y = z['y'].astype(np.int32)
            elif 'labels' in z: y = z['labels'].astype(np.int32)
            else: continue
            if P.ndim == 2 and len(y) == P.shape[0]:
                X.append(P); Y.append(y)
    if not X: return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    if P_oof is None or y_oof is None:
        return np.ones(21, dtype=np.float32)
    C = P_oof.shape[1]
    Tcls = np.ones(C, dtype=np.float32)
    y = y_oof
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias, smooth_win, g=1.0):
    # Clip, convert to per-class scaled logits, softmax to calibrated probs, then log-prob; smooth in log space
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32)  # per-class logits (binary logit approximation per class)
    Z = Z / (Tcls.reshape(1, -1) * float(g))
    # Convert to calibrated probs via softmax across 21 classes
    Pcal = softmax(Z, axis=1).astype(np.float32)
    # Ensure normalization and numeric stability
    Pcal = Pcal / np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)  # log-probabilities
    # Background bias in log-space: add bias to bg log-prob
    L[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        L = np.stack([convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exact20_simple(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha, cost_mode='median'):
    # L are log-probabilities; use margin on log-probs
    T = L.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (L[:, 1:21].max(1) - L[:, 0]).astype(np.float32)
    segs = segment_exact20_simple(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            Ln = seg.shape[0]; lo = int(0.1*Ln); hi = max(lo+1, int(0.9*Ln))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:
            agg = seg.mean(0)
        C[j, :] = -agg  # maximize log-prob -> minimize negative
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

# Load per-frame probs for v16 and v15 and per-class temps
map16 = load_probs_dir_simple('test_probs_v16', test_ids)
bstA = bstB = None
try:
    if os.path.exists('xgb_train.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train.model')
        if int(tmp.num_features()) == 120: bstA = tmp
except Exception as e:
    print('[v15] skip A:', e)
try:
    if os.path.exists('xgb_train_v15.model'):
        tmp = xgb.Booster(); tmp.load_model('xgb_train_v15.model')
        if int(tmp.num_features()) == 120: bstB = tmp
except Exception as e:
    print('[v15] skip B:', e)
map15 = {}
if (bstA is not None) or (bstB is not None):
    for sid in test_ids:
        fn = os.path.join('cache_v15', f'test_{sid:05d}.npz')
        if not os.path.exists(fn): continue
        X = np.load(fn)['X'].astype(np.float32)
        dm = xgb.DMatrix(X)
        Ps = []
        if bstA is not None: Ps.append(bstA.predict(dm))
        if bstB is not None: Ps.append(bstB.predict(dm))
        if Ps:
            P = np.mean(Ps, axis=0).astype(np.float32)
            P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
            map15[sid] = P

temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
P_oof16, y_oof16 = load_oof_frames(['oof_probs_v16'])
Tcls16 = fit_per_class_temperature(P_oof16, y_oof16, temp_grid)
P_oof15, y_oof15 = load_oof_frames(['oof_probs_v15'])
Tcls15 = fit_per_class_temperature(P_oof15, y_oof15, temp_grid) if P_oof15 is not None else Tcls16

def run_one_logprob(name, alpha, bg_bias, smooth, cost_mode, w16=0.70, g=0.95):
    rows = []
    for vid in sorted(test_ids):
        P16 = map16.get(vid); P15 = map15.get(vid)
        if P16 is None and P15 is None: continue
        Lmix = None
        if P16 is not None:
            L16 = to_calibrated_logprobs(P16, Tcls16, bg_bias, smooth, g=g)
            Lmix = L16
        if P15 is not None:
            L15 = to_calibrated_logprobs(P15, Tcls15, bg_bias, smooth, g=g)
            if Lmix is None:
                Lmix = L15
            else:
                Lmix = (w16*Lmix + (1.0-w16)*L15).astype(np.float32)
        seq = decode_with_cost_logprob(Lmix, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out = f'submission_logprob_{name}.csv'
    sub.to_csv(out, index=False)
    print('[LogProb] wrote', out, sub.shape)
    return out

# Expert-pick: S4 median with g=0.95, w16=0.70
out = run_one_logprob('S4_w70_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=0.70, g=0.95)
pd.read_csv(out).to_csv('submission.csv', index=False)
print('[Swap] submission.csv <-', out)

[LogProb] wrote submission_logprob_S4_w70_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S4_w70_median_a0.85_bb0.25_s3_g0.95.csv


In [34]:
# Log-prob decoding: S5 trimmed mean with g=0.95 (w16=0.70), then set as submission.csv
import pandas as pd, shutil, os
out = run_one_logprob('S5_w70_trim_a0.85_bb0.20_s3_g0.95', alpha=0.85, bg_bias=0.20, smooth=3, cost_mode='trimmed_mean_20', w16=0.70, g=0.95)
src = f'submission_logprob_S5_w70_trim_a0.85_bb0.20_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[LogProb] wrote submission_logprob_S5_w70_trim_a0.85_bb0.20_s3_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S5_w70_trim_a0.85_bb0.20_s3_g0.95.csv (95, 2)


In [35]:
# Log-prob S4 median g=0.95 with w16 in {0.65, 0.75}; set 0.65 as submission.csv
import os, pandas as pd, shutil

# Generate both variants
out65 = run_one_logprob('S4_w65_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=0.65, g=0.95)
out75 = run_one_logprob('S4_w75_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=0.75, g=0.95)

# Copy w16=0.65 to submission.csv for next submission; we'll try 0.75 next if needed
src = 'submission_logprob_S4_w65_median_a0.85_bb0.25_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[LogProb] wrote submission_logprob_S4_w65_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


[LogProb] wrote submission_logprob_S4_w75_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S4_w65_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


In [36]:
# Swap to log-prob S4 median g=0.95 w16=0.75 and write to submission.csv
import os, shutil, pandas as pd
src = 'submission_logprob_S4_w75_median_a0.85_bb0.25_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[Swap] submission.csv <- submission_logprob_S4_w75_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


In [37]:
# Log-prob S4 median g=0.95 with smooth=1 (w16=0.70), then set as submission.csv
import os, pandas as pd, shutil
out = run_one_logprob('S4_w70_median_a0.85_bb0.25_s1_g0.95', alpha=0.85, bg_bias=0.25, smooth=1, cost_mode='median', w16=0.70, g=0.95)
src = 'submission_logprob_S4_w70_median_a0.85_bb0.25_s1_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[LogProb] wrote submission_logprob_S4_w70_median_a0.85_bb0.25_s1_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S4_w70_median_a0.85_bb0.25_s1_g0.95.csv (95, 2)


In [38]:
# Log-prob v16-only (ignore v15) S4 median g=0.95; set as submission.csv
import os, pandas as pd, shutil

# Ensure v15 stream is ignored
try:
    map15.clear()
except Exception:
    map15 = {}

# v16-only: w16=1.0, smooth=3
out_v16_s3 = run_one_logprob('S4_w100_v16only_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', w16=1.0, g=0.95)

# Optionally also try smooth=1 quickly and choose one; default to s3 for submission.csv
out_v16_s1 = run_one_logprob('S4_w100_v16only_median_a0.85_bb0.25_s1_g0.95', alpha=0.85, bg_bias=0.25, smooth=1, cost_mode='median', w16=1.0, g=0.95)

src = 'submission_logprob_S4_w100_v16only_median_a0.85_bb0.25_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[LogProb] wrote submission_logprob_S4_w100_v16only_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


[LogProb] wrote submission_logprob_S4_w100_v16only_median_a0.85_bb0.25_s1_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S4_w100_v16only_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


In [39]:
# Log-prob S4 median with stronger bias/shorter segments: a=0.80, bb=0.30, s3, g=0.95, w16=0.70
import os, pandas as pd, shutil
out = run_one_logprob('S4_w70_median_a0.80_bb0.30_s3_g0.95', alpha=0.80, bg_bias=0.30, smooth=3, cost_mode='median', w16=0.70, g=0.95)
src = 'submission_logprob_S4_w70_median_a0.80_bb0.30_s3_g0.95.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[Swap] submission.csv <-', src, df.shape)

[LogProb] wrote submission_logprob_S4_w70_median_a0.80_bb0.30_s3_g0.95.csv (95, 2)
[Swap] submission.csv <- submission_logprob_S4_w70_median_a0.80_bb0.30_s3_g0.95.csv (95, 2)


In [40]:
# Final picks per expert: New log-prob S4 median g=0.90, then prepare S9 as next
import os, shutil, pandas as pd

# 1) New: log-prob S4 median with g=0.90 (w16=0.70, a=0.85, bb=0.25, s3)
out1 = run_one_logprob('S4_w70_median_a0.85_bb0.25_s3_g0.90',
                       alpha=0.85, bg_bias=0.25, smooth=3,
                       cost_mode='median', w16=0.70, g=0.90)
src1 = 'submission_logprob_S4_w70_median_a0.85_bb0.25_s3_g0.90.csv'
assert os.path.exists(src1), f'Missing {src1}'
shutil.copyfile(src1, 'submission.csv')
print('[Submit #1] submission.csv <-', src1)

# 2) Diverse hedge (existing): S9 mean
src2 = 'submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv'
assert os.path.exists(src2), f'Missing {src2}'
shutil.copyfile(src2, 'submission_next.csv')
print('[Submit #2 ready] submission_next.csv <-', src2)

[LogProb] wrote submission_logprob_S4_w70_median_a0.85_bb0.25_s3_g0.90.csv (95, 2)
[Submit #1] submission.csv <- submission_logprob_S4_w70_median_a0.85_bb0.25_s3_g0.90.csv
[Submit #2 ready] submission_next.csv <- submission_sweep_S9_w60_mean_a0.85_bb0.20_s1_g1.0.csv


In [41]:
# Swap staged Pick #2 (S9) into submission.csv
import os, shutil, pandas as pd
src = 'submission_next.csv'
assert os.path.exists(src), f'Missing {src}'
shutil.copyfile(src, 'submission.csv')
df = pd.read_csv('submission.csv')
print('[SwapNext] submission.csv <-', src, df.shape)

[SwapNext] submission.csv <- submission_next.csv (95, 2)


In [45]:
# Sanity checks: class mapping and submission format (robust + infer bg column across all OOF files)
import glob, numpy as np, pandas as pd, os, json, random

print('=== OOF/class mapping sanity ===')
oof_files = sorted(glob.glob('oof_probs_v16/oof_*.npz'))
assert oof_files, 'No OOF files found in oof_probs_v16'
n_show = min(3, len(oof_files))
print('oof files count=', len(oof_files), 'showing first', n_show)
means = []
for i, fn in enumerate(oof_files):
    z = np.load(fn)
    keys = set(getattr(z, 'files', []))
    P = z['probs'] if 'probs' in keys else (z['P'] if 'P' in keys else None)
    if P is None: continue
    if P.ndim != 2: continue
    if i < n_show:
        print(' sample', i, 'keys=', sorted(list(keys)), 'shape=', P.shape)
    means.append(P.mean(0))
assert means, 'No usable probs in OOF npz files'
m_all = np.mean(np.stack(means, 0), 0)
C = m_all.shape[0]
print('C (num classes)=', C)
bg_idx = int(np.argmax(m_all))
print('Per-class mean (first 10)=', np.round(m_all[:10], 6).tolist(), '...')
print('Inferred bg_idx =', bg_idx, '(0-based). m_all[bg]=', float(m_all[bg_idx]))
assert C == 21, f'Unexpected class count {C}; expected 21'

# Cross-check with training cache labels (expect labels in 0..20 and background label id likely 0) 
train_cache = sorted(glob.glob('cache_v16/train_*.npz'))
tr_y_min = tr_y_max = None
if train_cache:
    # sample a few files to check label ranges
    samp = random.sample(train_cache, min(5, len(train_cache)))
    mins, maxs = [], []
    for fn in samp:
        zt = np.load(fn)
        if 'y' in zt: y = zt['y']
        elif 'labels' in zt: y = zt['labels']
        else: continue
        if y.ndim != 1: continue
        mins.append(int(y.min())); maxs.append(int(y.max()))
    if mins:
        tr_y_min, tr_y_max = min(mins), max(maxs)
        print('Train cache y range across sample: min=', tr_y_min, 'max=', tr_y_max)
else:
    print('[Warn] No cache_v16 training npz found to cross-check labels')

# Persist inferred bg index for downstream decoders to consume
with open('bg_index.json', 'w') as f:
    json.dump({'bg_idx': bg_idx, 'C': int(C), 'source': 'oof_probs_v16', 'train_y_min': tr_y_min, 'train_y_max': tr_y_max}, f)
print('Saved bg_index.json with bg_idx=', bg_idx)

print('\n=== submission.csv sanity ===')
assert os.path.exists('submission.csv'), 'submission.csv missing'
s = pd.read_csv('submission.csv')
print('submission shape:', s.shape)
head_checks = [ (len(x.split()), len(set(x.split()))) for x in s['Sequence'].head(3) ]
print('first 3 rows (len, unique):', head_checks)
assert s.shape[0]==95 and set(s.columns)=={'Id','Sequence'}, 'submission.csv shape/columns invalid'
print('OK: submission.csv looks well-formed')

print('\n=== Bag-of-classes quick baseline check (no write) ===')
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
test_prob_dir = 'test_probs_v16'
def load_probs_dir_simple(d, ids):
    out = {}
    import os
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z2 = np.load(fn, allow_pickle=True)
                if 'probs' in z2: out[vid] = z2['probs'].astype(np.float32)
                elif 'P' in z2: out[vid] = z2['P'].astype(np.float32)
                break
    return out
mp = load_probs_dir_simple(test_prob_dir, test_ids)
if len(mp)==95:
    vid0 = sorted(mp.keys())[0]
    P0 = mp[vid0]
    # Use inferred bg_idx when ignoring background
    cols = [i for i in range(C) if i != bg_idx]
    score = P0[:, cols].sum(0)
    order = np.argsort(-score) + 1  # class ids assumed 1..20 mapped in same order as columns excluding bg
    print('Bag-of-classes sample video', vid0, 'bg_idx', bg_idx, 'top5 (1-based class ids approximation):', order[:5].tolist())
else:
    print('Note: test_probs_v16 missing for some vids:', len(mp))

print('\nNOTE: bg_idx != 0 indicates a class mapping shift. Downstream decoders should use bg_idx from bg_index.json instead of assuming 0.')

=== OOF/class mapping sanity ===
oof files count= 297 showing first 3
 sample 0 keys= ['probs', 'seq', 'sid'] shape= (627, 21)
 sample 1 keys= ['probs', 'seq', 'sid'] shape= (559, 21)
 sample 2 keys= ['probs', 'seq', 'sid'] shape= (668, 21)
C (num classes)= 21
Per-class mean (first 10)= [0.25760599970817566, 0.036462001502513885, 0.03442699834704399, 0.037664998322725296, 0.03947199881076813, 0.03866399824619293, 0.03352800011634827, 0.036205001175403595, 0.0348379984498024, 0.04036400094628334] ...
Inferred bg_idx = 0 (0-based). m_all[bg]= 0.25760623812675476
Train cache y range across sample: min= 0 max= 20
Saved bg_index.json with bg_idx= 0

=== submission.csv sanity ===
submission shape: (95, 2)
first 3 rows (len, unique): [(20, 20), (20, 20), (20, 20)]
OK: submission.csv looks well-formed

=== Bag-of-classes quick baseline check (no write) ===
Bag-of-classes sample video 300 bg_idx 0 top5 (1-based class ids approximation): [18, 11, 12, 15, 20]

NOTE: bg_idx != 0 indicates a class 

In [46]:
# Build labeled OOF for v16 with 5-fold GroupKFold (per-video), save per-video probs+y
import os, time, glob, numpy as np, pandas as pd, xgboost as xgb
from sklearn.model_selection import GroupKFold

out_dir = 'oof_probs_v16_labeled'
os.makedirs(out_dir, exist_ok=True)

train_meta = pd.read_csv('training.csv')
all_vids = train_meta['Id'].astype(int).tolist()

def load_train_v16(vid):
    fn = os.path.join('cache_v16', f'train_{vid:05d}.npz')
    if not os.path.exists(fn):
        return None, None
    z = np.load(fn, allow_pickle=False)
    X = z['X'].astype(np.float32)
    y = z['y'].astype(np.int32) if 'y' in z else (z['labels'].astype(np.int32) if 'labels' in z else None)
    return X, y

# Collect available videos (with labels) and their lengths
vids, Xs, Ys, lengths = [], [], [], []
t0 = time.time()
for i, vid in enumerate(all_vids, 1):
    Xv, yv = load_train_v16(vid)
    if Xv is None or yv is None or len(yv) != len(Xv):
        continue
    vids.append(int(vid)); Xs.append(Xv); Ys.append(yv); lengths.append(len(yv))
    if i % 25 == 0:
        print(f'[Load] {i}/{len(all_vids)} vids scanned; kept={len(vids)}', flush=True)
print(f'[Load] Done. kept vids={len(vids)} elapsed={time.time()-t0:.1f}s')
assert vids, 'No v16 training videos with labels found in cache_v16'

# Prepare GroupKFold over videos
n_splits = 5
gkf = GroupKFold(n_splits=n_splits)
vids_arr = np.array(vids, dtype=np.int32)
groups = vids_arr.copy()  # group by video id

params = {
    'objective': 'multi:softprob',
    'num_class': 21,
    'tree_method': 'hist',
    'device': 'cuda',
    'max_bin': 256,
    'max_depth': 8,
    'eta': 0.05,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 3,
    'gamma': 0.0,
    'reg_lambda': 1.0,
    'reg_alpha': 0.0,
    'eval_metric': 'mlogloss'
}
print('[XGB] Params:', params)

def concat_by_indices(idxs):
    X_list, y_list = [], []
    for j in idxs:
        X_list.append(Xs[j]); y_list.append(Ys[j])
    Xc = np.concatenate(X_list, 0); yc = np.concatenate(y_list, 0)
    return Xc, yc

t_all = time.time()
fold_times = []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(vids_arr, groups=groups), 1):
    t_fold = time.time()
    tr_vids = vids_arr[tr_idx].tolist(); va_vids = vids_arr[va_idx].tolist()
    print(f'\n[Fold {fold}/{n_splits}] tr_videos={len(tr_vids)} va_videos={len(va_vids)}')
    X_tr, y_tr = concat_by_indices(tr_idx)
    X_va, y_va = concat_by_indices(va_idx)
    print(f'[Fold {fold}] X_tr={X_tr.shape} y_tr={y_tr.shape} | X_va={X_va.shape} y_va={y_va.shape}', flush=True)

    dtrain = xgb.DMatrix(X_tr, label=y_tr)
    dvalid = xgb.DMatrix(X_va, label=y_va)
    evals = [(dtrain, 'train'), (dvalid, 'valid')]
    bst = xgb.train(
        params,
        dtrain,
        num_boost_round=1500,
        evals=evals,
        early_stopping_rounds=80,
        verbose_eval=50
    )
    best_iter = getattr(bst, 'best_iteration', None)
    print(f'[Fold {fold}] train done; best_iter={best_iter} best_score={getattr(bst, "best_score", float("nan"))}')

    # Predict and save for each validation video separately
    # Build index ranges to slice back per video
    # We iterate va_idx order and predict per video directly for memory clarity
    saved = 0
    for j_idx in va_idx:
        vid = int(vids[j_idx])
        Xv = Xs[j_idx]; yv = Ys[j_idx]
        dm = xgb.DMatrix(Xv)
        if best_iter is not None:
            P = bst.predict(dm, iteration_range=(0, best_iter+1))
        else:
            P = bst.predict(dm)
        P = P.astype(np.float32)
        # Normalize rows just in case
        P /= np.clip(P.sum(1, keepdims=True), 1e-8, None)
        out_fn = os.path.join(out_dir, f'oof_{vid:05d}.npz')
        np.savez_compressed(out_fn, probs=P, y=yv.astype(np.int32), sid=np.array([vid], dtype=np.int32))
        saved += 1
        if saved % 10 == 0:
            print(f'[Fold {fold}] saved {saved}/{len(va_idx)}', flush=True)
    elapsed_fold = time.time() - t_fold
    fold_times.append(elapsed_fold)
    print(f'[Fold {fold}] done. Saved {saved} videos. Elapsed {elapsed_fold:.1f}s', flush=True)

print(f'\n[OOF] Completed {n_splits}-fold OOF build into {out_dir}. Total elapsed {time.time()-t_all:.1f}s. Per-fold times: {[round(x,1) for x in fold_times]}')

# Quick verify: count files and inspect one
files = sorted(glob.glob(os.path.join(out_dir, 'oof_*.npz')))
print('[OOF] files:', len(files))
if files:
    z = np.load(files[0])
    print('[OOF] sample keys:', list(z.files), 'shape:', z['probs'].shape, 'y:', z['y'].shape, 'sid:', z['sid'])
else:
    print('[OOF] WARNING: no files written!')

print('[Next] Fit per-class temperatures on oof_probs_v16_labeled and rerun v16-only S4/S5 decoders as per expert settings.')

[Load] 25/297 vids scanned; kept=25


[Load] 50/297 vids scanned; kept=50


[Load] 75/297 vids scanned; kept=75


[Load] 100/297 vids scanned; kept=100


[Load] 125/297 vids scanned; kept=125


[Load] 150/297 vids scanned; kept=150


[Load] 175/297 vids scanned; kept=175


[Load] 200/297 vids scanned; kept=200


[Load] 225/297 vids scanned; kept=225


[Load] 250/297 vids scanned; kept=250


[Load] 275/297 vids scanned; kept=275


[Load] Done. kept vids=297 elapsed=0.7s
[XGB] Params: {'objective': 'multi:softprob', 'num_class': 21, 'tree_method': 'hist', 'device': 'cuda', 'max_bin': 256, 'max_depth': 8, 'eta': 0.05, 'subsample': 0.8, 'colsample_bytree': 0.8, 'min_child_weight': 3, 'gamma': 0.0, 'reg_lambda': 1.0, 'reg_alpha': 0.0, 'eval_metric': 'mlogloss'}

[Fold 1/5] tr_videos=237 va_videos=60
[Fold 1] X_tr=(149902, 193) y_tr=(149902,) | X_va=(37394, 193) y_va=(37394,)


[0]	train-mlogloss:2.85741	valid-mlogloss:2.88900


[50]	train-mlogloss:1.21769	valid-mlogloss:1.65645


[100]	train-mlogloss:0.81797	valid-mlogloss:1.47703


[150]	train-mlogloss:0.59806	valid-mlogloss:1.41203


[200]	train-mlogloss:0.45191	valid-mlogloss:1.37899


[250]	train-mlogloss:0.34715	valid-mlogloss:1.35705


[300]	train-mlogloss:0.27510	valid-mlogloss:1.34505


[350]	train-mlogloss:0.22160	valid-mlogloss:1.33679


[400]	train-mlogloss:0.18336	valid-mlogloss:1.33380


[450]	train-mlogloss:0.15557	valid-mlogloss:1.33186


[500]	train-mlogloss:0.13548	valid-mlogloss:1.33262


[547]	train-mlogloss:0.12149	valid-mlogloss:1.33434


[Fold 1] train done; best_iter=467 best_score=1.331653626302092


[Fold 1] saved 10/60


[Fold 1] saved 20/60


[Fold 1] saved 30/60


[Fold 1] saved 40/60


[Fold 1] saved 50/60


[Fold 1] saved 60/60


[Fold 1] done. Saved 60 videos. Elapsed 135.3s



[Fold 2/5] tr_videos=237 va_videos=60
[Fold 2] X_tr=(148508, 193) y_tr=(148508,) | X_va=(38788, 193) y_va=(38788,)


[0]	train-mlogloss:2.85956	valid-mlogloss:2.87773


[50]	train-mlogloss:1.22682	valid-mlogloss:1.60540


[100]	train-mlogloss:0.82533	valid-mlogloss:1.42024


[150]	train-mlogloss:0.60027	valid-mlogloss:1.35297


[200]	train-mlogloss:0.44923	valid-mlogloss:1.31585


[250]	train-mlogloss:0.34223	valid-mlogloss:1.29214


[300]	train-mlogloss:0.26686	valid-mlogloss:1.27722


[350]	train-mlogloss:0.21322	valid-mlogloss:1.26650


[400]	train-mlogloss:0.17342	valid-mlogloss:1.25869


[450]	train-mlogloss:0.14480	valid-mlogloss:1.25373


[500]	train-mlogloss:0.12388	valid-mlogloss:1.25099


[550]	train-mlogloss:0.10816	valid-mlogloss:1.25086


[600]	train-mlogloss:0.09678	valid-mlogloss:1.25096


[650]	train-mlogloss:0.08820	valid-mlogloss:1.25264


[674]	train-mlogloss:0.08483	valid-mlogloss:1.25340


[Fold 2] train done; best_iter=594 best_score=1.2505774837550496


[Fold 2] saved 10/60


[Fold 2] saved 20/60


[Fold 2] saved 30/60


[Fold 2] saved 40/60


[Fold 2] saved 50/60


[Fold 2] saved 60/60


[Fold 2] done. Saved 60 videos. Elapsed 156.8s



[Fold 3/5] tr_videos=238 va_videos=59
[Fold 3] X_tr=(150043, 193) y_tr=(150043,) | X_va=(37253, 193) y_va=(37253,)


[0]	train-mlogloss:2.85825	valid-mlogloss:2.88202


[50]	train-mlogloss:1.22041	valid-mlogloss:1.64381


[100]	train-mlogloss:0.82810	valid-mlogloss:1.47689


[150]	train-mlogloss:0.60895	valid-mlogloss:1.41730


[200]	train-mlogloss:0.45721	valid-mlogloss:1.38642


[250]	train-mlogloss:0.34996	valid-mlogloss:1.36782


[300]	train-mlogloss:0.27444	valid-mlogloss:1.35749


[350]	train-mlogloss:0.22066	valid-mlogloss:1.35054


[400]	train-mlogloss:0.18120	valid-mlogloss:1.34762


[450]	train-mlogloss:0.15296	valid-mlogloss:1.34827


[485]	train-mlogloss:0.13759	valid-mlogloss:1.34909


[Fold 3] train done; best_iter=405 best_score=1.3472665489331255


[Fold 3] saved 10/59


[Fold 3] saved 20/59


[Fold 3] saved 30/59


[Fold 3] saved 40/59


[Fold 3] saved 50/59


[Fold 3] done. Saved 59 videos. Elapsed 116.6s



[Fold 4/5] tr_videos=238 va_videos=59
[Fold 4] X_tr=(150274, 193) y_tr=(150274,) | X_va=(37022, 193) y_va=(37022,)


[0]	train-mlogloss:2.85944	valid-mlogloss:2.88222


[50]	train-mlogloss:1.23094	valid-mlogloss:1.60453


[100]	train-mlogloss:0.83566	valid-mlogloss:1.42075


[150]	train-mlogloss:0.61479	valid-mlogloss:1.35314


[200]	train-mlogloss:0.46312	valid-mlogloss:1.31893


[250]	train-mlogloss:0.35823	valid-mlogloss:1.29781


[300]	train-mlogloss:0.28332	valid-mlogloss:1.28553


[350]	train-mlogloss:0.22829	valid-mlogloss:1.27762


[400]	train-mlogloss:0.18955	valid-mlogloss:1.27526


[450]	train-mlogloss:0.16093	valid-mlogloss:1.27542


[500]	train-mlogloss:0.13974	valid-mlogloss:1.27514


[550]	train-mlogloss:0.12428	valid-mlogloss:1.27813


[562]	train-mlogloss:0.12127	valid-mlogloss:1.27929


[Fold 4] train done; best_iter=482 best_score=1.2746830797070472


[Fold 4] saved 10/59


[Fold 4] saved 20/59


[Fold 4] saved 30/59


[Fold 4] saved 40/59


[Fold 4] saved 50/59


[Fold 4] done. Saved 59 videos. Elapsed 138.5s



[Fold 5/5] tr_videos=238 va_videos=59
[Fold 5] X_tr=(150457, 193) y_tr=(150457,) | X_va=(36839, 193) y_va=(36839,)


[0]	train-mlogloss:2.85762	valid-mlogloss:2.88318


[50]	train-mlogloss:1.22375	valid-mlogloss:1.63633


[100]	train-mlogloss:0.82693	valid-mlogloss:1.45864


[150]	train-mlogloss:0.60572	valid-mlogloss:1.39475


[200]	train-mlogloss:0.45671	valid-mlogloss:1.35973


[250]	train-mlogloss:0.35054	valid-mlogloss:1.33689


[300]	train-mlogloss:0.27491	valid-mlogloss:1.32293


[350]	train-mlogloss:0.21991	valid-mlogloss:1.31410


[400]	train-mlogloss:0.18076	valid-mlogloss:1.30834


[450]	train-mlogloss:0.15193	valid-mlogloss:1.30558


[500]	train-mlogloss:0.13116	valid-mlogloss:1.30597


[542]	train-mlogloss:0.11788	valid-mlogloss:1.30666


[Fold 5] train done; best_iter=462 best_score=1.3053215063743122


[Fold 5] saved 10/59


[Fold 5] saved 20/59


[Fold 5] saved 30/59


[Fold 5] saved 40/59


[Fold 5] saved 50/59


[Fold 5] done. Saved 59 videos. Elapsed 129.9s



[OOF] Completed 5-fold OOF build into oof_probs_v16_labeled. Total elapsed 677.2s. Per-fold times: [135.3, 156.8, 116.6, 138.5, 129.9]
[OOF] files: 297
[OOF] sample keys: ['probs', 'y', 'sid'] shape: (627, 21) y: (627,) sid: [1]
[Next] Fit per-class temperatures on oof_probs_v16_labeled and rerun v16-only S4/S5 decoders as per expert settings.


In [47]:
# Fit per-class temperatures from labeled OOF (v16) and run targeted v16-only decoders
import os, glob, numpy as np, pandas as pd
from scipy.special import logit
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

def load_oof_frames_labeled(oof_dirs):
    X, Y = [], []
    for d in oof_dirs:
        for fn in glob.glob(os.path.join(d, '*.npz')):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z: P = z['probs'].astype(np.float32)
            elif 'P' in z: P = z['P'].astype(np.float32)
            else: continue
            if 'y' in z: y = z['y'].astype(np.int32)
            elif 'labels' in z: y = z['labels'].astype(np.int32)
            else: continue
            if P.ndim == 2 and len(y) == P.shape[0]:
                X.append(P); Y.append(y)
    if not X: return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    if P_oof is None or y_oof is None:
        return np.ones(21, dtype=np.float32)
    C = P_oof.shape[1]
    Tcls = np.ones(C, dtype=np.float32)
    y = y_oof
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logits(P, Tcls, bg_bias, smooth_win, g=1.0):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32)
    Z = Z / (Tcls.reshape(1, -1) * float(g))
    Z[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        Z = np.stack([convolve(Z[:, i], k, mode='same') for i in range(Z.shape[1])], 1).astype(np.float32)
    return Z

def segment_exact20(margin, K, min_len):
    T = int(len(margin))
    min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1:
        min_len -= 1
    if K*min_len > T:
        K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32)
    bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv:
                    bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []
    k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse()
    return bounds

def decode_with_cost(Z, alpha, cost_mode='median'):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = Z[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            L = seg.shape[0]; lo = int(0.1*L); hi = max(lo+1, int(0.9*L))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:
            agg = seg.mean(0)
        C[j, :] = -agg
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

# 1) Fit Tcls from labeled OOF
P_oof, y_oof = load_oof_frames_labeled(['oof_probs_v16_labeled'])
assert P_oof is not None and y_oof is not None, 'Labeled OOF not found'
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
Tcls16 = fit_per_class_temperature(P_oof, y_oof, temp_grid)
print('[Calib] Tcls16 (first 10):', np.round(Tcls16[:10], 3).tolist())

# 2) Load test probs (v16 only)
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
mp = load_probs_dir_simple('test_probs_v16', test_ids)
print('[Test v16] videos:', len(mp))

def run_variant(name, alpha, bg_bias, smooth, cost_mode='median', g=0.95):
    rows = []
    for vid in sorted(mp.keys()):
        P = mp[vid]
        Z = to_calibrated_logits(P, Tcls16, bg_bias, smooth, g=g)
        seq = decode_with_cost(Z, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out = f'submission_v16only_{name}.csv'
    sub.to_csv(out, index=False)
    print('[Write]', out, sub.shape)
    return out

# 3) Minimal targeted runs per expert:
# S4 median: g in {0.95, 0.90}, alpha=0.85, bg_bias=0.25, smooth=3
out_s4_g095 = run_variant('S4_median_a0.85_bb0.25_s3_g0.95', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', g=0.95)
out_s4_g090 = run_variant('S4_median_a0.85_bb0.25_s3_g0.90', alpha=0.85, bg_bias=0.25, smooth=3, cost_mode='median', g=0.90)
# S5 trimmed mean: g=0.95, alpha=0.85, bg_bias=0.20, smooth=3
out_s5 = run_variant('S5_trim_a0.85_bb0.20_s3_g0.95', alpha=0.85, bg_bias=0.20, smooth=3, cost_mode='trimmed_mean_20', g=0.95)
# Optional: S4 median smooth=1, g=0.95
out_s4_s1 = run_variant('S4_median_a0.85_bb0.25_s1_g0.95', alpha=0.85, bg_bias=0.25, smooth=1, cost_mode='median', g=0.95)

# 4) Set primary submission.csv to S4 g=0.95 (median, smooth=3) as first pick
primary = out_s4_g095
pd.read_csv(primary).to_csv('submission.csv', index=False)
print('[Primary] submission.csv <-', primary)

[Calib] Tcls16 (first 10): [1.190999984741211, 1.0269999504089355, 1.1089999675750732, 1.0269999504089355, 1.0269999504089355, 1.0269999504089355, 1.1089999675750732, 1.1089999675750732, 1.0269999504089355, 1.1089999675750732]
[Test v16] videos: 95


[Write] submission_v16only_S4_median_a0.85_bb0.25_s3_g0.95.csv (95, 2)


[Write] submission_v16only_S4_median_a0.85_bb0.25_s3_g0.90.csv (95, 2)


[Write] submission_v16only_S5_trim_a0.85_bb0.20_s3_g0.95.csv (95, 2)


[Write] submission_v16only_S4_median_a0.85_bb0.25_s1_g0.95.csv (95, 2)
[Primary] submission.csv <- submission_v16only_S4_median_a0.85_bb0.25_s3_g0.95.csv


In [48]:
# Compute OOF Levenshtein (v16-only) using labeled OOF to gate submissions
import os, glob, numpy as np, pandas as pd
from scipy.special import logit
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

def load_oof_per_video(oof_dir):
    vids, P_map = [], {}
    files = sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz')))
    for fn in files:
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        if 'probs' in z: P = z['probs'].astype(np.float32)
        elif 'P' in z: P = z['P'].astype(np.float32)
        else: continue
        P_map[sid] = P
        vids.append(sid)
    return sorted(vids), P_map

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, dtype=np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logits(P, Tcls, bg_bias, smooth_win, g=0.95):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32)
    Z = Z / (Tcls.reshape(1, -1) * float(g))
    Z[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        Z = np.stack([convolve(Z[:, i], k, mode='same') for i in range(Z.shape[1])], 1).astype(np.float32)
    return Z

def segment_exact20(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost(Z, alpha, cost_mode='median'):
    T = Z.shape[0]
    min_len = max(2, int(alpha * T / 20.0))
    margin = (Z[:, 1:21].max(1) - Z[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, 20, min_len)
    C = np.zeros((len(segs), 20), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = Z[s:e, 1:21]
        if cost_mode == 'median':
            agg = np.median(seg, axis=0)
        elif cost_mode == 'trimmed_mean_20':
            L = seg.shape[0]; lo = int(0.1*L); hi = max(lo+1, int(0.9*L))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        else:
            agg = seg.mean(0)
        C[j, :] = -agg
    r, c = linear_sum_assignment(C)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

def levenshtein(a, b):
    # a, b are lists of ints
    n, m = len(a), len(b)
    dp = list(range(m+1))
    for i in range(1, n+1):
        prev, dp[0] = dp[0], i
        for j in range(1, m+1):
            cur = min(dp[j] + 1, dp[j-1] + 1, prev + (0 if a[i-1]==b[j-1] else 1))
            prev, dp[j] = dp[j], cur
    return dp[m]

# Load labeled OOF frames for calibration and per-video probs for decoding
vids, Pmap = load_oof_per_video('oof_probs_v16_labeled')
assert len(vids) > 0 and len(Pmap) == len(vids), 'OOF per-video probs missing'
train_df = pd.read_csv('training.csv')[['Id','Sequence']].astype({'Id': int})
seq_gt = {int(r.Id): [int(x) for x in str(r.Sequence).split()] for r in train_df.itertuples(index=False)}

# Fit temperatures from all OOF frames
X_all, y_all = [], []
for fn in glob.glob(os.path.join('oof_probs_v16_labeled', 'oof_*.npz')):
    z = np.load(fn, allow_pickle=True)
    if 'probs' in z and 'y' in z:
        X_all.append(z['probs'].astype(np.float32))
        y_all.append(z['y'].astype(np.int32))
P_oof = np.concatenate(X_all, 0); y_oof = np.concatenate(y_all, 0)
Tcls16 = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))
print('[OOF-Calib] Tcls16 first10:', np.round(Tcls16[:10], 3).tolist())

def eval_cfg(alpha, bg_bias, smooth, g, cost_mode='median', tag=''):
    dists = []
    for vid in vids:
        P = Pmap.get(int(vid))
        if P is None: continue
        Z = to_calibrated_logits(P, Tcls16, bg_bias, smooth, g=g)
        pred = decode_with_cost(Z, alpha, cost_mode=cost_mode)
        gt = seq_gt.get(int(vid), [])
        if len(gt) != 20 or len(pred) != 20: continue
        d = levenshtein(pred, gt) / 20.0
        dists.append(d)
    score = float(np.mean(dists)) if dists else 1.0
    print(f'[OOF-Lev] {tag} -> {score:.5f} over {len(dists)} vids')
    return score

# Evaluate expert configs
s1 = eval_cfg(alpha=0.85, bg_bias=0.25, smooth=3, g=0.95, cost_mode='median', tag='S4 median g=0.95')
s2 = eval_cfg(alpha=0.85, bg_bias=0.25, smooth=3, g=0.90, cost_mode='median', tag='S4 median g=0.90')
s3 = eval_cfg(alpha=0.85, bg_bias=0.20, smooth=3, g=0.95, cost_mode='trimmed_mean_20', tag='S5 trim g=0.95')
best = min(s1, s2, s3)
print('[OOF-Lev] Best=', best)

# If S4 g=0.90 is best and the corresponding test submission exists from v16-only runs, swap it in
cand = None
if s2 <= min(s1, s3):
    cand = 'submission_v16only_S4_median_a0.85_bb0.25_s3_g0.90.csv'
elif s1 <= min(s2, s3):
    cand = 'submission_v16only_S4_median_a0.85_bb0.25_s3_g0.95.csv'
else:
    cand = 'submission_v16only_S5_trim_a0.85_bb0.20_s3_g0.95.csv'
if os.path.exists(cand):
    pd.read_csv(cand).to_csv('submission.csv', index=False)
    print('[Submission swap by OOF] submission.csv <-', cand)
else:
    print('[Submission swap] candidate missing, keeping current submission.csv')

[OOF-Calib] Tcls16 first10: [1.190999984741211, 1.0269999504089355, 1.1089999675750732, 1.0269999504089355, 1.0269999504089355, 1.0269999504089355, 1.1089999675750732, 1.1089999675750732, 1.0269999504089355, 1.1089999675750732]


[OOF-Lev] S4 median g=0.95 -> 0.45873 over 252 vids


[OOF-Lev] S4 median g=0.90 -> 0.45873 over 252 vids


[OOF-Lev] S5 trim g=0.95 -> 0.39702 over 252 vids
[OOF-Lev] Best= 0.3970238095238095
[Submission swap by OOF] submission.csv <- submission_v16only_S5_trim_a0.85_bb0.20_s3_g0.95.csv


In [49]:
# LOG-PROB OOF eval + global permutation + final v16-only submission
import os, glob, numpy as np, pandas as pd
from scipy.special import logit, softmax
from scipy.optimize import linear_sum_assignment

def logsumexp(a, axis=1):
    m = np.max(a, axis=axis, keepdims=True)
    return (m + np.log(np.clip(np.sum(np.exp(a - m), axis=axis, keepdims=True), 1e-12, None))).squeeze(axis)

def to_calibrated_logprobs(P, Tcls, bg_bias, smooth_win, g=1.0):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        L = np.stack([np.convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exact20(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_model_idx(L, alpha):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = list(range(1, C))
    margin = (logsumexp(L[:, cols], axis=1) - L[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, K, min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        agg = np.median(seg, axis=0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    order_model = [int(c[j]) for j in np.argsort(r)]
    return order_model, segs

def decode_with_perm(L, alpha, perm, cost_mode='median'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = list(range(1, C))
    margin = (logsumexp(L[:, cols], axis=1) - L[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, K, min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    model_idx = [int(c[j]) for j in np.argsort(r)]
    return [int(perm[i]) for i in model_idx]

def levenshtein(a, b):
    n, m = len(a), len(b); dp = list(range(m+1))
    for i in range(1, n+1):
        prev, dp[0] = dp[0], i
        for j in range(1, m+1):
            cur = min(dp[j]+1, dp[j-1]+1, prev + (0 if a[i-1]==b[j-1] else 1))
            prev, dp[j] = dp[j], cur
    return dp[m]

# Load labeled OOF per-video
OOF_DIR = 'oof_probs_v16_labeled'
files = sorted(glob.glob(os.path.join(OOF_DIR, 'oof_*.npz')))
vids, Pmap, Ymap = [], {}, {}
for fn in files:
    z = np.load(fn, allow_pickle=True)
    sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
    Pmap[sid] = z['probs'].astype(np.float32); Ymap[sid] = z['y'].astype(np.int32); vids.append(sid)
vids = sorted(vids)

# Fit per-class temperatures from labeled OOF
P_oof = np.concatenate([Pmap[v] for v in vids], 0)
y_oof = np.concatenate([Ymap[v] for v in vids], 0)
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc); yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls
Tcls16 = fit_per_class_temperature(P_oof, y_oof, temp_grid)

# GT sequences
train_df = pd.read_csv('training.csv')[['Id','Sequence']].astype({'Id': int})
seq_gt = {int(r.Id): [int(x) for x in str(r.Sequence).split()] for r in train_df.itertuples(index=False)}

# Learn permutation (model 0..19 -> GT 1..20) by accumulating segment evidence
K = 20
S = np.zeros((K, K), np.float32)
for vid in vids:
    L = to_calibrated_logprobs(Pmap[vid], Tcls16, bg_bias=0.25, smooth_win=3, g=0.95)
    model_idx, segs = decode_model_idx(L, alpha=0.85)
    gt = seq_gt.get(int(vid), [])
    if len(gt) != K: continue
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        agg = np.median(seg, axis=0)
        S[:, int(gt[j]-1)] += agg

r, c = linear_sum_assignment(-S)  # maximize
perm = np.zeros(K, np.int32)
for i in range(K):
    j = int(np.where(r == i)[0][0]); perm[i] = int(c[j]) + 1

# OOF-Lev eval with log-prob + perm
def eval_oof(alpha, bg_bias, smooth, g, cost_mode):
    dists = []
    for vid in vids:
        L = to_calibrated_logprobs(Pmap[vid], Tcls16, bg_bias, smooth, g=g)
        pred = decode_with_perm(L, alpha, perm, cost_mode=cost_mode)
        gt = seq_gt.get(int(vid), [])
        if len(gt) != 20: continue
        dists.append(levenshtein(pred, gt)/20.0)
    return float(np.mean(dists)), len(dists)

s4_095, n1 = eval_oof(0.85, 0.25, 3, 0.95, 'median')
s4_090, n2 = eval_oof(0.85, 0.25, 3, 0.90, 'median')
s5_095, n3 = eval_oof(0.85, 0.20, 3, 0.95, 'trimmed_mean_20')
print(f'[OOF-Lev|logprob+perm] S4 g=0.95 -> {s4_095:.5f} ({n1} vids)')
print(f'[OOF-Lev|logprob+perm] S4 g=0.90 -> {s4_090:.5f} ({n2} vids)')
print(f'[OOF-Lev|logprob+perm] S5 g=0.95 -> {s5_095:.5f} ({n3} vids)')

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

def write_submission_v16(name, alpha, bg_bias, smooth, g, cost_mode='median'):
    test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
    mp = load_probs_dir_simple('test_probs_v16', test_ids)
    rows = []
    for vid in sorted(mp.keys()):
        L = to_calibrated_logprobs(mp[vid], Tcls16, bg_bias, smooth, g=g)
        seq = decode_with_perm(L, alpha, perm, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out = f'submission_v16_perm_{name}.csv'
    sub.to_csv(out, index=False)
    pd.read_csv(out).to_csv('submission.csv', index=False)
    print('[Wrote] submission.csv <-', out, sub.shape)
    return out

best_name, best_score = sorted([
    ('S4_a0.85_bb0.25_s3_g0.95', s4_095),
    ('S4_a0.85_bb0.25_s3_g0.90', s4_090),
    ('S5_a0.85_bb0.20_s3_g0.95', s5_095),
], key=lambda x: x[1])[0]

write_submission_v16(best_name, alpha=0.85,
                     bg_bias=0.25 if 'S4' in best_name else 0.20,
                     smooth=3, g=0.90 if 'g0.90' in best_name else 0.95,
                     cost_mode=('median' if 'S4' in best_name else 'trimmed_mean_20'))

[OOF-Lev|logprob+perm] S4 g=0.95 -> 0.44583 (252 vids)
[OOF-Lev|logprob+perm] S4 g=0.90 -> 0.44504 (252 vids)
[OOF-Lev|logprob+perm] S5 g=0.95 -> 0.39266 (252 vids)


[Wrote] submission.csv <- submission_v16_perm_S5_a0.85_bb0.20_s3_g0.95.csv (95, 2)


'submission_v16_perm_S5_a0.85_bb0.20_s3_g0.95.csv'

In [50]:
# Center-of-Mass (CoM) ordering on calibrated log-probs + global permutation; OOF-eval and write test submission
import os, glob, numpy as np, pandas as pd
from scipy.special import logit, softmax
from scipy.optimize import linear_sum_assignment

def to_calibrated_logprobs(P, Tcls, bg_bias=0.25, smooth_win=3, g=0.95):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        L = np.stack([np.convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def load_oof_labeled_map(oof_dir='oof_probs_v16_labeled'):
    files = sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz')))
    vids, Pmap, Ymap = [], {}, {}
    for fn in files:
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        Pmap[sid] = z['probs'].astype(np.float32); Ymap[sid] = z['y'].astype(np.int32); vids.append(sid)
    return sorted(vids), Pmap, Ymap

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc); yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls

def center_of_mass_order(L):
    # L: [T, 21] log-probs; compute expected time (1..T) per non-bg class using probs
    T = L.shape[0]; idx = np.arange(1, T+1, dtype=np.float32).reshape(-1,1)
    P = np.exp(L[:, 1:21]).astype(np.float32)  # non-bg probs
    wsum = np.clip(P.sum(0), 1e-6, None); tnum = (idx * P).sum(0)
    tbar = (tnum / wsum)  # center-of-mass time per class
    # tie-breaker by peak height (higher peak earlier)
    peak = P.max(0)
    order_model = np.lexsort(( -peak, tbar ))  # ascending tbar, then descending peak
    return order_model.astype(np.int32)  # 0..19 (model class indices for non-bg)

def levenshtein(a, b):
    n, m = len(a), len(b); dp = list(range(m+1))
    for i in range(1, n+1):
        prev, dp[0] = dp[0], i
        for j in range(1, m+1):
            cur = min(dp[j]+1, dp[j-1]+1, prev + (0 if a[i-1]==b[j-1] else 1))
            prev, dp[j] = dp[j], cur
    return dp[m]

# 1) Load labeled OOF and fit temps
vids, Pmap, Ymap = load_oof_labeled_map('oof_probs_v16_labeled')
assert vids, 'No labeled OOF videos found'
P_oof = np.concatenate([Pmap[v] for v in vids], 0)
y_oof = np.concatenate([Ymap[v] for v in vids], 0)
Tcls16 = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))
print('[CoM] Temps first10:', np.round(Tcls16[:10], 3).tolist())

# 2) Ground-truth sequences
train_df = pd.read_csv('training.csv')[['Id','Sequence']].astype({'Id': int})
seq_gt = {int(r.Id): [int(x) for x in str(r.Sequence).split()] for r in train_df.itertuples(index=False)}

# 3) Learn global permutation via evidence matrix using CoM ordering positions
K = 20; S = np.zeros((K, K), np.float32)
for vid in vids:
    L = to_calibrated_logprobs(Pmap[vid], Tcls16, bg_bias=0.25, smooth_win=3, g=0.95)
    order_model = center_of_mass_order(L)  # length 20, each 0..19
    gt = seq_gt.get(int(vid), [])
    if len(gt) != K: continue
    # Add evidence for mapping model class at position j -> gt class at same position
    # Weight by confidence: use per-class peak log-prob as strength
    Pn = L[:, 1:21]
    strength = Pn.max(0)  # shape (20,)
    for j, mi in enumerate(order_model):
        S[int(mi), int(gt[j]-1)] += strength[int(mi)]

r, c = linear_sum_assignment(-S)  # maximize S
perm = np.zeros(K, np.int32)
for i in range(K):
    j = int(np.where(r == i)[0][0]); perm[i] = int(c[j]) + 1  # map model idx-> class id 1..20
print('[CoM] Learned perm (first 10):', perm[:10].tolist())

# 4) OOF-Lev evaluation with CoM + perm
dists = []
for vid in vids:
    L = to_calibrated_logprobs(Pmap[vid], Tcls16, bg_bias=0.25, smooth_win=3, g=0.95)
    order_model = center_of_mass_order(L)
    pred = [int(perm[i]) for i in order_model.tolist()]
    gt = seq_gt.get(int(vid), [])
    if len(gt) != K: continue
    dists.append(levenshtein(pred, gt)/20.0)
oof_score = float(np.mean(dists)) if dists else 1.0
print(f'[OOF-Lev|CoM+perm] -> {oof_score:.5f} over {len(dists)} vids')

# 5) Write test submission with CoM + perm
def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
mp = load_probs_dir_simple('test_probs_v16', test_ids)
rows = []
for vid in sorted(mp.keys()):
    L = to_calibrated_logprobs(mp[vid], Tcls16, bg_bias=0.25, smooth_win=3, g=0.95)
    order_model = center_of_mass_order(L)
    seq = [int(perm[i]) for i in order_model.tolist()]
    rows.append((vid, ' '.join(map(str, seq))))
sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
out = 'submission_v16_CoM_perm_g0.95_bb0.25_s3.csv'
sub.to_csv(out, index=False)
pd.read_csv(out).to_csv('submission.csv', index=False)
print('[Wrote] submission.csv <-', out, sub.shape)

[CoM] Temps first10: [1.190999984741211, 1.0269999504089355, 1.1089999675750732, 1.0269999504089355, 1.0269999504089355, 1.0269999504089355, 1.1089999675750732, 1.1089999675750732, 1.0269999504089355, 1.1089999675750732]
[CoM] Learned perm (first 10): [12, 1, 4, 11, 2, 13, 17, 16, 19, 6]


[OOF-Lev|CoM+perm] -> 0.93611 over 252 vids
[Wrote] submission.csv <- submission_v16_CoM_perm_g0.95_bb0.25_s3.csv (95, 2)


In [52]:
# Correct OOF-Lev using true GT from labeled OOF y (collapse runs), log-prob DP decoder (no perm), write best submission
import os, glob, numpy as np, pandas as pd
from scipy.special import logit, softmax
from scipy.optimize import linear_sum_assignment

def logsumexp(a, axis=1):
    m = np.max(a, axis=axis, keepdims=True)
    return (m + np.log(np.clip(np.sum(np.exp(a - m), axis=axis, keepdims=True), 1e-12, None))).squeeze(axis)

def load_oof_labeled(oof_dir='oof_probs_v16_labeled'):
    vids, Pmap, Ymap = [], {}, {}
    files = sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz')))
    for fn in files:
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        Pmap[sid] = z['probs'].astype(np.float32)
        Ymap[sid] = z['y'].astype(np.int32)
        vids.append(sid)
    return sorted(vids), Pmap, Ymap

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc); yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias, smooth_win, g=1.0):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        L = np.stack([np.convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exact20(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha, cost_mode='median'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = list(range(1, C))
    margin = (logsumexp(L[:, cols], axis=1) - L[:, 0]).astype(np.float32)
    segs = segment_exact20(margin, K, min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    order = [int(c[j])+1 for j in np.argsort(r)]  # map non-bg columns to class ids 1..20
    return order

def levenshtein(a, b):
    n, m = len(a), len(b); dp = list(range(m+1))
    for i in range(1, n+1):
        prev, dp[0] = dp[0], i
        for j in range(1, m+1):
            cur = min(dp[j]+1, dp[j-1]+1, prev + (0 if a[i-1]==b[j-1] else 1))
            prev, dp[j] = dp[j], cur
    return dp[m]

# 1) Load labeled OOF and build true GT sequences by collapsing runs of y>0
vids, Pmap, Ymap = load_oof_labeled('oof_probs_v16_labeled')
assert vids, 'No labeled OOF videos found'
seq_gt_y = {}
for vid in vids:
    y = Ymap[vid]; seq = []; prev = -1
    for t in y:
        ti = int(t)
        if ti != prev and ti > 0:
            seq.append(ti)
        prev = ti
    seq_gt_y[vid] = seq[:20]

# 2) Fit per-class temperatures on all OOF frames
P_oof = np.concatenate([Pmap[v] for v in vids], 0)
y_oof = np.concatenate([Ymap[v] for v in vids], 0)
Tcls16 = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))
print('[GT(y)] Temps first10:', np.round(Tcls16[:10], 3).tolist())

# 3) OOF-Lev eval (log-prob, no permutation) against seq_gt_y
def eval_oof(alpha, bg_bias, smooth, g, cost_mode):
    dists = []
    for vid in vids:
        L = to_calibrated_logprobs(Pmap[vid], Tcls16, bg_bias, smooth, g=g)
        pred = decode_with_cost_logprob(L, alpha, cost_mode=cost_mode)
        gt = seq_gt_y.get(int(vid), [])
        if len(gt) < 1: continue
        gg = gt[:20]
        pp = pred[:len(gg)]
        d = levenshtein(pp, gg) / float(len(gg))
        dists.append(d)
    return float(np.mean(dists)) if dists else 1.0, len(dists)

s4_095, n1 = eval_oof(0.85, 0.25, 3, 0.95, 'median')
s4_090, n2 = eval_oof(0.85, 0.25, 3, 0.90, 'median')
s5_095, n3 = eval_oof(0.85, 0.20, 3, 0.95, 'trimmed_mean_20')
print(f'[OOF-Lev|yGT] S4 g=0.95 -> {s4_095:.5f} ({n1} vids)')
print(f'[OOF-Lev|yGT] S4 g=0.90 -> {s4_090:.5f} ({n2} vids)')
print(f'[OOF-Lev|yGT] S5 g=0.95 -> {s5_095:.5f} ({n3} vids)')

# 4) Write best test submission with the same decoder
def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

# Select best config (name, score, cost_mode, bg_bias, smooth, g) and unpack cleanly
best_tuple = sorted([
    ('S4_a0.85_bb0.25_s3_g0.95', s4_095, 'median', 0.25, 3, 0.95),
    ('S4_a0.85_bb0.25_s3_g0.90', s4_090, 'median', 0.25, 3, 0.90),
    ('S5_a0.85_bb0.20_s3_g0.95', s5_095, 'trimmed_mean_20', 0.20, 3, 0.95),
], key=lambda x: x[1])[0]
best_name, best_score, best_cost_mode, best_bb, best_smooth, best_g = best_tuple
print('[OOF-Lev|yGT] Best:', best_name, '->', f'{best_score:.5f}')

test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
mp = load_probs_dir_simple('test_probs_v16', test_ids)
rows = []
for vid in sorted(mp.keys()):
    L = to_calibrated_logprobs(mp[vid], Tcls16, best_bb, best_smooth, g=best_g)
    seq = decode_with_cost_logprob(L, 0.85, cost_mode=best_cost_mode)
    rows.append((vid, ' '.join(map(str, seq))))
sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
out = f'submission_v16_yGT_{best_name}.csv'
sub.to_csv(out, index=False)
pd.read_csv(out).to_csv('submission.csv', index=False)
print('[Wrote] submission.csv <-', out, sub.shape)

[GT(y)] Temps first10: [1.190999984741211, 1.0269999504089355, 1.1089999675750732, 1.0269999504089355, 1.0269999504089355, 1.0269999504089355, 1.1089999675750732, 1.1089999675750732, 1.0269999504089355, 1.1089999675750732]


[OOF-Lev|yGT] S4 g=0.95 -> 0.45420 (297 vids)
[OOF-Lev|yGT] S4 g=0.90 -> 0.45299 (297 vids)
[OOF-Lev|yGT] S5 g=0.95 -> 0.40374 (297 vids)
[OOF-Lev|yGT] Best: S5_a0.85_bb0.20_s3_g0.95 -> 0.40374


[Wrote] submission.csv <- submission_v16_yGT_S5_a0.85_bb0.20_s3_g0.95.csv (95, 2)


In [53]:
# Quick alt: v16-only log-prob decoder variants (more aggressive) and set a=0.80, bb=0.30, g=0.90, s=3, median as submission.csv
import os, glob, numpy as np, pandas as pd
from scipy.special import logit, softmax
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

def load_oof_labeled(oof_dir='oof_probs_v16_labeled'):
    X, Y = [], []
    for fn in glob.glob(os.path.join(oof_dir, 'oof_*.npz')):
        z = np.load(fn, allow_pickle=True)
        if 'probs' in z and 'y' in z:
            X.append(z['probs'].astype(np.float32))
            Y.append(z['y'].astype(np.int32))
    if not X: return None, None
    return np.concatenate(X, 0), np.concatenate(Y, 0)

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]
    Tcls = np.ones(C, dtype=np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc); yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias, smooth_win, g=0.90):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, 0] += float(bg_bias)
    if smooth_win and smooth_win > 1:
        k = np.ones(int(smooth_win), np.float32) / float(smooth_win)
        L = np.stack([convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exact20(margin, K, min_len):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha, cost_mode='median'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = list(range(1, C))
    m = (np.log(np.clip(np.exp(L[:, cols]).sum(1, keepdims=True), 1e-12, None)).squeeze(1) - L[:, 0]).astype(np.float32)
    segs = segment_exact20(m, K, min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, 1:21]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n))
            agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    order = [int(c[j])+1 for j in np.argsort(r)]
    return order

def load_probs_dir_simple(d, ids):
    out = {}
    for vid in ids:
        for pat in (f'{vid}.npz', f'{vid:05d}.npz', f'test_{vid:05d}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: out[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: out[vid] = z['P'].astype(np.float32)
                break
    return out

# Fit temps from labeled OOF
P_oof, y_oof = load_oof_labeled('oof_probs_v16_labeled')
assert P_oof is not None and y_oof is not None, 'Missing labeled OOF v16'
Tcls16 = fit_per_class_temperature(P_oof, y_oof, np.linspace(0.7, 1.6, 12).astype(np.float32))

# Load test v16 probs
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
mp = load_probs_dir_simple('test_probs_v16', test_ids)

def run_variant(name, alpha, bg_bias, smooth, g, cost_mode='median'):
    rows = []
    for vid in sorted(mp.keys()):
        L = to_calibrated_logprobs(mp[vid], Tcls16, bg_bias, smooth, g=g)
        seq = decode_with_cost_logprob(L, alpha, cost_mode=cost_mode)
        rows.append((vid, ' '.join(map(str, seq))))
    sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
    out = f'submission_v16_logprob_{name}.csv'
    sub.to_csv(out, index=False)
    print('[Write]', out, sub.shape)
    return out

# Aggressive variants
out1 = run_variant('S4_a0.80_bb0.30_s3_g0.90_median', alpha=0.80, bg_bias=0.30, smooth=3, g=0.90, cost_mode='median')
out2 = run_variant('S5_a0.85_bb0.20_s3_g0.90_trim', alpha=0.85, bg_bias=0.20, smooth=3, g=0.90, cost_mode='trimmed_mean_20')
out3 = run_variant('S4_a0.80_bb0.25_s3_g0.90_median', alpha=0.80, bg_bias=0.25, smooth=3, g=0.90, cost_mode='median')

# Set primary to S4 a0.80 bb0.30 s3 g=0.90 median
pd.read_csv(out1).to_csv('submission.csv', index=False)
print('[Primary] submission.csv <-', out1)

[Write] submission_v16_logprob_S4_a0.80_bb0.30_s3_g0.90_median.csv (95, 2)


[Write] submission_v16_logprob_S5_a0.85_bb0.20_s3_g0.90_trim.csv (95, 2)


[Write] submission_v16_logprob_S4_a0.80_bb0.25_s3_g0.90_median.csv (95, 2)
[Primary] submission.csv <- submission_v16_logprob_S4_a0.80_bb0.30_s3_g0.90_median.csv


In [54]:
# Cell 39: Install Torch (cu121) + TCN 5-fold GroupKFold training on cache_v16 -> OOF/test probs
import os, sys, subprocess, time, random, glob, math, gc, json
import numpy as np, pandas as pd

# 1) Safe Torch install (CUDA 12.1) + sanity
try:
    import torch, torchvision, torchaudio  # noqa
    need_install = False
except Exception:
    need_install = True

if need_install:
    subprocess.run([sys.executable, "-m", "pip", "install", "--no-cache-dir",
                    "--index-url", "https://download.pytorch.org/whl/cu121",
                    "torch==2.4.1", "torchvision==0.19.1", "torchaudio==2.4.1"], check=False)

import torch, torch.nn as nn, torch.nn.functional as F
print("[Torch]", torch.__version__)
os.system('nvidia-smi || true')
print("[CUDA avail]", torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from sklearn.model_selection import GroupKFold

# 2) Repro + config
SEED = 42
random.seed(SEED); np.random.seed(SEED)
torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

CACHE_DIR = './cache_v16'
OOF_DIR = './oof_probs_v16_tcn'
TEST_FOLD_DIRS = [f'./test_probs_v16_tcn_fold{i}' for i in range(1, 6)]
for d in [OOF_DIR] + TEST_FOLD_DIRS:
    os.makedirs(d, exist_ok=True)

train_ids = pd.read_csv('training.csv')['Id'].astype(int).tolist()
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

def load_train_video(vid):
    fn = os.path.join(CACHE_DIR, f'train_{vid:05d}.npz')
    if not os.path.exists(fn): return None, None
    z = np.load(fn, allow_pickle=False)
    X = z['X'].astype(np.float32)
    y = (z['y'] if 'y' in z else z['labels']).astype(np.int64)
    return X, y

def load_test_video(vid):
    fn = os.path.join(CACHE_DIR, f'test_{vid:05d}.npz')
    if not os.path.exists(fn): return None
    z = np.load(fn, allow_pickle=False)
    return z['X'].astype(np.float32)

# 3) Small, strong 1D TCN (dilated convs; left-causal bias via crop)
class TCN1D(nn.Module):
    def __init__(self, in_ch=193, hid=128, num_classes=21, k=7, drop=0.30):
        super().__init__()
        self.stem = nn.Conv1d(in_ch, hid, 1)
        blocks = []
        self.k = k
        self.dils = [1, 2, 4, 8]
        for d in self.dils:
            pad = d * (k - 1)  # pad left+right; crop right later
            blocks += [nn.Conv1d(hid, hid, k, padding=pad, dilation=d),
                       nn.BatchNorm1d(hid),
                       nn.ReLU(inplace=True),
                       nn.Dropout(drop)]
        self.tcn = nn.Sequential(*blocks)
        self.head = nn.Conv1d(hid, num_classes, 1)

    def forward(self, x):               # x: [B,T,F]
        x = x.transpose(1, 2)           # -> [B,F,T]
        h = self.stem(x)
        h = self.tcn(h)
        # total pad per block on right side = d*(k-1); sum d=[1,2,4,8] -> 15*(k-1) with k=7 => 90
        crop = sum(self.dils) * (self.k - 1)
        if crop > 0 and h.shape[-1] > crop:
            h = h[:, :, :-crop]
        logits = self.head(h)           # [B,C,T']
        return logits.transpose(1, 2)   # [B,T',C]

# 4) Train config
CFG = dict(
    epochs=10, patience=2,
    steps_per_epoch=160,
    batch_size=20, win_len=256,
    lr=1e-3, wd=1e-2, eta_min=1e-5,
    label_smoothing=0.05, bg_weight=0.5,
    num_classes=21, grad_clip=1.0
)

def compute_norm_stats(X_list):
    s = np.zeros(X_list[0].shape[1], np.float64)
    ss = np.zeros_like(s); n = 0
    for X in X_list:
        s += X.sum(0); ss += (X.astype(np.float64)**2).sum(0); n += X.shape[0]
    mu = s / max(1, n)
    var = np.maximum(0.0, ss / max(1, n) - mu**2)
    std = np.sqrt(var + 1e-6).astype(np.float32)
    return mu.astype(np.float32), std

def make_class_weights(Y_list, C=21, bg_weight=0.5):
    cnt = np.zeros(C, np.int64)
    for y in Y_list:
        cnt += np.bincount(y, minlength=C)
    cnt = np.maximum(cnt, 1)
    w = (cnt.sum() / cnt.astype(np.float32))**0.5
    w = (w / w.mean()).astype(np.float32)
    w[0] *= float(bg_weight)
    return torch.tensor(w, dtype=torch.float32, device=device)

def sample_batch(X_list, y_list, mu, std, B, T):
    F = X_list[0].shape[1]
    Xb = np.zeros((B, T, F), np.float32)
    yb = np.zeros((B, T), np.int64)
    for i in range(B):
        j = random.randrange(len(X_list))
        Xv, yv = X_list[j], y_list[j]
        if Xv.shape[0] <= T:
            s = 0
        else:
            s = random.randrange(0, Xv.shape[0] - T + 1)
        e = min(s + T, Xv.shape[0])
        xs, ys = Xv[s:e], yv[s:e]
        if xs.shape[0] < T:
            pad = T - xs.shape[0]
            Xb[i] = np.vstack([xs, np.tile(xs[-1:], (pad, 1))])
            yb[i] = np.concatenate([ys, np.full((pad,), ys[-1], np.int64)])
        else:
            Xb[i], yb[i] = xs, ys
    Xb = (Xb - mu.reshape(1, 1, -1)) / std.reshape(1, 1, -1)
    return torch.from_numpy(Xb), torch.from_numpy(yb)

@torch.no_grad()
def infer_full_video(model, Xv, mu, std, chunk=4096):
    model.eval()
    Xn = (Xv - mu) / std
    T = Xn.shape[0]; C = CFG['num_classes']
    probs = np.zeros((T, C), np.float32)
    for s in range(0, T, chunk):
        e = min(T, s + chunk)
        xb = torch.from_numpy(Xn[s:e]).unsqueeze(0).to(device)
        with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
            logits = model(xb).squeeze(0)
        p = F.softmax(logits.float(), dim=-1).cpu().numpy().astype(np.float32)
        probs[s:e] = p
    probs = probs / np.clip(probs.sum(1, keepdims=True), 1e-8, None)
    return probs

# 5) Load all train videos into memory
t0 = time.time()
vids_all, Xs_all, Ys_all = [], [], []
for vid in train_ids:
    X, y = load_train_video(vid)
    if X is None or y is None or X.shape[0] != y.shape[0]:
        continue
    vids_all.append(int(vid)); Xs_all.append(X); Ys_all.append(y)
assert Xs_all and Ys_all, "No training videos loaded from cache_v16"
n_feat = Xs_all[0].shape[1]
print(f"[Load] videos={len(vids_all)} n_feat={n_feat} elapsed={time.time()-t0:.1f}s")

# 6) 5-fold GroupKFold by video id
gkf = GroupKFold(n_splits=5)
vids_arr = np.array(vids_all, np.int32)

fold_times = []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(vids_arr, groups=vids_arr), 1):
    tf0 = time.time()
    X_tr = [Xs_all[i] for i in tr_idx]; y_tr = [Ys_all[i] for i in tr_idx]
    X_va = [Xs_all[i] for i in va_idx]; y_va = [Ys_all[i] for i in va_idx]
    vids_tr = [vids_all[i] for i in tr_idx]; vids_va = [vids_all[i] for i in va_idx]
    print(f"\n[Fold {fold}/5] tr={len(vids_tr)} va={len(vids_va)}")

    mu, std = compute_norm_stats(X_tr)
    class_w = make_class_weights(y_tr, C=CFG['num_classes'], bg_weight=CFG['bg_weight'])

    model = TCN1D(in_ch=n_feat, hid=128, num_classes=CFG['num_classes'], k=7, drop=0.30).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=CFG['wd'])
    sch = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=CFG['epochs'], eta_min=CFG['eta_min'])
    scaler = torch.cuda.amp.GradScaler(enabled=True)

    def val_loss():
        model.eval(); tot = 0.0
        with torch.no_grad():
            for Xv, yv in zip(X_va, y_va):
                xb = torch.from_numpy(((Xv - mu) / std)).unsqueeze(0).to(device)
                yb = torch.from_numpy(yv).unsqueeze(0).to(device)
                with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
                    logits = model(xb)
                    loss = F.cross_entropy(
                        logits.reshape(-1, CFG['num_classes']),
                        yb.reshape(-1),
                        weight=class_w,
                        label_smoothing=CFG['label_smoothing']
                    )
                tot += float(loss.item())
        return tot / max(1, len(X_va))

    best_v, wait = 1e18, 0
    for ep in range(1, CFG['epochs'] + 1):
        t_ep = time.time()
        model.train(); run_loss = 0.0
        for _ in range(CFG['steps_per_epoch']):
            xb, yb = sample_batch(X_tr, y_tr, mu, std, CFG['batch_size'], CFG['win_len'])
            xb = xb.to(device); yb = yb.to(device)
            opt.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):
                logits = model(xb)
                loss = F.cross_entropy(
                    logits.reshape(-1, CFG['num_classes']),
                    yb.reshape(-1),
                    weight=class_w,
                    label_smoothing=CFG['label_smoothing']
                )
            scaler.scale(loss).backward()
            if CFG['grad_clip'] and CFG['grad_clip'] > 0:
                scaler.unscale_(opt)
                nn.utils.clip_grad_norm_(model.parameters(), CFG['grad_clip'])
            scaler.step(opt); scaler.update()
            run_loss += float(loss.item())
        sch.step()
        vloss = val_loss()
        print(f"[Fold {fold}] Ep {ep:02d} tr={run_loss/CFG['steps_per_epoch']:.4f} val={vloss:.4f} lr={sch.get_last_lr()[0]:.6f} time={time.time()-t_ep:.1f}s", flush=True)
        if vloss < best_v - 1e-4:
            best_v, wait = vloss, 0
            torch.save({'model': model.state_dict(), 'mu': mu, 'std': std}, f'tcn_v16_fold{fold}.pt')
        else:
            wait += 1
            if wait >= CFG['patience']:
                print(f"[Fold {fold}] Early stop at ep {ep} (best val={best_v:.4f})")
                break

    # Load best and write OOF/test
    ck = torch.load(f'tcn_v16_fold{fold}.pt', map_location=device)
    model.load_state_dict(ck['model']); mu, std = ck['mu'], ck['std']
    model.eval()

    # OOF
    saved = 0; t_oof = time.time()
    for vid, Xv, yv in zip(vids_va, X_va, y_va):
        P = infer_full_video(model, Xv, mu, std)
        np.savez_compressed(os.path.join(OOF_DIR, f'oof_{vid:05d}.npz'),
                            probs=P.astype(np.float32),
                            y=yv.astype(np.int32),
                            sid=np.array([int(vid)], dtype=np.int32))
        saved += 1
    print(f"[Fold {fold}] OOF wrote {saved} files -> {OOF_DIR} in {time.time()-t_oof:.1f}s")

    # Test (this fold)
    tdir = TEST_FOLD_DIRS[fold - 1]
    tw = 0; t_test = time.time()
    for sid in test_ids:
        Xt = load_test_video(sid)
        if Xt is None: continue
        P = infer_full_video(model, Xt, mu, std)
        np.savez_compressed(os.path.join(tdir, f'test_{sid:05d}.npz'), probs=P.astype(np.float32))
        tw += 1
    print(f"[Fold {fold}] Test wrote {tw} files -> {tdir} in {time.time()-t_test:.1f}s")

    fold_times.append(time.time() - tf0)
    del model; gc.collect(); torch.cuda.empty_cache()

print(f"[TCN] All folds done. Per-fold times (s): {[int(x) for x in fold_times]}")
print("[Next] Run Cell 40 to average test fold probs")
with open('tcn_metadata.txt', 'w') as f:
    f.write("avg_out=./test_probs_v16_tcn/\n")
    f.write("oof_dir=./oof_probs_v16_tcn/\n")

Looking in indexes: https://download.pytorch.org/whl/cu121


Collecting torch==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torch-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (799.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 799.0/799.0 MB 510.2 MB/s eta 0:00:00


Collecting torchvision==0.19.1
  Downloading https://download.pytorch.org/whl/cu121/torchvision-0.19.1%2Bcu121-cp311-cp311-linux_x86_64.whl (7.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.1/7.1 MB 387.6 MB/s eta 0:00:00


Collecting torchaudio==2.4.1
  Downloading https://download.pytorch.org/whl/cu121/torchaudio-2.4.1%2Bcu121-cp311-cp311-linux_x86_64.whl (3.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.4/3.4 MB 512.4 MB/s eta 0:00:00


Collecting nvidia-cuda-nvrtc-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.7/23.7 MB 341.9 MB/s eta 0:00:00
Collecting filelock
  Downloading https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl (11 kB)


Collecting networkx
  Downloading https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl (1.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 535.1 MB/s eta 0:00:00


Collecting nvidia-cusparse-cu12==12.1.0.106
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 196.0/196.0 MB 483.3 MB/s eta 0:00:00


Collecting fsspec
  Downloading https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl (177 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 177.6/177.6 KB 447.2 MB/s eta 0:00:00
Collecting nvidia-nvtx-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 99.1/99.1 KB 396.1 MB/s eta 0:00:00


Collecting nvidia-cusolver-cu12==11.4.5.107
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 124.2/124.2 MB 559.1 MB/s eta 0:00:00
Collecting nvidia-nccl-cu12==2.20.5
  Downloading https://download.pytorch.org/whl/cu121/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 176.2/176.2 MB 468.8 MB/s eta 0:00:00


Collecting nvidia-cublas-cu12==12.1.3.1
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 410.6/410.6 MB 446.6 MB/s eta 0:00:00


Collecting nvidia-cufft-cu12==11.0.2.54
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 121.6/121.6 MB 544.0 MB/s eta 0:00:00


Collecting sympy
  Downloading https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl (6.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.2/6.2 MB 542.1 MB/s eta 0:00:00
Collecting nvidia-cudnn-cu12==9.1.0.70
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 493.7 MB/s eta 0:00:00


Collecting nvidia-cuda-cupti-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.1/14.1 MB 410.6 MB/s eta 0:00:00


Collecting jinja2
  Downloading https://download.pytorch.org/whl/Jinja2-3.1.4-py3-none-any.whl (133 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 133.3/133.3 KB 421.7 MB/s eta 0:00:00
Collecting typing-extensions>=4.8.0
  Downloading https://download.pytorch.org/whl/typing_extensions-4.12.2-py3-none-any.whl (37 kB)


Collecting nvidia-cuda-runtime-cu12==12.1.105
  Downloading https://download.pytorch.org/whl/cu121/nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 823.6/823.6 KB 514.2 MB/s eta 0:00:00
Collecting triton==3.0.0
  Downloading https://download.pytorch.org/whl/triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)


     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.4/209.4 MB 414.5 MB/s eta 0:00:00


Collecting nvidia-curand-cu12==10.3.2.106
  Downloading https://download.pytorch.org/whl/cu121/nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.5/56.5 MB 207.6 MB/s eta 0:00:00


Collecting pillow!=8.3.*,>=5.3.0
  Downloading https://download.pytorch.org/whl/pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 415.9 MB/s eta 0:00:00


Collecting numpy
  Downloading https://download.pytorch.org/whl/numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 164.8 MB/s eta 0:00:00


Collecting nvidia-nvjitlink-cu12
  Downloading https://download.pytorch.org/whl/nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 39.7/39.7 MB 434.3 MB/s eta 0:00:00


Collecting MarkupSafe>=2.0
  Downloading https://download.pytorch.org/whl/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB)
Collecting mpmath<1.4,>=1.1.0
  Downloading https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 KB 520.4 MB/s eta 0:00:00


Installing collected packages: mpmath, typing-extensions, sympy, pillow, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, fsspec, filelock, triton, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, torch, torchvision, torchaudio


Successfully installed MarkupSafe-2.1.5 filelock-3.13.1 fsspec-2024.6.1 jinja2-3.1.4 mpmath-1.3.0 networkx-3.3 numpy-1.26.3 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvtx-cu12-12.1.105 pillow-11.0.0 sympy-1.13.3 torch-2.4.1+cu121 torchaudio-2.4.1+cu121 torchvision-0.19.1+cu121 triton-3.0.0 typing-extensions-4.12.2


[Torch] 2.4.1+cu121
Tue Sep 30 01:29:33 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.144.06             Driver Version: 550.144.06     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A10-24Q                 On  |   00000002:00:00.0 Off |                    0 |
| N/A   N/A    P0             N/A /  N/A  |    1475MiB /  24512MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                            

[Load] videos=297 n_feat=193 elapsed=0.7s

[Fold 1/5] tr=237 va=60


  scaler = torch.cuda.amp.GradScaler(enabled=True)
  with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):


  with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):


[Fold 1] Ep 01 tr=2.4385 val=1.8029 lr=0.000976 time=2.2s


[Fold 1] Ep 02 tr=1.6583 val=1.5584 lr=0.000905 time=0.8s


[Fold 1] Ep 03 tr=1.4260 val=1.4692 lr=0.000796 time=0.8s


[Fold 1] Ep 04 tr=1.3123 val=1.4281 lr=0.000658 time=0.8s


[Fold 1] Ep 05 tr=1.2382 val=1.4091 lr=0.000505 time=0.8s


[Fold 1] Ep 06 tr=1.1695 val=1.3906 lr=0.000352 time=0.8s


[Fold 1] Ep 07 tr=1.1339 val=1.3912 lr=0.000214 time=0.8s


[Fold 1] Ep 08 tr=1.1024 val=1.3787 lr=0.000105 time=0.8s


[Fold 1] Ep 09 tr=1.0760 val=1.3754 lr=0.000034 time=0.8s


[Fold 1] Ep 10 tr=1.0638 val=1.3731 lr=0.000010 time=0.8s


[Fold 1] OOF wrote 60 files -> ./oof_probs_v16_tcn in 0.2s


  ck = torch.load(f'tcn_v16_fold{fold}.pt', map_location=device)
  with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16):


[Fold 1] Test wrote 95 files -> ./test_probs_v16_tcn_fold1 in 1.6s

[Fold 2/5] tr=237 va=60


[Fold 2] Ep 01 tr=2.4150 val=1.7535 lr=0.000976 time=1.3s


[Fold 2] Ep 02 tr=1.6621 val=1.4716 lr=0.000905 time=0.8s


[Fold 2] Ep 03 tr=1.4418 val=1.3747 lr=0.000796 time=0.8s


[Fold 2] Ep 04 tr=1.3100 val=1.3459 lr=0.000658 time=0.8s


[Fold 2] Ep 05 tr=1.2261 val=1.3347 lr=0.000505 time=0.8s


[Fold 2] Ep 06 tr=1.1657 val=1.3188 lr=0.000352 time=0.8s


[Fold 2] Ep 07 tr=1.1228 val=1.3200 lr=0.000214 time=0.8s


[Fold 2] Ep 08 tr=1.0890 val=1.3037 lr=0.000105 time=0.8s


[Fold 2] Ep 09 tr=1.0650 val=1.2949 lr=0.000034 time=0.8s


[Fold 2] Ep 10 tr=1.0560 val=1.2961 lr=0.000010 time=0.8s


[Fold 2] OOF wrote 60 files -> ./oof_probs_v16_tcn in 0.2s


[Fold 2] Test wrote 95 files -> ./test_probs_v16_tcn_fold2 in 0.5s

[Fold 3/5] tr=238 va=59


[Fold 3] Ep 01 tr=2.3797 val=1.7797 lr=0.000976 time=1.1s


[Fold 3] Ep 02 tr=1.6293 val=1.5244 lr=0.000905 time=0.8s


[Fold 3] Ep 03 tr=1.4366 val=1.4630 lr=0.000796 time=0.8s


[Fold 3] Ep 04 tr=1.3119 val=1.4142 lr=0.000658 time=0.8s


[Fold 3] Ep 05 tr=1.2163 val=1.4101 lr=0.000505 time=0.8s


[Fold 3] Ep 06 tr=1.1644 val=1.4011 lr=0.000352 time=0.8s


[Fold 3] Ep 07 tr=1.1180 val=1.4094 lr=0.000214 time=0.8s


[Fold 3] Ep 08 tr=1.0947 val=1.4013 lr=0.000105 time=0.8s


[Fold 3] Early stop at ep 8 (best val=1.4011)
[Fold 3] OOF wrote 59 files -> ./oof_probs_v16_tcn in 0.2s


[Fold 3] Test wrote 95 files -> ./test_probs_v16_tcn_fold3 in 0.5s

[Fold 4/5] tr=238 va=59


[Fold 4] Ep 01 tr=2.3880 val=1.7272 lr=0.000976 time=1.0s


[Fold 4] Ep 02 tr=1.6542 val=1.5313 lr=0.000905 time=0.8s


[Fold 4] Ep 03 tr=1.4504 val=1.3977 lr=0.000796 time=0.8s


[Fold 4] Ep 04 tr=1.3395 val=1.3349 lr=0.000658 time=0.8s


[Fold 4] Ep 05 tr=1.2437 val=1.3052 lr=0.000505 time=0.8s


[Fold 4] Ep 06 tr=1.1764 val=1.3043 lr=0.000352 time=0.8s


[Fold 4] Ep 07 tr=1.1344 val=1.2866 lr=0.000214 time=0.8s


[Fold 4] Ep 08 tr=1.1073 val=1.2761 lr=0.000105 time=0.8s


[Fold 4] Ep 09 tr=1.0911 val=1.2700 lr=0.000034 time=0.8s


[Fold 4] Ep 10 tr=1.0664 val=1.2673 lr=0.000010 time=0.8s


[Fold 4] OOF wrote 59 files -> ./oof_probs_v16_tcn in 0.2s


[Fold 4] Test wrote 95 files -> ./test_probs_v16_tcn_fold4 in 0.5s

[Fold 5/5] tr=238 va=59


[Fold 5] Ep 01 tr=2.4663 val=1.8318 lr=0.000976 time=1.0s


[Fold 5] Ep 02 tr=1.6643 val=1.5545 lr=0.000905 time=0.8s


[Fold 5] Ep 03 tr=1.4426 val=1.4443 lr=0.000796 time=0.8s


[Fold 5] Ep 04 tr=1.3112 val=1.4103 lr=0.000658 time=0.8s


[Fold 5] Ep 05 tr=1.2265 val=1.3905 lr=0.000505 time=0.8s


[Fold 5] Ep 06 tr=1.1708 val=1.3955 lr=0.000352 time=0.8s


[Fold 5] Ep 07 tr=1.1277 val=1.3636 lr=0.000214 time=0.8s


[Fold 5] Ep 08 tr=1.0840 val=1.3644 lr=0.000105 time=0.8s


[Fold 5] Ep 09 tr=1.0796 val=1.3597 lr=0.000034 time=0.8s


[Fold 5] Ep 10 tr=1.0703 val=1.3541 lr=0.000010 time=0.8s


[Fold 5] OOF wrote 59 files -> ./oof_probs_v16_tcn in 0.1s


[Fold 5] Test wrote 95 files -> ./test_probs_v16_tcn_fold5 in 0.5s
[TCN] All folds done. Per-fold times (s): [12, 9, 7, 8, 8]
[Next] Run Cell 40 to average test fold probs


In [55]:
# Cell 40: Average 5-fold test probabilities -> ./test_probs_v16_tcn/
import os, glob, numpy as np, pandas as pd

FOLD_DIRS = [f'./test_probs_v16_tcn_fold{i}' for i in range(1, 6)]
OUT_DIR = './test_probs_v16_tcn'
os.makedirs(OUT_DIR, exist_ok=True)

test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
wrote = 0
for sid in test_ids:
    Ps = []
    for d in FOLD_DIRS:
        fn = os.path.join(d, f'test_{sid:05d}.npz')
        if os.path.exists(fn):
            z = np.load(fn, allow_pickle=True)
            if 'probs' in z: Ps.append(z['probs'].astype(np.float32))
    if not Ps: continue
    Lmin = min(p.shape[0] for p in Ps)
    Ps = [p[:Lmin] for p in Ps]
    P = np.mean(Ps, axis=0).astype(np.float32)
    P = P / np.clip(P.sum(1, keepdims=True), 1e-8, None)
    out_fn = os.path.join(OUT_DIR, f'test_{sid:05d}.npz')
    np.savez_compressed(out_fn, probs=P)
    wrote += 1

print(f"[Average] Wrote {wrote} test videos to {OUT_DIR}")

[Average] Wrote 95 test videos to ./test_probs_v16_tcn


In [57]:
# Cell 41 (fast path): Decode averaged TCN test probs with fixed config -> submission.csv
import os, glob, numpy as np, pandas as pd, json, time
from scipy.special import logit, softmax
from scipy.signal import convolve

OOF_DIR = './oof_probs_v16_tcn'
TEST_DIR = './test_probs_v16_tcn'
assert os.path.isdir(OOF_DIR), f'Missing {OOF_DIR}. Train Cell 39 first.'
assert os.path.isdir(TEST_DIR), f'Missing {TEST_DIR}. Run Cell 40 to average folds.'

# Background index
bg_idx = 0
if os.path.exists('bg_index.json'):
    try:
        with open('bg_index.json','r') as f:
            bg_idx = int(json.load(f).get('bg_idx', 0))
    except Exception:
        pass
print(f'[Decoder] Using bg_idx={bg_idx}')

def load_oof_labeled_map(oof_dir=OOF_DIR):
    vids, Pmap, Ymap = [], {}, {}
    files = sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz')))
    for fn in files:
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        if 'probs' in z:
            Pmap[sid] = z['probs'].astype(np.float32)
        elif 'P' in z:
            Pmap[sid] = z['P'].astype(np.float32)
        else:
            continue
        if 'y' in z:
            Ymap[sid] = z['y'].astype(np.int32)
        elif 'labels' in z:
            Ymap[sid] = z['labels'].astype(np.int32)
        else:
            continue
        vids.append(sid)
    return sorted(vids), Pmap, Ymap

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, dtype=np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best:
                best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias=0.25, smooth=3, g=0.95):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, bg_idx] += float(bg_bias)
    if smooth and smooth > 1:
        k = np.ones(int(smooth), np.float32) / float(smooth)
        L = np.stack([convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exactK(margin, K=20, min_len=1):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha=0.85, cost_mode='trimmed_mean_20'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = [i for i in range(C) if i != bg_idx]
    margin = (L[:, cols].max(1) - L[:, bg_idx]).astype(np.float32)
    segs = segment_exactK(margin, K=K, min_len=min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, cols]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n)); agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    # Hungarian
    from scipy.optimize import linear_sum_assignment
    r, c = linear_sum_assignment(Cmat)
    order_nonbg_idx = [int(c[j]) for j in np.argsort(r)]
    order_cls = [idx+1 for idx in order_nonbg_idx]
    return order_cls

# 1) Load OOF labeled frames for temperature calibration
vids, Pmap, Ymap = load_oof_labeled_map(OOF_DIR)
assert vids, f'No OOF files in {OOF_DIR}'
P_all = np.concatenate([Pmap[v] for v in vids], 0)
y_all = np.concatenate([Ymap[v] for v in vids], 0)
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)
Tcls = fit_per_class_temperature(P_all, y_all, temp_grid)
print('[Calib] Tcls first10:', np.round(Tcls[:10], 3).tolist())

# 2) Fixed best config (per expert): S5 trimmed_mean_20, alpha=0.85, bg_bias=0.20, smooth=3, g=0.95
cfg = dict(mode='trimmed_mean_20', alpha=0.85, bb=0.20, smooth=3, g=0.95)

# 3) Decode averaged test probs with fixed config
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
rows = []; wrote = 0
t0 = time.time()
for sid in sorted(test_ids):
    fn = os.path.join(TEST_DIR, f'test_{sid:05d}.npz')
    if not os.path.exists(fn):
        continue
    z = np.load(fn, allow_pickle=True)
    if 'probs' not in z: continue
    P = z['probs'].astype(np.float32)
    L = to_calibrated_logprobs(P, Tcls, bg_bias=cfg['bb'], smooth=cfg['smooth'], g=cfg['g'])
    seq = decode_with_cost_logprob(L, alpha=cfg['alpha'], cost_mode=cfg['mode'])
    # Rescue to permutation 1..20
    seen, dup_idx = set(), []
    for i, x in enumerate(seq):
        if x in seen: dup_idx.append(i)
        else: seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing): seq[i] = missing[j]
    if len(seq) < 20: seq += [k for k in range(1, 21) if k not in set(seq)]
    seq = seq[:20]
    rows.append((sid, ' '.join(map(str, seq))))
    wrote += 1
print(f'[Decode] wrote sequences for {wrote}/{len(test_ids)} test videos in {time.time()-t0:.1f}s')

sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
sub.to_csv('submission.csv', index=False)
print('[Write] submission.csv', sub.shape, 'cfg=', cfg)

[Decoder] Using bg_idx=0


[Calib] Tcls first10: [1.600000023841858, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426, 0.9449999928474426]


[Decode] wrote sequences for 95/95 test videos in 35.5s
[Write] submission.csv (95, 2) cfg= {'mode': 'trimmed_mean_20', 'alpha': 0.85, 'bb': 0.2, 'smooth': 3, 'g': 0.95}


In [58]:
# Cell 42: Blend TCN + XGB per-frame probs (v16), OOF-gated small grid, decode test -> submission.csv
import os, glob, time, json, numpy as np, pandas as pd
from scipy.special import logit, softmax
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

OOF_TCN_DIR = './oof_probs_v16_tcn'
OOF_XGB_DIR = './oof_probs_v16_labeled'  # labeled OOF for v16 XGB
TEST_TCN_DIR = './test_probs_v16_tcn'
TEST_XGB_DIR = './test_probs_v16'

# Background index
bg_idx = 0
if os.path.exists('bg_index.json'):
    try:
        with open('bg_index.json','r') as f:
            bg_idx = int(json.load(f).get('bg_idx', 0))
    except Exception:
        pass
print(f'[Blend] Using bg_idx={bg_idx}')

def load_oof_map(oof_dir):
    vids, Pmap, Ymap = [], {}, {}
    files = sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz')))
    for fn in files:
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        P = z['probs'] if 'probs' in z else (z['P'] if 'P' in z else None)
        y = z['y'] if 'y' in z else (z['labels'] if 'labels' in z else None)
        if P is None or y is None: continue
        Pmap[int(sid)] = P.astype(np.float32); Ymap[int(sid)] = y.astype(np.int32); vids.append(int(sid))
    return sorted(vids), Pmap, Ymap

def collapse_y_to_sequence(y):
    seq = []; prev = -1
    for t in y:
        ti = int(t)
        if ti != prev and ti > 0: seq.append(ti)
        prev = ti
    return seq[:20]

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, dtype=np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc)
        yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias=0.25, smooth=3, g=0.95):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, bg_idx] += float(bg_bias)
    if smooth and smooth > 1:
        k = np.ones(int(smooth), np.float32) / float(smooth)
        L = np.stack([convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exactK(margin, K=20, min_len=1):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha=0.85, cost_mode='trimmed_mean_20'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = [i for i in range(C) if i != bg_idx]
    margin = (L[:, cols].max(1) - L[:, bg_idx]).astype(np.float32)
    segs = segment_exactK(margin, K=K, min_len=min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, cols]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n)); agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    order_nonbg_idx = [int(c[j]) for j in np.argsort(r)]
    order_cls = [idx+1 for idx in order_nonbg_idx]
    return order_cls

def levenshtein(a, b):
    n, m = len(a), len(b); dp = list(range(m+1))
    for i in range(1, n+1):
        prev, dp[0] = dp[0], i
        for j in range(1, m+1):
            cur = min(dp[j]+1, dp[j-1]+1, prev + (0 if a[i-1]==b[j-1] else 1))
            prev, dp[j] = dp[j], cur
    return dp[m]

# 1) Load OOF for TCN and XGB(v16), intersect videos
vids_t, P_t, Y_t = load_oof_map(OOF_TCN_DIR)
vids_x, P_x, Y_x = load_oof_map(OOF_XGB_DIR)
common = sorted(list(set(vids_t).intersection(vids_x)))
assert common, 'No overlapping OOF videos between TCN and XGB'
print('[Blend] OOF intersect vids:', len(common))

# Pre-build GT sequences from y-collapsed
seq_gt = {vid: collapse_y_to_sequence(Y_t.get(vid, Y_x.get(vid))) for vid in common}

# 2) Small OOF-gated grid over blend weight and decoder params
weights = [0.6, 0.7, 0.8]  # w_tcn
modes = ['median', 'trimmed_mean_20']
gs = [0.90, 0.95]
bg_biases = [0.20, 0.25]
smooths = [3]
alpha = 0.85
temp_grid = np.linspace(0.7, 1.6, 12).astype(np.float32)

def eval_cfg(w_tcn, mode, g, bb, smooth):
    # Build blended OOF frames and fit temps on all frames
    X_all, y_all = [], []
    for vid in common:
        Pt, Px = P_t[vid], P_x[vid]
        Lmin = min(Pt.shape[0], Px.shape[0])
        Pb = (w_tcn*Pt[:Lmin] + (1.0-w_tcn)*Px[:Lmin]).astype(np.float32)
        yv = Y_t.get(vid, Y_x.get(vid))[:Lmin]
        X_all.append(Pb); y_all.append(yv)
    P_all = np.concatenate(X_all, 0); y_all = np.concatenate(y_all, 0)
    Tcls = fit_per_class_temperature(P_all, y_all, temp_grid)
    # OOF-Lev over common vids
    dists = []
    for vid in common:
        Pt, Px = P_t[vid], P_x[vid]
        Lmin = min(Pt.shape[0], Px.shape[0])
        Pb = (w_tcn*Pt[:Lmin] + (1.0-w_tcn)*Px[:Lmin]).astype(np.float32)
        L = to_calibrated_logprobs(Pb, Tcls, bg_bias=bb, smooth=smooth, g=g)
        pred = decode_with_cost_logprob(L, alpha=alpha, cost_mode=mode)
        gt = seq_gt.get(vid, [])
        if len(gt) < 1: continue
        gg = gt[:20]; pp = pred[:len(gg)]
        dists.append(levenshtein(pp, gg)/float(len(gg)))
    return (float(np.mean(dists)) if dists else 1.0), len(dists)

best = (1.0, None)
t0 = time.time()
for w in weights:
    for mode in modes:
        for g in gs:
            for bb in bg_biases:
                for s in smooths:
                    score, n = eval_cfg(w, mode, g, bb, s)
                    print(f"[OOF-Blend] w_tcn={w} mode={mode} g={g} bb={bb} s={s} -> {score:.5f} (n={n})")
                    if score < best[0]:
                        best = (score, dict(w=w, mode=mode, g=g, bb=bb, s=s))
print('[OOF-Blend] Best:', best, 'elapsed', f'{time.time()-t0:.1f}s')

# 3) Decode TEST with best config using blended per-frame probs from TEST_TCN_DIR + TEST_XGB_DIR
cfg = best[1] if best[1] is not None else dict(w=0.7, mode='trimmed_mean_20', g=0.95, bb=0.20, s=3)
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()

def load_test_probs_map(d, ids):
    m = {}
    for vid in ids:
        for pat in (f'test_{vid:05d}.npz', f'{vid:05d}.npz', f'{vid}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: m[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: m[vid] = z['P'].astype(np.float32)
                break
    return m

M_t = load_test_probs_map(TEST_TCN_DIR, test_ids)
M_x = load_test_probs_map(TEST_XGB_DIR, test_ids)
ids_common = sorted(list(set(M_t.keys()).intersection(M_x.keys())))
print('[Blend-Test] vids common:', len(ids_common))

# Refit temps on blended OOF with chosen weight
X_all, y_all = [], []
for vid in common:
    Pt, Px = P_t[vid], P_x[vid]
    Lmin = min(Pt.shape[0], Px.shape[0])
    Pb = (cfg['w']*Pt[:Lmin] + (1.0-cfg['w'])*Px[:Lmin]).astype(np.float32)
    yv = Y_t.get(vid, Y_x.get(vid))[:Lmin]
    X_all.append(Pb); y_all.append(yv)
P_all = np.concatenate(X_all, 0); y_all = np.concatenate(y_all, 0)
Tcls_best = fit_per_class_temperature(P_all, y_all, np.linspace(0.7, 1.6, 12).astype(np.float32))

rows = []; wrote = 0
for vid in sorted(test_ids):
    Pt = M_t.get(vid); Px = M_x.get(vid)
    if Pt is None or Px is None: continue
    Lmin = min(Pt.shape[0], Px.shape[0])
    Pb = (cfg['w']*Pt[:Lmin] + (1.0-cfg['w'])*Px[:Lmin]).astype(np.float32)
    L = to_calibrated_logprobs(Pb, Tcls_best, bg_bias=cfg['bb'], smooth=cfg['s'], g=cfg['g'])
    seq = decode_with_cost_logprob(L, alpha=alpha, cost_mode=cfg['mode'])
    # rescue permutation-20
    seen, dup_idx = set(), []
    for i, x in enumerate(seq):
        if x in seen: dup_idx.append(i)
        else: seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing): seq[i] = missing[j]
    if len(seq) < 20: seq += [k for k in range(1,21) if k not in set(seq)]
    seq = seq[:20]
    rows.append((vid, ' '.join(map(str, seq))))
    wrote += 1
sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
sub.to_csv('submission.csv', index=False)
print('[Write] submission.csv', sub.shape, 'best_cfg=', cfg)

[Blend] Using bg_idx=0


[Blend] OOF intersect vids: 297


[OOF-Blend] w_tcn=0.6 mode=median g=0.9 bb=0.2 s=3 -> 0.40089 (n=297)


[OOF-Blend] w_tcn=0.6 mode=median g=0.9 bb=0.25 s=3 -> 0.40089 (n=297)


[OOF-Blend] w_tcn=0.6 mode=median g=0.95 bb=0.2 s=3 -> 0.40072 (n=297)


[OOF-Blend] w_tcn=0.6 mode=median g=0.95 bb=0.25 s=3 -> 0.40072 (n=297)


[OOF-Blend] w_tcn=0.6 mode=trimmed_mean_20 g=0.9 bb=0.2 s=3 -> 0.38635 (n=297)


[OOF-Blend] w_tcn=0.6 mode=trimmed_mean_20 g=0.9 bb=0.25 s=3 -> 0.38635 (n=297)


[OOF-Blend] w_tcn=0.6 mode=trimmed_mean_20 g=0.95 bb=0.2 s=3 -> 0.38652 (n=297)


[OOF-Blend] w_tcn=0.6 mode=trimmed_mean_20 g=0.95 bb=0.25 s=3 -> 0.38652 (n=297)


[OOF-Blend] w_tcn=0.7 mode=median g=0.9 bb=0.2 s=3 -> 0.40343 (n=297)


[OOF-Blend] w_tcn=0.7 mode=median g=0.9 bb=0.25 s=3 -> 0.40343 (n=297)


[OOF-Blend] w_tcn=0.7 mode=median g=0.95 bb=0.2 s=3 -> 0.40444 (n=297)


[OOF-Blend] w_tcn=0.7 mode=median g=0.95 bb=0.25 s=3 -> 0.40444 (n=297)


[OOF-Blend] w_tcn=0.7 mode=trimmed_mean_20 g=0.9 bb=0.2 s=3 -> 0.38701 (n=297)


[OOF-Blend] w_tcn=0.7 mode=trimmed_mean_20 g=0.9 bb=0.25 s=3 -> 0.38701 (n=297)


[OOF-Blend] w_tcn=0.7 mode=trimmed_mean_20 g=0.95 bb=0.2 s=3 -> 0.38684 (n=297)


[OOF-Blend] w_tcn=0.7 mode=trimmed_mean_20 g=0.95 bb=0.25 s=3 -> 0.38684 (n=297)


KeyboardInterrupt: 

In [59]:
# Cell 43: FAST WRITE blended TCN+XGB submission using best-seen cfg (w=0.6, trimmed_mean_20, g=0.90, bb=0.20, s=3)
import os, glob, numpy as np, pandas as pd, json, time
from scipy.special import logit, softmax
from scipy.signal import convolve
from scipy.optimize import linear_sum_assignment

OOF_TCN_DIR = './oof_probs_v16_tcn'
OOF_XGB_DIR = './oof_probs_v16_labeled'
TEST_TCN_DIR = './test_probs_v16_tcn'
TEST_XGB_DIR = './test_probs_v16'

# Config from best OOF-Blend observed
w_tcn = 0.6
cfg = dict(mode='trimmed_mean_20', g=0.90, bb=0.20, smooth=3, alpha=0.85)

# Background index
bg_idx = 0
if os.path.exists('bg_index.json'):
    try:
        with open('bg_index.json','r') as f:
            bg_idx = int(json.load(f).get('bg_idx', 0))
    except Exception:
        pass
print(f'[FAST-BLEND] bg_idx={bg_idx} cfg={cfg} w_tcn={w_tcn}')

def load_oof_map(oof_dir):
    vids, Pmap, Ymap = [], {}, {}
    for fn in sorted(glob.glob(os.path.join(oof_dir, 'oof_*.npz'))):
        z = np.load(fn, allow_pickle=True)
        sid = int(z['sid'][0]) if 'sid' in z else int(os.path.basename(fn).split('_')[1].split('.')[0])
        P = z['probs'] if 'probs' in z else (z['P'] if 'P' in z else None)
        y = z['y'] if 'y' in z else (z['labels'] if 'labels' in z else None)
        if P is None or y is None: continue
        vids.append(int(sid)); Pmap[int(sid)] = P.astype(np.float32); Ymap[int(sid)] = y.astype(np.int32)
    return sorted(vids), Pmap, Ymap

def fit_per_class_temperature(P_oof, y_oof, temp_grid):
    C = P_oof.shape[1]; Tcls = np.ones(C, dtype=np.float32)
    for c in range(C):
        pc = np.clip(P_oof[:, c], 1e-6, 1-1e-6); z = logit(pc); yc = (y_oof == c).astype(np.float32)
        best, bestT = 1e18, 1.0
        for T in temp_grid:
            p = 1.0/(1.0 + np.exp(-z/float(T)))
            nll = -(yc*np.log(np.clip(p,1e-6,1))).mean() - ((1-yc)*np.log(np.clip(1-p,1e-6,1))).mean()
            if nll < best: best, bestT = float(nll), float(T)
        Tcls[c] = bestT
    return Tcls.astype(np.float32)

def to_calibrated_logprobs(P, Tcls, bg_bias=0.25, smooth=3, g=0.95):
    P = np.clip(P, 1e-6, 1-1e-6).astype(np.float32)
    Z = logit(P).astype(np.float32) / (Tcls.reshape(1, -1) * float(g))
    Pcal = softmax(Z, axis=1).astype(np.float32)
    Pcal /= np.clip(Pcal.sum(1, keepdims=True), 1e-6, None)
    L = np.log(np.clip(Pcal, 1e-6, 1.0)).astype(np.float32)
    L[:, bg_idx] += float(bg_bias)
    if smooth and smooth > 1:
        k = np.ones(int(smooth), np.float32) / float(smooth)
        L = np.stack([convolve(L[:, i], k, mode='same') for i in range(L.shape[1])], 1).astype(np.float32)
    return L

def segment_exactK(margin, K=20, min_len=1):
    T = int(len(margin)); min_len = int(max(1, min_len))
    while K*min_len > T and min_len > 1: min_len -= 1
    if K*min_len > T: K = min(K, T); min_len = 1
    pref = np.concatenate([[0.0], np.cumsum(margin, 0).astype(np.float32)])
    dp = -1e18*np.ones((K+1, T+1), np.float32); bt = -np.ones((K+1, T+1), np.int32)
    dp[0, 0] = 0.0
    for k in range(1, K+1):
        start_min = (k-1)*min_len
        for t in range(k*min_len, T+1):
            s_lo = max(start_min, t - (T - (K-k)*min_len))
            bestv, bests = -1e18, -1
            for s in range(s_lo, t-min_len+1):
                v = dp[k-1, s] + (pref[t] - pref[s])
                if v > bestv: bestv, bests = v, s
            dp[k, t] = bestv; bt[k, t] = bests
    bounds = []; k, t = K, T
    while k > 0:
        s = int(bt[k, t]); bounds.append((s, t)); t = s; k -= 1
    bounds.reverse(); return bounds

def decode_with_cost_logprob(L, alpha=0.85, cost_mode='trimmed_mean_20'):
    T, C = L.shape; K = 20
    min_len = max(2, int(alpha * T / float(K)))
    cols = [i for i in range(C) if i != bg_idx]
    margin = (L[:, cols].max(1) - L[:, bg_idx]).astype(np.float32)
    segs = segment_exactK(margin, K=K, min_len=min_len)
    Cmat = np.zeros((len(segs), K), np.float32)
    for j, (s, e) in enumerate(segs):
        seg = L[s:e, cols]
        if cost_mode == 'trimmed_mean_20':
            n = seg.shape[0]; lo = int(0.1*n); hi = max(lo+1, int(0.9*n)); agg = np.sort(seg, axis=0)[lo:hi].mean(0)
        elif cost_mode == 'median':
            agg = np.median(seg, axis=0)
        else:
            agg = seg.mean(0)
        Cmat[j, :] = -agg
    r, c = linear_sum_assignment(Cmat)
    order_nonbg_idx = [int(c[j]) for j in np.argsort(r)]
    return [idx+1 for idx in order_nonbg_idx]

def load_test_probs_map(d, ids):
    m = {}
    for vid in ids:
        for pat in (f'test_{vid:05d}.npz', f'{vid:05d}.npz', f'{vid}.npz'):
            fn = os.path.join(d, pat)
            if os.path.exists(fn):
                z = np.load(fn, allow_pickle=True)
                if 'probs' in z: m[vid] = z['probs'].astype(np.float32)
                elif 'P' in z: m[vid] = z['P'].astype(np.float32)
                break
    return m

# 1) Fit temps on blended OOF with chosen weight
vids_t, P_t, Y_t = load_oof_map(OOF_TCN_DIR)
vids_x, P_x, Y_x = load_oof_map(OOF_XGB_DIR)
common = sorted(list(set(vids_t).intersection(vids_x)))
assert common, 'No overlapping OOF videos between TCN and XGB'
X_all, y_all = [], []
for vid in common:
    Pt, Px = P_t[vid], P_x[vid]
    Lmin = min(Pt.shape[0], Px.shape[0])
    Pb = (w_tcn*Pt[:Lmin] + (1.0-w_tcn)*Px[:Lmin]).astype(np.float32)
    yv = Y_t.get(vid, Y_x.get(vid))[:Lmin]
    X_all.append(Pb); y_all.append(yv)
P_all = np.concatenate(X_all, 0); y_all = np.concatenate(y_all, 0)
Tcls = fit_per_class_temperature(P_all, y_all, np.linspace(0.7, 1.6, 12).astype(np.float32))
print('[FAST-BLEND] Fitted per-class temps (first10):', np.round(Tcls[:10], 3).tolist())

# 2) Decode blended TEST
test_ids = pd.read_csv('test.csv')['Id'].astype(int).tolist()
M_t = load_test_probs_map(TEST_TCN_DIR, test_ids)
M_x = load_test_probs_map(TEST_XGB_DIR, test_ids)
rows = []; wrote = 0; t0 = time.time()
for vid in sorted(test_ids):
    Pt = M_t.get(vid); Px = M_x.get(vid)
    if Pt is None or Px is None: continue
    Lmin = min(Pt.shape[0], Px.shape[0])
    Pb = (w_tcn*Pt[:Lmin] + (1.0-w_tcn)*Px[:Lmin]).astype(np.float32)
    L = to_calibrated_logprobs(Pb, Tcls, bg_bias=cfg['bb'], smooth=cfg['smooth'], g=cfg['g'])
    seq = decode_with_cost_logprob(L, alpha=cfg['alpha'], cost_mode=cfg['mode'])
    # Rescue permutation 1..20
    seen, dup_idx = set(), []
    for i, x in enumerate(seq):
        if x in seen: dup_idx.append(i)
        else: seen.add(x)
    missing = [k for k in range(1, 21) if k not in seen]
    for j, i in enumerate(dup_idx):
        if j < len(missing): seq[i] = missing[j]
    if len(seq) < 20: seq += [k for k in range(1, 21) if k not in set(seq)]
    seq = seq[:20]
    rows.append((vid, ' '.join(map(str, seq))))
    wrote += 1
sub = pd.DataFrame(rows, columns=['Id','Sequence']).sort_values('Id')
sub.to_csv('submission.csv', index=False)
print('[FAST-BLEND] Wrote submission.csv', sub.shape, 'wrote', wrote, 'elapsed', f'{time.time()-t0:.1f}s')
assert sub.shape[0] == 95, 'Submission missing rows; ensure both TCN and XGB test probs exist'

[FAST-BLEND] bg_idx=0 cfg={'mode': 'trimmed_mean_20', 'g': 0.9, 'bb': 0.2, 'smooth': 3, 'alpha': 0.85} w_tcn=0.6


[FAST-BLEND] Fitted per-class temps (first10): [1.1089999675750732, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.8640000224113464, 0.9449999928474426]


[FAST-BLEND] Wrote submission.csv (95, 2) wrote 95 elapsed 34.7s
