# Training Notebook (Kaggle/Local)

Notebook ringkas untuk training model **BTD + Radar → T1/T2/T3** menggunakan manifest CSV.

**Anda hanya perlu mengubah path dataset** pada bagian `BASE_DIR` dan `MANIFEST_DIR`.

## 0. (Opsional) Install dependency

Jika Anda menjalankan di Kaggle dan belum menyiapkan environment sendiri, jalankan cell berikut. Jika sudah, boleh skip.

In [None]:
# Optional: quietly install the packages listed in requirements.txt (the path
# is relative to the notebook's working directory). The trailing `|| true`
# makes the shell command report success even when pip fails, so a failed
# install does not abort the run; look for pip error output if later imports fail.
!pip -q install -r requirements.txt || true

## 1. Konfigurasi path

- `BASE_DIR` menunjuk ke folder `NPZ/` yang berisi `btd_*/radar/t1/t2/t3`.
- `MANIFEST_DIR` menunjuk ke folder `manifests/` yang berisi `train_leadXX.csv`, `val_leadXX.csv`, dll.
- `OUT_DIR` adalah output training (weights/log).

In [None]:
import os, sys, json
from pathlib import Path

# Repository root. When this notebook sits at the root of the repo this is
# simply the (resolved) current working directory.
REPO_DIR = Path('.').resolve()

# Put the repo root at the front of sys.path so that `import src...` works.
# Any earlier occurrence is removed first, so re-running this cell keeps a
# single entry instead of accumulating duplicates.
_repo_entry = str(REPO_DIR)
sys.path[:] = [entry for entry in sys.path if entry != _repo_entry]
sys.path.insert(0, _repo_entry)

# ====== CHANGE THIS SECTION TO MATCH YOUR DATASET LOCATION ======
# Kaggle example:
# BASE_DIR     = '/kaggle/input/NAMA_DATASET/NPZ'
# MANIFEST_DIR = '/kaggle/input/NAMA_DATASET/manifests'
# Local example:
# BASE_DIR     = r'D:\NPZ'
# MANIFEST_DIR = r'D:\NPZ\manifests'

BASE_DIR = '/kaggle/input/NAMA_DATASET/NPZ'
MANIFEST_DIR = '/kaggle/input/NAMA_DATASET/manifests'
OUT_DIR = '/kaggle/working/run_h60_BTDRadar'
# Create the output folder up front so later cells can write into it.
os.makedirs(OUT_DIR, exist_ok=True)

# ====== EXPERIMENT CONFIGURATION ======
MODE = 'BTDRadar'      # 'BTDRadar' | 'onlyRadar' | 'BTDOnly'
HORIZON = 60           # 10 | 30 | 60
EPOCHS = 80
BATCH_SIZE = 4
WINDOW_STEPS = 12
CADENCE_MIN = 10
SEED = 42
EXPECTED_HW = (128, 128)  # change if your grids have a different size

# Echo the resolved locations so a wrong path is noticed before training.
print('REPO_DIR:', REPO_DIR)
print('BASE_DIR:', BASE_DIR)
print('MANIFEST_DIR:', MANIFEST_DIR)
print('OUT_DIR:', OUT_DIR)


## 2. Import modul dan siapkan generator

In [None]:
import tensorflow as tf

from src.data.npz_sequence_tf import NPZSequence, compute_norm_stats, estimate_alpha_from_targets, NormStats
from src.modeling import build_model, compile_model
from src.metrics_tf import F1ScorePerChannel, ThreatScorePerChannel, BiasMetricPerChannel, LRSnapshot

# Seed the random number generators before any data pipeline is created.
tf.keras.utils.set_random_seed(SEED)

# Manifest CSVs for the selected forecast horizon.
train_csv = os.path.join(MANIFEST_DIR, f'train_lead{HORIZON}.csv')
val_csv   = os.path.join(MANIFEST_DIR, f'val_lead{HORIZON}.csv')

# Arguments shared by the training and validation sequences; the two differ
# only in their manifest file and in whether samples are shuffled.
# Normalisation statistics start as None and are attached in a later cell.
_seq_kwargs = dict(
    base_dir=BASE_DIR,
    mode=MODE,
    horizon_min=HORIZON,
    batch_size=BATCH_SIZE,
    window_steps=WINDOW_STEPS,
    cadence_min=CADENCE_MIN,
    seed=SEED,
    expected_hw=EXPECTED_HW,
    flatten_time_channels=True,
    norm_stats=None,
)

train_seq = NPZSequence(manifest_csv=train_csv, shuffle=True, **_seq_kwargs)
val_seq = NPZSequence(manifest_csv=val_csv, shuffle=False, **_seq_kwargs)

# Quick sanity report: number of batches and the model's input channel count.
print('train batches:', len(train_seq), 'val batches:', len(val_seq))
print('C_base:', train_seq.c_base, 'input_channels:', WINDOW_STEPS * train_seq.c_base)


## 3. Hitung / load normalisasi (mean/std) dan alpha class-balance

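Jika `OUT_DIR/norm_stats.json` sudah ada, statistik normalisasi dimuat dari file tersebut; jika belum, statistik dihitung dari data training lalu disimpan ke file itu. Alpha selalu dihitung ulang dan disimpan ke `OUT_DIR/alpha.json`.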

In [None]:
# Normalisation statistics: reuse the cached JSON in OUT_DIR when it exists,
# otherwise compute them from the training sequence and cache the result.
norm_path = os.path.join(OUT_DIR, 'norm_stats.json')
if os.path.exists(norm_path):
    with open(norm_path, 'r', encoding='utf-8') as fh:
        norm = NormStats.from_json(json.load(fh))
else:
    # max_batches=None presumably means "use every batch" -- confirm in
    # src.data.npz_sequence_tf.
    norm = compute_norm_stats(train_seq, max_batches=None)
    # Serialise before opening the file so a serialisation error cannot leave
    # a truncated cache behind that the next run would try to load.
    norm_payload = json.dumps(norm.to_json(), indent=2)
    with open(norm_path, 'w', encoding='utf-8') as fh:
        fh.write(norm_payload)

# Both sequences must share the same statistics (derived from training data).
train_seq.norm_stats = norm
val_seq.norm_stats = norm

# Class-balance alpha is always re-estimated from the training targets and
# written next to the normalisation stats.
# NOTE(review): assumes `alpha` is JSON-serialisable (e.g. a list of floats).
alpha = estimate_alpha_from_targets(train_seq, max_batches=None)
alpha_payload = json.dumps({'alpha': alpha}, indent=2)
with open(os.path.join(OUT_DIR, 'alpha.json'), 'w', encoding='utf-8') as fh:
    fh.write(alpha_payload)

print('norm mean:', norm.mean, 'norm std:', norm.std)
print('alpha:', alpha)


## 4. Build & compile model

`input_size=(H,W,WINDOW_STEPS*C_base)` mengikuti format `(B,H,W,T*C)` yang digunakan temporal stem 3D.

In [None]:
# Spatial size comes from the config; the channel axis carries every time
# step of every base channel (WINDOW_STEPS * C_base).
H, W = EXPECTED_HW
input_channels = WINDOW_STEPS * train_seq.c_base

# Per-channel metrics, table-driven: each metric class is instantiated once
# per output channel index (0, 1, 2) with the listed names, in this order.
_metric_table = (
    (F1ScorePerChannel, ('f1_T1', 'f1_T2', 'f1_T3')),
    (ThreatScorePerChannel, ('ts_T1', 'ts_T2', 'ts_T3')),
    (BiasMetricPerChannel, ('bias_T1', 'bias_T2', 'bias_T3')),
)
_extra_metrics = [
    metric_cls(channel, name=metric_name)
    for metric_cls, metric_names in _metric_table
    for channel, metric_name in enumerate(metric_names)
]

model = build_model(
    input_size=(H, W, input_channels),
    t_steps=WINDOW_STEPS,
    with_deep_supervision=True,
)

# compile_model returns a pair; only the compiled model is kept here.
model, _ = compile_model(
    model,
    y_train=tf.constant([0.0]),  # dummy value, because alpha_override is used
    steps_per_epoch=len(train_seq),
    total_epochs=EPOCHS,
    base_lr=6e-4,
    weight_decay=1e-4,
    clip_norm=1.0,
    alpha_override=alpha,
    extra_metrics=_extra_metrics,
)

model.summary()


## 5. Training

- `best.weights.h5`: bobot terbaik (monitor `val_main_pr_auc_T3`)
- `last.weights.h5`: bobot terakhir
- `train_log.csv`: log per epoch


In [None]:
# Training cell: sets up callbacks, records the run configuration, fits the
# model and writes the final artefacts (weights, CSV log, history) to OUT_DIR.
import inspect

best_weights_path = os.path.join(OUT_DIR, 'best.weights.h5')
last_weights_path = os.path.join(OUT_DIR, 'last.weights.h5')

callbacks = []
callbacks.append(LRSnapshot())
callbacks.append(tf.keras.callbacks.CSVLogger(os.path.join(OUT_DIR, 'train_log.csv')))
callbacks.append(tf.keras.callbacks.ModelCheckpoint(
    filepath=best_weights_path,
    monitor='val_main_pr_auc_T3',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
))
# Write the most recent weights after every epoch. This is registered BEFORE
# EarlyStopping so the file is written before any best-weight restore can
# happen; saving only after fit() would store the restored (best) weights
# under the "last" name. It also keeps a usable checkpoint if the session is
# interrupted mid-training.
callbacks.append(tf.keras.callbacks.ModelCheckpoint(
    filepath=last_weights_path,
    save_best_only=False,
    save_weights_only=True,
    verbose=0,
))
callbacks.append(tf.keras.callbacks.EarlyStopping(
    monitor='val_main_pr_auc_T3',
    mode='max',
    patience=20,
    restore_best_weights=True,
    verbose=1,
))
# NOTE(review): compile_model receives steps_per_epoch/total_epochs, which
# suggests it may install its own learning-rate schedule. ReduceLROnPlateau
# cannot adjust a schedule-driven optimizer -- confirm the two are compatible.
callbacks.append(tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_main_pr_auc_T3',
    mode='max',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
))

# Persist the experiment settings next to the outputs for reproducibility.
run_cfg = {
    'MODE': MODE,
    'HORIZON': HORIZON,
    'EPOCHS': EPOCHS,
    'BATCH_SIZE': BATCH_SIZE,
    'WINDOW_STEPS': WINDOW_STEPS,
    'CADENCE_MIN': CADENCE_MIN,
    'EXPECTED_HW': EXPECTED_HW,
    'SEED': SEED,
}
with open(os.path.join(OUT_DIR, 'run_config.json'), 'w', encoding='utf-8') as fh:
    json.dump(run_cfg, fh, indent=2)

fit_kwargs = dict(
    validation_data=val_seq,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)
# Keras 2 accepts `workers` / `use_multiprocessing` in fit(); Keras 3 removed
# them (they belong to the dataset object there) and raises TypeError if they
# are passed. Only forward them when this Keras version supports them.
_fit_params = inspect.signature(model.fit).parameters
if 'workers' in _fit_params and 'use_multiprocessing' in _fit_params:
    fit_kwargs.update(workers=2, use_multiprocessing=True)

history = model.fit(train_seq, **fit_kwargs)

# Normally the per-epoch checkpoint above has already written this file; the
# fallback only covers runs where no epoch completed (e.g. EPOCHS = 0).
if not os.path.exists(last_weights_path):
    model.save_weights(last_weights_path)

# Callbacks may log NumPy scalars (e.g. the learning rate), which json cannot
# serialise; convert every logged value to a plain Python float first.
history_payload = {
    key: [float(value) for value in values]
    for key, values in history.history.items()
}
with open(os.path.join(OUT_DIR, 'history.json'), 'w', encoding='utf-8') as fh:
    json.dump(history_payload, fh, indent=2)
print('Saved outputs to:', OUT_DIR)
