# MiniRocket - Model Training
MiniRocket + RidgeClassifierCV experiment

In [None]:
from pathlib import Path
import json
import numpy as np
import pandas as pd

from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import accuracy_score, f1_score, classification_report

from sktime.transformations.panel.rocket import MiniRocketMultivariate

In [None]:
# Input and output roots, given relative to the current working directory
DATA_DIR, MODEL_DIR = Path("../data"), Path("../models")

# Folder the MiniRocket input artifacts are read from; created when missing
ARTIFACTS_MINI = DATA_DIR / "artifacts/minirocket"
ARTIFACTS_MINI.mkdir(exist_ok=True, parents=True)

# Folder this run's fitted objects and metrics are written to; created when missing
RUN_DIR = MODEL_DIR / "minirocket/baseline"
RUN_DIR.mkdir(exist_ok=True, parents=True)

## Load Data

In [None]:
# Read the manifest (metadata stored next to the artifacts)
manifest_path = ARTIFACTS_MINI / "manifest.json"
with open(manifest_path, "r") as f:
    manifest = json.load(f)

print("Manifest loaded.")
# Show only the fields of interest instead of dumping the whole manifest
_manifest_overview = {
    "resample_len": manifest["resample_len"],
    "n_channels": len(manifest["sensor_channels"]),
    "paths": manifest["paths"],
}
print(json.dumps(_manifest_overview, indent=2))


# %% Load nested panels + labels
def _read_label_ids(parquet_name):
    """Return the ``label_id`` column of a label parquet file as a NumPy array."""
    frame = pd.read_parquet(ARTIFACTS_MINI / parquet_name)
    return frame["label_id"].to_numpy()


# Nested panels, one pickled object per split
train_nested = pd.read_pickle(ARTIFACTS_MINI / "train_nested.pkl")
val_nested = pd.read_pickle(ARTIFACTS_MINI / "val_nested.pkl")
test_nested = pd.read_pickle(ARTIFACTS_MINI / "test_nested.pkl")

# Matching label ids, one parquet file per split
y_train = _read_label_ids("train_y.parquet")
y_val = _read_label_ids("val_y.parquet")
y_test = _read_label_ids("test_y.parquet")

print("Shapes:")
for _tag, _panel, _labels in (
    ("  train:", train_nested, y_train),
    ("  val:  ", val_nested, y_val),
    ("  test: ", test_nested, y_test),
):
    print(_tag, _panel.shape, "labels:", _labels.shape)

In [None]:
# Label arrays for each split.
# Relies on the data-loading cell having already defined
# y_train, y_val and y_test.
ytr = np.asarray(y_train)
yva = np.asarray(y_val)
yte = np.asarray(y_test)

# Optional: human-readable class names from splits.json (if present).
# This stays best-effort: when the file is missing or does not have the
# expected "id_to_label" mapping, the names fall back to None. The reason is
# printed so the fallback is visible rather than silently swallowed.
try:
    with open(ARTIFACTS_MINI / "splits.json") as f:
        _spl = json.load(f)
    # JSON object keys are strings; convert them back to integer label ids.
    id_to_label = {int(k): v for k, v in _spl["id_to_label"].items()}
except (OSError, KeyError, ValueError, TypeError, AttributeError) as exc:
    # OSError: file missing/unreadable; ValueError: invalid JSON or non-integer
    # key; KeyError/TypeError/AttributeError: unexpected structure.
    print(
        f"Could not load class names from splits.json "
        f"({type(exc).__name__}: {exc}); class_names set to None."
    )
    id_to_label, classes_sorted, class_names = None, None, None
else:
    # Names are ordered by ascending label id.
    classes_sorted = sorted(id_to_label)
    class_names = [id_to_label[c] for c in classes_sorted]


## MiniRocket - Build Features

In [None]:
# Fit MiniRocket 
from sktime.transformations.panel.rocket import MiniRocketMultivariate
import numpy as np
import pandas as pd
from pathlib import Path
import json
import time
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix


# Fit MiniRocket using the training panel only; construction and fitting are
# both inside the timed section.
seed = 42
t0 = time.time()
trf = MiniRocketMultivariate(random_state=seed).fit(train_nested)
t_fit = time.time() - t0
print(f"MiniRocket fitted in {t_fit:.2f}s")

# Run the fitted transformer over every split (train, val, test — in that
# order) and time the three transforms together.
t0 = time.time()
Xtr, Xva, Xte = (
    trf.transform(panel) for panel in (train_nested, val_nested, test_nested)
)
t_tr = time.time() - t0
print("Shapes after transform:", *(feats.shape for feats in (Xtr, Xva, Xte)))
print(f"Transforms done in {t_tr:.2f}s")


## RidgeClassifierCV - Train

In [None]:
# Train RidgeClassifierCV (fast, strong baseline)
# The regularisation strength is chosen by the estimator's built-in
# cross-validation over 13 log-spaced alphas between 1e-3 and 1e3.
# NOTE: `store_cv_values` is intentionally not passed. It was deprecated in
# scikit-learn 1.5 (renamed `store_cv_results`) and later removed, so passing
# it breaks on current releases. Its default is already False, so omitting it
# keeps the same behaviour on old and new versions.
clf = RidgeClassifierCV(
    alphas=np.logspace(-3, 3, 13),
    class_weight="balanced",
)
t0 = time.time()
clf.fit(Xtr, ytr)
t_clf = time.time() - t0
print(f"RidgeClassifierCV trained in {t_clf:.2f}s")


## Evaluate

In [None]:
from sklearn.metrics import (
    accuracy_score, f1_score, classification_report, confusion_matrix
)

# Evaluate helper
def eval_split(X, y, name):
    """Score the fitted classifier on one split and collect its metrics.

    Predicts with the module-level ``clf``, prints a one-line summary followed
    by the full text classification report, and returns the metrics as plain
    Python containers.

    Uses the module-level ``class_names`` for display names when it is a list;
    when it is ``None`` the report falls back to its default per-class keys.

    Args:
        X: Feature matrix passed to ``clf.predict``.
        y: True labels for the rows of ``X``.
        name: Tag printed in front of the summary line (e.g. ``"VAL"``).

    Returns:
        dict with the keys ``accuracy``, ``f1_macro``, ``f1_weighted``,
        ``report`` (text report), ``confusion_matrix`` (nested lists) and
        ``details`` (``macro`` / ``weighted`` / ``per_class`` breakdown taken
        from the dict form of the classification report).
    """
    y_pred = clf.predict(X)

    # Core quick metrics
    acc = accuracy_score(y, y_pred)
    f1m = f1_score(y, y_pred, average="macro")
    f1w = f1_score(y, y_pred, average="weighted")

    print(f"[{name}] acc={acc:.4f}  f1_macro={f1m:.4f}  f1_weighted={f1w:.4f}")

    # The report is computed twice: once as a dict (for structured storage)
    # and once as text (for printing and the human-readable file).
    rep_dict = classification_report(
        y, y_pred, target_names=class_names, zero_division=0, output_dict=True
    )
    rep_text = classification_report(
        y, y_pred, target_names=class_names, zero_division=0
    )
    print(rep_text)

    cm = confusion_matrix(y, y_pred)

    # Per-class keys of the report dict. With explicit class names these are
    # the names themselves. Without them (class_names is None) iterating
    # class_names would raise, so take every report entry that is not one of
    # the aggregate rows instead.
    if class_names is not None:
        per_class_keys = list(class_names)
    else:
        aggregate_keys = {
            "accuracy", "micro avg", "macro avg", "weighted avg", "samples avg",
        }
        per_class_keys = [key for key in rep_dict if key not in aggregate_keys]

    # Build details structure
    details = {
        "macro": {
            "accuracy": acc,
            "f1": rep_dict["macro avg"]["f1-score"],
            "precision": rep_dict["macro avg"]["precision"],
            "recall": rep_dict["macro avg"]["recall"],
        },
        "weighted": {
            "f1": rep_dict["weighted avg"]["f1-score"],
            "precision": rep_dict["weighted avg"]["precision"],
            "recall": rep_dict["weighted avg"]["recall"],
        },
        "per_class": {
            cls: {
                "f1": rep_dict[cls]["f1-score"],
                "precision": rep_dict[cls]["precision"],
                "recall": rep_dict[cls]["recall"],
            }
            for cls in per_class_keys
        },
    }

    return {
        "accuracy": acc,
        "f1_macro": f1m,
        "f1_weighted": f1w,
        "report": rep_text,
        "confusion_matrix": cm.tolist(),  # nested lists so it can be serialised
        "details": details,
    }

# Score the two held-out splits: validation first, then test
val_metrics = eval_split(X=Xva, y=yva, name="VAL")
test_metrics = eval_split(X=Xte, y=yte, name="TEST")


## Persist

In [None]:
import pickle

# Run summary written to metrics.json. The text reports are blanked out here
# (set to None) because they are saved separately as .txt files below.
summary = {
    "model": "MiniRocketMultivariate + RidgeClassifierCV",
    "params": {
        "random_state": seed,
        # Stored under "n_kernels"; the value is the number of columns of the
        # transformed training matrix.
        "n_kernels": int(Xtr.shape[1]),
    },
    "train_time": {
        "transformer_fit_s": t_fit,
        "transform_s": t_tr,
        "clf_fit_s": t_clf,
    },
    "val": {k: (v if k != "report" else None) for k, v in val_metrics.items()},
    "test": {k: (v if k != "report" else None) for k, v in test_metrics.items()},
    "class_names": class_names,
}

# Save artifacts (fitted transformer and classifier)
with open(RUN_DIR / "transformer.pkl", "wb") as f:
    pickle.dump(trf, f)
with open(RUN_DIR / "classifier.pkl", "wb") as f:
    pickle.dump(clf, f)

# Save metrics JSON
pd.Series(summary, dtype="object").to_json(RUN_DIR / "metrics.json", force_ascii=False)

# Save full reports separately (human-readable).
# The encoding is pinned to UTF-8: the reports contain the class names, and
# the JSON above is written with force_ascii=False, so non-ASCII names must
# not depend on the platform's locale encoding.
with open(RUN_DIR / "val_classification_report.txt", "w", encoding="utf-8") as f:
    f.write(val_metrics["report"])
with open(RUN_DIR / "test_classification_report.txt", "w", encoding="utf-8") as f:
    f.write(test_metrics["report"])

print("\nSaved:")
for _saved_name in (
    "transformer.pkl",
    "classifier.pkl",
    "metrics.json",
    "val_classification_report.txt",
    "test_classification_report.txt",
):
    print(" ", RUN_DIR / _saved_name)
