
# **SCNN Student Model - TSER for KD**

# **Setup**

---
**Install Libraries**

In [1]:
!pip install snntorch dagshub mlflow pynvml --quiet

---
**GitHub Code**

In [2]:
from google.colab import userdata
import os


# Sets environ variables for GitHub
os.environ['GITHUB_TOKEN'] = userdata.get('GITHUB_TOKEN')
os.environ['USER'] = userdata.get('USER')

# Clones the repo and changes dir
!git clone -b dev https://${GITHUB_TOKEN}@github.com/${USER}/tser-kd.git
%cd tser-kd/

fatal: destination path 'tser-kd' already exists and is not an empty directory.
/content/tser-kd


---
**Set Seed for Experiment**

In [3]:
from tser_kd.utils import setup_seed


# Seed used for the whole experiment, handed to the project's seeding helper
SEED = 42
setup_seed(SEED)

Random seed: 42


---
**Device Selection**

In [4]:
import torch


# Selects the device for the experiment: the GPU when CUDA is available,
# otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

---
**MLFlow Setup**

In [5]:
import mlflow
from mlflow import MlflowClient
import dagshub


# DagsHub repository hosting the MLFlow tracking server
DAGSHUB_OWNER = 'matteogianferrari'
DAGSHUB_REPO = 'tser-kd'

# MLFlow credentials are exposed through the environment
os.environ['MLFLOW_TRACKING_USERNAME'] = userdata.get('USER')
os.environ['MLFLOW_TRACKING_PASSWORD'] = userdata.get('MLFLOW_TRACKING_PASSWORD')

# Initialises DagsHub with its MLFlow integration enabled
dagshub.init(repo_owner=DAGSHUB_OWNER, repo_name=DAGSHUB_REPO, mlflow=True)

# Points MLFlow at the tracking server of the DagsHub repository
TRACKING_URI = f"https://dagshub.com/{DAGSHUB_OWNER}/{DAGSHUB_REPO}.mlflow"
mlflow.set_tracking_uri(TRACKING_URI)

# Name of the MLFlow experiment the runs are grouped under
experiment_name = "TSER-KD Student"

# **Hyperparameters**

In [6]:
# Hyperparameter dictionary, one entry per line and grouped by concern
h_dict = {
    # Leaky neuron
    "BETA": 0.5,
    "V_th": 1.0,
    # Training
    "MAX_EPOCHS": 15,
    "BATCH_SIZE": 32,
    # Learning rate
    "LR_SCHEDULER": "CosineAnnealingLR",
    "BASE_LR": 1e-3,
    # Optimizer
    "OPTIMIZER": "AdamW",
    "WEIGHT_DECAY": 5e-4,
    # Early stopping
    "ES_PATIENCE": 100,
    "ES_DELTA": 1e-4,
    # GPU
    "HARDWARE": "A100",
    # Encoder
    "ENCODER": "Static",
    "T": 4,
    # Learnable neuron parameters
    "LEARN_BETA": False,
    "LEARN_THRESHOLD": False,
}

# **MNIST Dataset**

---
**Data Loaders Creation**

In [7]:
from tser_kd.dataset import load_mnist_data
from torch.utils.data import DataLoader


# Fetches the train/validation datasets and the number of classes
# through the project's load_mnist_data helper
train_dataset, val_dataset, num_classes = load_mnist_data()

# Both loaders share the batch size from the hyperparameters
batch_size = h_dict['BATCH_SIZE']

# Training loader: samples are reshuffled
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation loader: fixed sample order
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# **S-CNN**

---
**Model Creation**

In [8]:
from tser_kd.model.student import make_student_model


# Arguments of the student SNN: 'scnn' architecture, single input channel,
# neuron settings taken from the hyperparameter dictionary
student_kwargs = dict(
    arch='scnn',
    in_channels=1,
    num_classes=num_classes,
    beta=h_dict['BETA'],
    threshold=h_dict['V_th'],
    device=device,
    learn_beta=h_dict['LEARN_BETA'],
    learn_threshold=h_dict['LEARN_THRESHOLD'],
)

# SNN
s_model = make_student_model(**student_kwargs)

# **Training**

---
**Objects Creation**

In [9]:
import torch.optim as optim
import torch.nn as nn
from tser_kd.training import EarlyStopping
from tser_kd.dataset import RateEncoder, StaticEncoder
from tser_kd.model import TSCELoss
from tser_kd.utils import AccuracyMonitor


# Optimizer, selected by h_dict["OPTIMIZER"].
# An unsupported name fails loudly instead of leaving `optimizer` undefined
# (or silently reusing a stale object from a previous execution of the cell).
if h_dict["OPTIMIZER"] == 'AdamW':
    optimizer = optim.AdamW(s_model.parameters(), lr=h_dict['BASE_LR'], weight_decay=h_dict['WEIGHT_DECAY'])
elif h_dict["OPTIMIZER"] == 'Adam':
    optimizer = optim.Adam(s_model.parameters(), lr=h_dict['BASE_LR'], weight_decay=h_dict['WEIGHT_DECAY'])
elif h_dict["OPTIMIZER"] == 'SGD':
    # Requires the additional "MOMENTUM" hyperparameter
    optimizer = optim.SGD(s_model.parameters(), lr=h_dict['BASE_LR'], momentum=h_dict["MOMENTUM"], weight_decay=h_dict['WEIGHT_DECAY'])
else:
    raise ValueError(f"Unsupported OPTIMIZER: {h_dict['OPTIMIZER']!r}")

# LR scheduler, selected by h_dict["LR_SCHEDULER"]
if h_dict["LR_SCHEDULER"] == 'ReduceLROnPlateau':
    # Requires the additional "LR_PATIENCE" and "LR_FACTOR" hyperparameters
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=h_dict["LR_PATIENCE"], factor=h_dict["LR_FACTOR"])
elif h_dict["LR_SCHEDULER"] == 'CosineAnnealingLR':
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=h_dict["MAX_EPOCHS"])
elif h_dict["LR_SCHEDULER"] == 'StepLR':
    # Requires the additional "LR_STEP" and "LR_FACTOR" hyperparameters
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=h_dict["LR_STEP"], gamma=h_dict["LR_FACTOR"])
else:
    raise ValueError(f"Unsupported LR_SCHEDULER: {h_dict['LR_SCHEDULER']!r}")


# Losses: TSCELoss for training, plain cross-entropy for evaluation
train_criterion = TSCELoss()
eval_criterion = nn.CrossEntropyLoss()

# Accuracy monitor
acc_monitor = AccuracyMonitor(path="best_acc.pth")

# Early stopping
es_callback = EarlyStopping(patience=h_dict["ES_PATIENCE"], delta=h_dict["ES_DELTA"], path="best_loss.pth")

# Gradient scaler
scaler = torch.amp.GradScaler(device='cuda')

# Encoder, selected by h_dict["ENCODER"]
if h_dict["ENCODER"] == "Rate":
    # Requires the additional "GAIN" hyperparameter
    encoder = RateEncoder(num_steps=h_dict["T"], gain=h_dict["GAIN"])
elif h_dict["ENCODER"] == "Static":
    encoder = StaticEncoder(num_steps=h_dict["T"])
else:
    raise ValueError(f"Unsupported ENCODER: {h_dict['ENCODER']!r}")

---
**Training Loop**

In [10]:
import pynvml
from tser_kd.eval import run_eval
from tser_kd.training import run_train


# To resume a run you need: the model checkpoint, the start epoch, the run ID,
# and last_epoch in the scheduler

# Sets the MLFlow experiment
mlflow.set_experiment(experiment_name)

epoch_i = 0
curr_lr = optimizer.param_groups[0]["lr"]

# Train the model and log with MLFlow
with mlflow.start_run(run_id=None, log_system_metrics=True):
    # Log hyperparameters up front, so the run keeps its configuration even
    # when training crashes or is interrupted before the loop completes
    mlflow.log_params(h_dict)

    for epoch_i in range(h_dict["MAX_EPOCHS"]):
        train_loss, train_acc, epoch_time, train_batch_time = run_train(
            epoch_i, train_loader, s_model, train_criterion, optimizer, device, scaler, encoder
        )

        val_loss, val_acc1, val_acc5, val_batch_time = run_eval(val_loader, s_model, eval_criterion, device, encoder)

        # Logging (curr_lr is the learning rate that was used during this epoch)
        print(
            f"Time: {epoch_time:.1f}s | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
            f"Val Loss: {val_loss:.4f} | Val Acc1: {val_acc1:.2f}% | Val Acc5: {val_acc5:.2f}% | LR: {curr_lr:.6f}"
        )

        mlflow.log_metrics({
            "learning_rate": curr_lr, "train_tsce_loss": train_loss, "train_acc": train_acc, "val_ce_loss": val_loss,
            "val_acc1": val_acc1, "val_acc5": val_acc5, "epoch_time": epoch_time,
            "train_batch_time": train_batch_time, "val_batch_time": val_batch_time
        }, step=epoch_i)

        # Updates the LR (ReduceLROnPlateau is the only scheduler stepped with the monitored loss)
        if h_dict["LR_SCHEDULER"] == 'ReduceLROnPlateau':
            scheduler.step(val_loss)
        else:
            scheduler.step()

        curr_lr = optimizer.param_groups[0]["lr"]

        # Accuracy monitor
        # NOTE(review): presumably checkpoints the model to "best_acc.pth" when val_acc1 improves - confirm
        acc_monitor(val_acc1, epoch_i, s_model)

        # ES check: a truthy return value stops the training
        if es_callback(val_loss, epoch_i, s_model):
            break


    # Log test performance
    # NOTE(review): the "test" metrics below are computed on val_loader; no separate test split is used here
    # map_location keeps the checkpoint tensors on the device selected for this session
    s_model.load_state_dict(torch.load("best_acc.pth", map_location=device))
    test_ce_loss, test_acc1, test_acc5, _ = run_eval(val_loader, s_model, eval_criterion, device, encoder)
    mlflow.log_metrics({"test_ce_loss": test_ce_loss, "test_acc1": test_acc1, "test_acc5": test_acc5})

2025/07/31 17:10:15 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Epoch 1: 100%|██████████| Batch 1875/1875 , acc=78.86%, loss=0.7126


Time: 40.9s | Train Loss: 0.7126 | Train Acc: 78.86% | Val Loss: 0.1708 | Val Acc1: 95.92% | Val Acc5: 99.89% | LR: 0.001000


Epoch 2: 100%|██████████| Batch 1875/1875 , acc=95.88%, loss=0.1550


Time: 39.3s | Train Loss: 0.1550 | Train Acc: 95.88% | Val Loss: 0.1188 | Val Acc1: 96.70% | Val Acc5: 99.85% | LR: 0.000989


Epoch 3: 100%|██████████| Batch 1875/1875 , acc=97.28%, loss=0.0978


Time: 39.4s | Train Loss: 0.0978 | Train Acc: 97.28% | Val Loss: 0.0776 | Val Acc1: 97.85% | Val Acc5: 99.92% | LR: 0.000957


Epoch 4: 100%|██████████| Batch 1875/1875 , acc=97.85%, loss=0.0740


Time: 39.0s | Train Loss: 0.0740 | Train Acc: 97.85% | Val Loss: 0.0532 | Val Acc1: 98.46% | Val Acc5: 99.97% | LR: 0.000905


Epoch 5: 100%|██████████| Batch 1875/1875 , acc=98.26%, loss=0.0585


Time: 39.9s | Train Loss: 0.0585 | Train Acc: 98.26% | Val Loss: 0.0507 | Val Acc1: 98.45% | Val Acc5: 99.97% | LR: 0.000835


Epoch 6: 100%|██████████| Batch 1875/1875 , acc=98.64%, loss=0.0469


Time: 39.0s | Train Loss: 0.0469 | Train Acc: 98.64% | Val Loss: 0.0422 | Val Acc1: 98.68% | Val Acc5: 99.99% | LR: 0.000750


Epoch 7: 100%|██████████| Batch 1875/1875 , acc=98.91%, loss=0.0372


Time: 39.2s | Train Loss: 0.0372 | Train Acc: 98.91% | Val Loss: 0.0344 | Val Acc1: 98.94% | Val Acc5: 99.98% | LR: 0.000655


Epoch 8: 100%|██████████| Batch 1875/1875 , acc=99.12%, loss=0.0311


Time: 39.9s | Train Loss: 0.0311 | Train Acc: 99.12% | Val Loss: 0.0391 | Val Acc1: 98.69% | Val Acc5: 99.99% | LR: 0.000552


Epoch 9: 100%|██████████| Batch 1875/1875 , acc=99.35%, loss=0.0236


Time: 40.0s | Train Loss: 0.0236 | Train Acc: 99.35% | Val Loss: 0.0298 | Val Acc1: 99.04% | Val Acc5: 99.99% | LR: 0.000448


Epoch 10: 100%|██████████| Batch 1875/1875 , acc=99.51%, loss=0.0190


Time: 39.3s | Train Loss: 0.0190 | Train Acc: 99.51% | Val Loss: 0.0325 | Val Acc1: 98.92% | Val Acc5: 99.99% | LR: 0.000345


Epoch 11: 100%|██████████| Batch 1875/1875 , acc=99.65%, loss=0.0148


Time: 39.1s | Train Loss: 0.0148 | Train Acc: 99.65% | Val Loss: 0.0271 | Val Acc1: 99.13% | Val Acc5: 99.99% | LR: 0.000250


Epoch 12: 100%|██████████| Batch 1875/1875 , acc=99.78%, loss=0.0115


Time: 39.1s | Train Loss: 0.0115 | Train Acc: 99.78% | Val Loss: 0.0259 | Val Acc1: 99.11% | Val Acc5: 99.99% | LR: 0.000165


Epoch 13: 100%|██████████| Batch 1875/1875 , acc=99.85%, loss=0.0096


Time: 40.1s | Train Loss: 0.0096 | Train Acc: 99.85% | Val Loss: 0.0273 | Val Acc1: 99.10% | Val Acc5: 100.00% | LR: 0.000095


Epoch 14: 100%|██████████| Batch 1875/1875 , acc=99.90%, loss=0.0082


Time: 39.6s | Train Loss: 0.0082 | Train Acc: 99.90% | Val Loss: 0.0263 | Val Acc1: 99.14% | Val Acc5: 99.99% | LR: 0.000043


Epoch 15: 100%|██████████| Batch 1875/1875 , acc=99.92%, loss=0.0076


Time: 39.6s | Train Loss: 0.0076 | Train Acc: 99.92% | Val Loss: 0.0249 | Val Acc1: 99.17% | Val Acc5: 100.00% | LR: 0.000011


2025/07/31 17:21:15 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/07/31 17:21:16 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run SCNNS-IS4MNIST at: https://dagshub.com/matteogianferrari/tser-kd.mlflow/#/experiments/1/runs/02fb055178714d089c3a886c1dbecd98
🧪 View experiment at: https://dagshub.com/matteogianferrari/tser-kd.mlflow/#/experiments/1
