# CIFAR-10 SoftMoE (no regularization)

Set up the environment, train the model, and visualize the resulting metrics.


In [None]:
# Setup: clone repo and install requirements
%cd /content  # go back to root
!git clone https://github.com/moe-project-uu/mixture-of-experts-project.git || true
%cd mixture-of-experts-project
%pip install -r requirements.txt

# Use Colab's preinstalled torch; install package in editable mode
%pip install -U pip
%pip install -e .


In [None]:
# Train via CLI so argparse is used
%cd /content/mixture-of-experts-project

# config settings
FF_LAYER = "SoftMoE"   # "Dense" or "SoftMoE"
EPOCHS   = 100
NUM_EXPERTS = 4        # ignored for Dense

if FF_LAYER == "Dense":
    !python scripts/train_cifar10.py --FF_layer Dense --epochs $EPOCHS
else:
    !python scripts/train_cifar10.py --FF_layer SoftMoE --epochs $EPOCHS --num_experts $NUM_EXPERTS


In [None]:
# Load metrics.pt and plot loss/accuracy
import os

import numpy as np
import torch
import matplotlib.pyplot as plt

# FF_LAYER, EPOCHS and NUM_EXPERTS are taken from the training cell; the
# checkpoint path below is built from them, so they must still hold the values
# that were used for the run being inspected.
run_tag = f"E{EPOCHS}" if FF_LAYER == "Dense" else f"E{EPOCHS}-X{NUM_EXPERTS}"
ckpt_dir = os.path.join("/content/mixture-of-experts-project", "checkpoints", FF_LAYER, run_tag)
metrics_path = os.path.join(ckpt_dir, "metrics.pt")
if not os.path.exists(metrics_path):
    # Explicit exception instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f"metrics.pt not found at {metrics_path}")

# Load the history dict written by the training run.
# SECURITY: weights_only=False unpickles arbitrary Python objects. Only load
# metrics files produced by your own training runs, never untrusted files.
hist = torch.load(metrics_path, map_location="cpu", weights_only=False)

# Per-run curves read from the history dict.
train_loss = np.array(hist["train_loss"])
train_acc  = np.array(hist["train_acc"])
val_loss   = np.array(hist["val_loss"])
val_acc    = np.array(hist["val_acc"])

# One figure per metric, each with the train and validation curves overlaid.
for ylabel, title, train_curve, val_curve in (
    ("loss", "Loss", train_loss, val_loss),
    ("accuracy", "Accuracy", train_acc, val_acc),
):
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(train_curve, label="train")
    ax.plot(val_curve, label="val")
    ax.set(xlabel="epoch", ylabel=ylabel, title=f"{FF_LAYER} — {title}")
    ax.legend()
    plt.show()


In [None]:
# Expert Utilization
from moe.utils.helpers import plot_expert_utilization

# Derive the epochs to highlight from EPOCHS instead of hard-coding them, so
# the selection stays valid when the run length changes: first, middle and
# last epoch, de-duplicated for very short runs. For EPOCHS = 100 this is
# (0, 50, 99).
# NOTE(review): assumes epochs_to_bar holds zero-based epoch indices — confirm
# against plot_expert_utilization in moe.utils.helpers.
epochs_to_bar = tuple(sorted({0, EPOCHS // 2, EPOCHS - 1}))
plot_expert_utilization(hist, ff_layer=FF_LAYER, epochs_to_bar=epochs_to_bar)


In [None]:
# Gating Entropy
# Passes the metrics history dict (`hist`, loaded from metrics.pt) and the
# layer type to the project's plotting helper.
# NOTE(review): presumably plots the gate entropy recorded during training —
# confirm which `hist` keys it reads in moe.utils.helpers.
from moe.utils.helpers import plot_gating_entropy
plot_gating_entropy(hist, ff_layer=FF_LAYER)
