In [1]:
import importlib
import data_utils
importlib.reload(data_utils)
from data_utils import PhonemeDataset
from mlp_mixer import MLPMixer

import numpy as np
import torch 
from torch.utils.data import DataLoader
import torchvision.transforms.v2 as transforms

import pytorch_lightning as pl
from lightning import Trainer
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor

torch.autograd.set_detect_anomaly(True)

  from .autonotebook import tqdm as notebook_tqdm


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x14c9465e4740>

# Load data

In [2]:
transform = None
batch_size = 128

train_loader = DataLoader(
    PhonemeDataset(
        data_filename='../Data/Phoneme/train_X.npy',
        label_filename='../Data/Phoneme/train_y.npy',
        transform=transform
    ), 
    batch_size=batch_size, 
    shuffle=True
)

val_loader = DataLoader(
    PhonemeDataset(
        data_filename='../Data/Phoneme/valid_X.npy',
        label_filename='../Data/Phoneme/valid_y.npy',
        transform=None
    ), 
    batch_size=batch_size, 
    shuffle=False
)

test_loader = DataLoader(
    PhonemeDataset(
        data_filename='../Data/Phoneme/test_X.npy',
        label_filename='../Data/Phoneme/test_y.npy',
        transform=None
    ), 
    batch_size=batch_size, 
    shuffle=False
)

# Check a batch of train data
for X, y in train_loader:
    print(f"Shape of X (batch, channels, timesteps): {X.shape}, shape of labels: {len(y)}")
    break

Shape of X (batch, channels, timesteps): torch.Size([128, 11, 220]), shape of labels: 128


# Create mlp-mixer model

In [3]:
padded_length = 220
num_classes = 39
p_dropout = 0.5
# patch_class options are: "sequential1d", "random1d", "cyclical1d"
patch_class = "cyclical1d"
patch_size = 5
hidden_dim = 16
num_blocks = 1
tokens_mlp_dim = 250
channels_mlp_dim = 1610
lr = 1.4e-4

mixer = MLPMixer(
    padded_length=padded_length,
    num_classes=num_classes,
    p_dropout=p_dropout,
    patch_class=patch_class,
    patch_size=patch_size,
    hidden_dim=hidden_dim,
    num_blocks=num_blocks,
    tokens_mlp_dim=tokens_mlp_dim,
    channels_mlp_dim=channels_mlp_dim,
    lr = lr
)

In [4]:
callbacks = [EarlyStopping(monitor="val_loss", patience=25, mode="min")]

mixer_trainer = Trainer(
    max_epochs=1000,
    callbacks=callbacks
)

mixer_trainer.fit(
    model=mixer, 
    train_dataloaders=train_loader,
    val_dataloaders=val_loader
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/linneamw/sadow_koastore/personal/linneamw/anaconda3/envs/mag_diff/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type             | Params
--------------------------------------------------
0 | loss         | CrossEntropyLoss | 0     
1 | patching     | PatchingClass    | 896   
2 | mixer_blocks | ModuleList     

                                                                           

/home/linneamw/sadow_koastore/personal/linneamw/anaconda3/envs/mag_diff/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (26) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 6: 100%|██████████| 26/26 [00:03<00:00,  8.12it/s, v_num=769, train_loss_step=3.620, val_loss=3.630, val_acc=0.0328, collapse_flg_val=4.96e+3, train_loss_epoch=3.650, train_acc=0.0317, collapse_flg_train=4.97e+3]

Epoch 107: 100%|██████████| 26/26 [00:05<00:00,  4.59it/s, v_num=769, train_loss_step=3.150, val_loss=3.430, val_acc=0.0865, collapse_flg_val=4.96e+3, train_loss_epoch=3.140, train_acc=0.137, collapse_flg_train=4.97e+3]


In [5]:
# Test
mixer_trainer.test(
    dataloaders=test_loader
)

Restoring states from the checkpoint path at /mnt/lustre/koa/koastore/sadow_group/shared/EE645/mlp-mixer-1d-classification/lightning_logs/version_769/checkpoints/epoch=107-step=2808.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /mnt/lustre/koa/koastore/sadow_group/shared/EE645/mlp-mixer-1d-classification/lightning_logs/version_769/checkpoints/epoch=107-step=2808.ckpt
SLURM auto-requeueing enabled. Setting signal handlers.


Testing DataLoader 0: 100%|██████████| 14/14 [00:01<00:00, 10.61it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.07935560494661331
        test_loss           3.4643778800964355
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 3.4643778800964355, 'test_acc': 0.07935560494661331}]