<a href="https://colab.research.google.com/github/kashperova/ssl-hsi-course-work/blob/main/notebooks/supervised_conformer_subset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [1]:
!git clone https://@github.com/kashperova/ssl-hsi-course-work.git

Cloning into 'ssl-hsi-course-work'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 77 (delta 7), reused 10 (delta 3), pack-reused 51 (from 3)[K
Receiving objects: 100% (77/77), 93.20 MiB | 11.68 MiB/s, done.
Resolving deltas: 100% (8/8), done.


In [2]:
%cd ssl-hsi-course-work/src

/content/ssl-hsi-course-work/src


In [3]:
!curl -LsSf https://astral.sh/uv/install.sh | sh

downloading uv 0.7.9 x86_64-unknown-linux-gnu
no checksums to verify
installing to /usr/local/bin
  uv
  uvx
everything's installed!


In [None]:
!uv pip install --system scikit-learn==1.6.0
!uv pip install --system seaborn==0.13.0
!uv pip install --system matplotlib==3.10.1
!uv pip install --system plotly==6.0.0
!uv pip install --system torchmetrics==1.7.2
!uv pip install --system scipy==1.15.2

In [5]:
from models.conformer.model import ModModel
from modules.trainers.supervised import BaseSupervisedTrainer
from modules.datasets.hsi import HyperspectralDataset
from config.train_config import BaseTrainConfig
from utils.seed import set_seed
from utils.metrics import Metrics, Task
from utils.data import load_hsi_dataset, get_stratified_subset

import os
import wandb
import torch.optim as optim
from torch.utils.data import Subset, Dataset

from torch import nn

In [6]:
os.environ["WANDB_API_KEY"] = ""

In [7]:
wandb.init()

[34m[1mwandb[0m: Currently logged in as: [33mskashperova[0m ([33mkashperova-test[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
set_seed(42)

Random seed set to 42


In [9]:
class TrainConfig(BaseTrainConfig):
    epochs: int = 50
    train_batch_size: int = 64
    eval_batch_size: int = 64
    train_test_split: float = 0.7

In [10]:
patches, labels = load_hsi_dataset(
    dataset_name="IndianPines", root_dir="../data", pca_components=30, patch_size=15
)

In [11]:
dataset = HyperspectralDataset(patches, labels)
dataset10 = get_stratified_subset(dataset, 0.2)
dataset20 = get_stratified_subset(dataset, 0.1)

In [35]:
def train(subset: Subset, num_classes: int, save_name: str):
    model = ModModel(in_channels=30, num_classes=num_classes)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5)
    criterion = nn.CrossEntropyLoss()

    trainer = BaseSupervisedTrainer(
        model=model,
        optimizer=optimizer,
        lr_scheduler=scheduler,
        dataset=subset,
        config=TrainConfig(),
        metrics=Metrics(
            task=Task.MULTICLASS_CLASSIFICATION,
            num_classes=num_classes,
            average="micro",
        ),
        loss_fn=criterion,
        save_name=save_name,
    )
    trainer.train(verbose=False)


def reinit_wandb():
    wandb.finish(quiet=True)
    wandb.init()

In [15]:
train(dataset20, 16, "conformer_20_ip")

Training: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s]


In [None]:
reinit_wandb()

In [17]:
train(dataset10, 16, "conformer_10_ip")

Training: 100%|██████████| 50/50 [00:58<00:00,  1.18s/it]


In [18]:
patches, labels = load_hsi_dataset(
    dataset_name="PaviaUniversity", root_dir="../data", pca_components=30, patch_size=15
)
dataset = HyperspectralDataset(patches, labels)
dataset10 = get_stratified_subset(dataset, 0.2)
dataset20 = get_stratified_subset(dataset, 0.1)

In [None]:
reinit_wandb()

In [21]:
train(dataset20, 9, "mod_model_20_pu")

Training: 100%|██████████| 50/50 [01:58<00:00,  2.37s/it]


In [None]:
reinit_wandb()

In [23]:
train(dataset10, 9, "mod_model_10_pu")

Training: 100%|██████████| 50/50 [03:54<00:00,  4.70s/it]


In [24]:
patches, labels = load_hsi_dataset(
    dataset_name="KSC", root_dir="../data", pca_components=30, patch_size=15
)
dataset = HyperspectralDataset(patches, labels)
dataset10 = get_stratified_subset(dataset, 0.2)
dataset20 = get_stratified_subset(dataset, 0.1)

In [None]:
reinit_wandb()

In [29]:
train(dataset20, 13, "mod_model_20_ksc")

Training: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s]


In [None]:
reinit_wandb()

In [36]:
train(dataset10, 13, "mod_model_10_ksc")

Training: 100%|██████████| 50/50 [00:30<00:00,  1.63it/s]
