# Load Config

In [1]:
import os
from hydra import initialize_config_dir, compose
# Hydra's initialize_config_dir expects an absolute path, so resolve the
# relative '../configs' against the kernel's current working directory.
config_dir = os.path.abspath('../configs')

# Compose the experiment config in-process (no @hydra.main entry point needed),
# picking the ViT model and DoMars16k data groups and overriding the seed and
# the training batch size.
with initialize_config_dir(config_dir=config_dir, version_base="1.1"):
    cfg = compose(
        config_name="config",
        overrides=[
            "model=vit",
            "data=domars16k",
            "seed=0",
            "training.batch_size=16",
        ],
    )

# The config carries a seed, but nothing in this notebook applied it. Seed the
# Python / NumPy / PyTorch RNGs here, before any data or model object is built,
# so that a Restart & Run All reproduces the same results. workers=True asks
# Lightning to derive dataloader-worker seeds from it as well.
# NOTE(review): project code may also seed internally from cfg.seed — confirm;
# calling this up front is harmless either way.
from pytorch_lightning import seed_everything

seed_everything(cfg.seed, workers=True)

In [2]:
from omegaconf import OmegaConf

# Render the composed config as YAML and echo it for a visual sanity check.
# NOTE(review): the recorded dump still shows `${model.name}` inside
# data.model_loc while the `model` node only has a `classification` child —
# confirm that interpolation resolves when the path is actually accessed.
config_yaml = OmegaConf.to_yaml(cfg)
print(config_yaml)

seed: 0
task: classification
data:
  split:
    train: 0.6
    val: 0.2
    test: 0.2
  valid_image_extensions:
  - jpg
  - JPG
  - jpeg
  - JPEG
  - png
  - PNG
  - tif
  - TIF
  name: DoMars16k
  status: ready
  data_dir: /data/hkerner/MarsBench/Datasets/DoMars16K/data/
  annot_csv: /data/hkerner/MarsBench/Datasets/DoMars16K/annotation.csv
  num_classes: 15
  model_loc: /data/hkerner/MarsBench/Models/DoMars16K/${model.name}/
model:
  classification:
    name: VisionTransformer
    class_path: src.models.classification.ViT
    status: ready
    pretrained: true
    freeze_layers: true
    input_size:
    - 3
    - 224
    - 224
transforms:
  image_size:
  - 224
  - 224
  mean:
  - 0.485
  - 0.456
  - 0.406
  std:
  - 0.229
  - 0.224
  - 0.225
training:
  batch_size: 16
  num_workers: 4
  max_epochs: 10
optimizer:
  name: adam
  lr: 0.001
criterion:
  name: cross_entropy
test:
  data:
    status:
    - ready
    subset_size: 100
  model:
    status:
    - ready
    with_tuple_output:
 

# Prepare Data

In [3]:
import sys

# Put the parent directory on the import path so the `src` package imported by
# the following lines can be found. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.append('..')
from src.data import *
from src.utils.transforms import get_transforms

In [4]:
# # For explanation purposes only; this cell does not need to be run
# train_transform, val_transform = get_transforms(cfg)
# train_dataset, val_dataset, test_dataset = get_dataset(cfg, train_transform, val_transform)

In [5]:
from src.data.mars_datamodule import MarsDataModule
# Build the data module from the composed config and run its setup step.
data_module = MarsDataModule(cfg)
data_module.setup()

# Request one loader per split, in train -> val -> test order.
train_loader, val_loader, test_loader = (
    data_module.train_dataloader(),
    data_module.val_dataloader(),
    data_module.test_dataloader(),
)



# Load Model

In [6]:
from pytorch_lightning import Trainer
from src.models.classification.ViT import ViT

In [7]:
# Instantiate the ViT classifier from the composed config.
model = ViT(cfg)

# fast_dev_run=True turns this into a smoke test: the recorded run stops with
# `max_steps=1 reached`, so max_epochs has no practical effect while the flag
# is on. Flip it to False for a real training run.
trainer_options = dict(
    max_epochs=cfg.training.max_epochs,
    fast_dev_run=True,
)
trainer = Trainer(**trainer_options)

/home/vmalaviy/.conda/envs/vl/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/vmalaviy/.conda/envs/vl/lib/python3.11/site-pa ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/vmalaviy/.conda/envs/vl/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by de

# Training

In [8]:
# Switch the model to training mode before handing it to the Trainer.
model.train()

# Fit with the train/val loaders created in the data-preparation cell.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | VisionTransformer | 303 M  | train
1 | criterion | CrossEntropyLoss  | 0      | train
--------------------------------------------------------
15.4 K    Trainable params
303 M     Non-trainable params
303 M     Total params
1,213.268 Total estimated model params size (MB)
297       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=1` reached.


# Test

In [9]:
# Switch the model to evaluation mode before running the test loop.
model.eval()

# Run the test loop on the held-out loader; keeping the returned metrics as the
# cell's final expression displays them (a list of metric dicts in the recorded run).
test_metrics = trainer.test(model, dataloaders=test_loader)
test_metrics

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 2.343736410140991, 'test_acc': 0.25}]