# Set device to GPU if available

In [1]:
import torch
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


# Load Dataset

In [2]:
dataset_path = f"dataset/Taskent"

from image_dataset import *

dataset = PetroTrainTestSplitDataset(folder_path=dataset_path)

train_dataset = dataset['train']
test_dataset = dataset['test']

print(len(train_dataset))
print(len(test_dataset))

print(train_dataset[0])
print(test_dataset[0])

__init__ PetroSubImageDataset: dataset/Taskent
         , image_indices=None
         , sub_image_size=480
__init__ BaseSubImageDataset with: dataset/Taskent/img
         , image_indices=None
         , sub_image_size=480
         , mask=False
__init__ BaseSubImageDataset with: dataset/Taskent/masks_machine
         , image_indices=None
         , sub_image_size=480
         , mask=True


  images = torch.stack([torch.from_numpy(img) for img in images])


756
252
tensor([[[125, 132, 140,  ..., 132, 115, 113],
         [133, 135, 131,  ..., 129, 115, 113],
         [122, 122, 120,  ..., 134, 123, 115],
         ...,
         [134, 136, 126,  ..., 148, 148, 151],
         [133, 128, 121,  ..., 158, 157, 162],
         [136, 137, 139,  ..., 174, 171, 169]],

        [[125, 132, 140,  ..., 132, 115, 113],
         [133, 135, 131,  ..., 129, 115, 113],
         [122, 122, 120,  ..., 134, 123, 115],
         ...,
         [134, 136, 126,  ..., 148, 148, 151],
         [133, 128, 121,  ..., 158, 157, 162],
         [136, 137, 139,  ..., 174, 171, 169]],

        [[125, 132, 140,  ..., 132, 115, 113],
         [133, 135, 131,  ..., 129, 115, 113],
         [122, 122, 120,  ..., 134, 123, 115],
         ...,
         [134, 136, 126,  ..., 148, 148, 151],
         [133, 128, 121,  ..., 158, 157, 162],
         [136, 137, 139,  ..., 174, 171, 169]],

        [[  2,   2,   2,  ...,   2,   2,   2],
         [  2,   2,   2,  ...,   2,   2,   2],
    

Load DINO Model and Helper Functions

In [3]:
from torch.utils.data import DataLoader
from clip_dino import DINOSegmentation, compute_iou
from dino_model import DINOPatchClassifier
from tqdm import tqdm
from einops import rearrange
import numpy as np

In [4]:
# Load smallest dino model. ViT-S/8. Here ViT-S has ~22M parameters and
# works on 8x8 patches.
dino_model = torch.hub.load('facebookresearch/dino:main', 'dino_vits8')
dino_model.eval().to(device)

Using cache found in /home/ubuntu/.cache/torch/hub/facebookresearch_dino_main


VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(8, 8), stride=(8, 8))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-11): 12 x Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
  )
  (norm): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
  (head): Identity()
)

In [5]:
from torchvision import transforms as T

transform = T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))

# transform = T.Compose([
#     # T.Resize((480, 480)),
#     T.ToTensor(),
#     T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
# ])


In [6]:
def get_dino_tokens_batch(X_batch):

    # X_batch.shape = [batch_size,480,480,3]

    X_batch = X_batch.float() / 255.0 # normalize
    # X_batch = X_batch.permute(0,3,1,2) # [N,H,W,C] -> [N,C,H,W]
    X_transform = torch.stack([
        transform(x) for x in X_batch
    ])
    X_transform = X_transform.to(device)

    w, h = X_transform.shape[2:]
    # 480,480

    with torch.no_grad():
        attn = dino_model.get_last_selfattention(X_transform)[:, :, 0, 1:]
        # (N,6,3600)
        # print(f"attn.shape={attn.shape}")
        nh, tokens = attn.shape[1:]
        w_feat, h_feat = w // 8, h // 8
        attn = attn.reshape(-1, nh, w_feat, h_feat)
        attn = torch.nn.functional.interpolate(attn, scale_factor=8, mode="nearest").cpu().numpy()
        all_tokens = dino_model.get_intermediate_layers(X_transform, n=1)[0]  # (N, 1+pixels, D)
    return all_tokens.cpu()

In [7]:
import numpy as np
import torch
from einops import rearrange
from scipy.stats import mode

def get_patchwise_mode(Y_batch):

    if isinstance(Y_batch, torch.Tensor):
        Y_batch = Y_batch.cpu().numpy()

    # Rearrange to 8x8 patches
    patches = rearrange(Y_batch, 'n (h ph) (w pw) -> n h w (ph pw)', ph=8, pw=8)
    # shape: (N, H//8, W//8, 64)

    # Compute mode along last axis
    patch_modes = mode(patches, axis=-1).mode  # shape: (N, H//8, W//8)

    return torch.tensor(patch_modes, dtype=torch.uint8)

# WandB Login

In [8]:
from dotenv import load_dotenv
load_dotenv()

import os

os.environ["WANDB_API_KEY"] = os.getenv("WANDB_API_KEY")

import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mthisisrmak[0m ([33mthisisrmak-stanford[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Run train/test loop

In [9]:
wandb.init(
    project="dinov2-biotic-nb-10",
    # name="run-patchwise-mode",  # Optionally change per experiment
    config={
        "num_classes": 18,
        "batch_size": 64,
        "num_iters": 50,
        "hidden_dim": 768,
        "lr": 1e-3
    }
)

config = wandb.config

train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=config.batch_size)

model = DINOPatchClassifier(num_classes=config.num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=config.lr)
loss_fn = torch.nn.CrossEntropyLoss()

for iter in range(config.num_iters):

    ### TRAINING ###

    model.train()
    total_train_loss = 0.0
    correct_train = 0
    total_train = 0
    total_train_iou = 0.0

    pbar = tqdm(train_dataloader, desc=f"Train Epoch {iter+1}/{config.num_iters}")

    for batch in pbar:
        X_batch = batch[:, :-1]
        Y_batch = batch[:, -1]

        Y = get_patchwise_mode(Y_batch)
        Y = rearrange(Y, 'n h w -> (n h w)')
        Y = Y.to(device)

        with torch.no_grad():
            X = get_dino_tokens_batch(X_batch)  # [N, 3601, D=384]
            X = X[:, 1:, :]  # drop CLS token
            X = rearrange(X, 'n p d -> (n p) d')
            X = X.to(device)

        optimizer.zero_grad()
        logits = model(X)
        loss = loss_fn(logits, Y)
        loss.backward()
        optimizer.step()

        # Stats
        batch_loss = loss.item()
        total_train_loss += batch_loss

        preds = torch.argmax(logits, dim=1)
        correct_train += (preds == Y).sum().item()
        total_train += Y.size(0)

        batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), config.num_classes)
        total_train_iou += batch_iou

        pbar.set_postfix(loss=f"{batch_loss:.2f}", iou=f"{batch_iou:.2f}")

    avg_train_loss = total_train_loss / len(train_dataloader)
    avg_train_accuracy = correct_train / total_train
    avg_train_iou = total_train_iou / len(train_dataloader)



    # print(f"Epoch {iter+1}: Train Loss={avg_train_loss:.4f}, Accuracy={avg_train_accuracy:.4f}, IoU={avg_train_iou:.4f}")

     ### VALIDATION ###

    model.eval()
    total_val_loss = 0.0
    correct_val = 0
    total_val = 0
    total_val_iou = 0.0

    with torch.no_grad():
        pbar = tqdm(test_dataloader, desc=f"Val Epoch {iter+1}/{config.num_iters}")

        for batch in pbar:
            X_batch = batch[:, :-1]
            Y_batch = batch[:, -1]

            Y = get_patchwise_mode(Y_batch)
            Y = rearrange(Y, 'n h w -> (n h w)').to(device)

            X = get_dino_tokens_batch(X_batch)
            X = X[:, 1:, :]
            X = rearrange(X, 'n p d -> (n p) d').to(device)

            logits = model(X)
            loss = loss_fn(logits, Y)
            batch_loss = loss.item()
            total_val_loss += batch_loss

            preds = torch.argmax(logits, dim=1)
            correct_val += (preds == Y).sum().item()
            total_val += Y.size(0)

            batch_iou = compute_iou(preds.cpu().numpy(), Y.cpu().numpy(), config.num_classes)
            total_val_iou += batch_iou

            pbar.set_postfix(loss=f"{batch_loss:.2f}", iou=f"{batch_iou:.2f}")

    avg_val_loss = total_val_loss / len(test_dataloader)
    avg_val_accuracy = correct_val / total_val
    avg_val_iou = total_val_iou / len(test_dataloader)

    # ---------- LOGGING ----------
    wandb.log({
        "epoch": iter + 1,
        "train/loss": avg_train_loss,
        "train/accuracy": avg_train_accuracy,
        "train/iou": avg_train_iou,
        "val/loss": avg_val_loss,
        "val/accuracy": avg_val_accuracy,
        "val/iou": avg_val_iou
    })
    print()

    print(f"Epoch {iter+1}: "
          f"Train Loss={avg_train_loss:.4f}, Acc={avg_train_accuracy:.4f}, IoU={avg_train_iou:.4f} | "
          f"Val Loss={avg_val_loss:.4f}, Acc={avg_val_accuracy:.4f}, IoU={avg_val_iou:.4f}")

    print()

    # Save model checkpoint every 5 epochs
    if (iter + 1) % 5 == 0:
        checkpoint = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(), 
            'epoch': iter,
            'train_loss': avg_train_loss,
            'val_loss': avg_val_loss,
            'train_accuracy': avg_train_accuracy,
            'val_accuracy': avg_val_accuracy,
            'train_iou': avg_train_iou,
            'val_iou': avg_val_iou
        }
        
        torch.save(checkpoint, f'checkpoints/model_checkpoint_epoch_{iter+1}.pth')
        print(f"Saved model checkpoint to model_checkpoint_epoch_{iter+1}.pth")


wandb.finish()

Train Epoch 1/50: 100%|██████████| 12/12 [02:15<00:00, 11.29s/it, iou=0.04, loss=2.49]
Val Epoch 1/50: 100%|██████████| 4/4 [00:45<00:00, 11.37s/it, iou=0.04, loss=1.63]



Epoch 1: Train Loss=2.3683, Acc=0.4466, IoU=0.0308 | Val Loss=1.7224, Acc=0.5650, IoU=0.0334



Train Epoch 2/50: 100%|██████████| 12/12 [02:17<00:00, 11.43s/it, iou=0.05, loss=1.58]
Val Epoch 2/50: 100%|██████████| 4/4 [00:46<00:00, 11.54s/it, iou=0.05, loss=1.27]



Epoch 2: Train Loss=1.5737, Acc=0.5201, IoU=0.0414 | Val Loss=1.1276, Acc=0.5562, IoU=0.0506



Train Epoch 3/50: 100%|██████████| 12/12 [02:16<00:00, 11.36s/it, iou=0.07, loss=1.11]
Val Epoch 3/50: 100%|██████████| 4/4 [00:45<00:00, 11.45s/it, iou=0.08, loss=0.99]



Epoch 3: Train Loss=1.1455, Acc=0.6102, IoU=0.0517 | Val Loss=0.9862, Acc=0.6514, IoU=0.0634



Train Epoch 4/50: 100%|██████████| 12/12 [02:15<00:00, 11.29s/it, iou=0.10, loss=1.10]
Val Epoch 4/50: 100%|██████████| 4/4 [00:45<00:00, 11.32s/it, iou=0.11, loss=0.88]



Epoch 4: Train Loss=0.9442, Acc=0.6517, IoU=0.0650 | Val Loss=0.9706, Acc=0.6554, IoU=0.0751



Train Epoch 5/50: 100%|██████████| 12/12 [02:17<00:00, 11.42s/it, iou=0.09, loss=1.01]
Val Epoch 5/50: 100%|██████████| 4/4 [00:43<00:00, 10.99s/it, iou=0.11, loss=0.87]



Epoch 5: Train Loss=0.9849, Acc=0.6155, IoU=0.0578 | Val Loss=0.9146, Acc=0.6670, IoU=0.0777

Saved model checkpoint to model_checkpoint_epoch_5.pth


Train Epoch 6/50: 100%|██████████| 12/12 [02:16<00:00, 11.34s/it, iou=0.12, loss=0.90]
Val Epoch 6/50: 100%|██████████| 4/4 [00:44<00:00, 11.17s/it, iou=0.13, loss=0.78]



Epoch 6: Train Loss=0.8820, Acc=0.6639, IoU=0.0690 | Val Loss=0.8926, Acc=0.6830, IoU=0.0848



Train Epoch 7/50: 100%|██████████| 12/12 [02:15<00:00, 11.28s/it, iou=0.12, loss=0.83]
Val Epoch 7/50: 100%|██████████| 4/4 [00:44<00:00, 11.16s/it, iou=0.13, loss=0.76]



Epoch 7: Train Loss=0.8658, Acc=0.6635, IoU=0.0694 | Val Loss=0.8610, Acc=0.6916, IoU=0.0868



Train Epoch 8/50: 100%|██████████| 12/12 [02:14<00:00, 11.23s/it, iou=0.13, loss=0.79]
Val Epoch 8/50: 100%|██████████| 4/4 [00:45<00:00, 11.26s/it, iou=0.13, loss=0.75]



Epoch 8: Train Loss=0.8426, Acc=0.6724, IoU=0.0737 | Val Loss=0.8508, Acc=0.6948, IoU=0.0898



Train Epoch 9/50: 100%|██████████| 12/12 [02:15<00:00, 11.30s/it, iou=0.14, loss=0.74]
Val Epoch 9/50: 100%|██████████| 4/4 [00:45<00:00, 11.27s/it, iou=0.14, loss=0.73]



Epoch 9: Train Loss=0.8178, Acc=0.6820, IoU=0.0761 | Val Loss=0.8374, Acc=0.7005, IoU=0.0931



Train Epoch 10/50: 100%|██████████| 12/12 [02:17<00:00, 11.44s/it, iou=0.15, loss=0.69]
Val Epoch 10/50: 100%|██████████| 4/4 [00:44<00:00, 11.19s/it, iou=0.14, loss=0.71]



Epoch 10: Train Loss=0.7899, Acc=0.6934, IoU=0.0799 | Val Loss=0.8190, Acc=0.7074, IoU=0.0943

Saved model checkpoint to model_checkpoint_epoch_10.pth


Train Epoch 11/50: 100%|██████████| 12/12 [02:15<00:00, 11.29s/it, iou=0.16, loss=0.66]
Val Epoch 11/50: 100%|██████████| 4/4 [00:44<00:00, 11.09s/it, iou=0.14, loss=0.70]



Epoch 11: Train Loss=0.7686, Acc=0.7014, IoU=0.0824 | Val Loss=0.8079, Acc=0.7121, IoU=0.0967



Train Epoch 12/50: 100%|██████████| 12/12 [02:16<00:00, 11.36s/it, iou=0.16, loss=0.64]
Val Epoch 12/50: 100%|██████████| 4/4 [00:44<00:00, 11.16s/it, iou=0.14, loss=0.69]



Epoch 12: Train Loss=0.7528, Acc=0.7067, IoU=0.0847 | Val Loss=0.8011, Acc=0.7148, IoU=0.0978



Train Epoch 13/50: 100%|██████████| 12/12 [02:17<00:00, 11.43s/it, iou=0.16, loss=0.62]
Val Epoch 13/50: 100%|██████████| 4/4 [00:45<00:00, 11.36s/it, iou=0.14, loss=0.68]



Epoch 13: Train Loss=0.7388, Acc=0.7124, IoU=0.0870 | Val Loss=0.7923, Acc=0.7180, IoU=0.0995



Train Epoch 14/50: 100%|██████████| 12/12 [02:16<00:00, 11.36s/it, iou=0.17, loss=0.59]
Val Epoch 14/50: 100%|██████████| 4/4 [00:45<00:00, 11.38s/it, iou=0.15, loss=0.67]



Epoch 14: Train Loss=0.7191, Acc=0.7210, IoU=0.0899 | Val Loss=0.7851, Acc=0.7211, IoU=0.1002



Train Epoch 15/50: 100%|██████████| 12/12 [02:16<00:00, 11.35s/it, iou=0.17, loss=0.57]
Val Epoch 15/50: 100%|██████████| 4/4 [00:45<00:00, 11.32s/it, iou=0.15, loss=0.66]



Epoch 15: Train Loss=0.7033, Acc=0.7278, IoU=0.0923 | Val Loss=0.7751, Acc=0.7251, IoU=0.1019

Saved model checkpoint to model_checkpoint_epoch_15.pth


Train Epoch 16/50: 100%|██████████| 12/12 [02:16<00:00, 11.40s/it, iou=0.17, loss=0.55]
Val Epoch 16/50: 100%|██████████| 4/4 [00:45<00:00, 11.45s/it, iou=0.15, loss=0.66]



Epoch 16: Train Loss=0.6880, Acc=0.7337, IoU=0.0947 | Val Loss=0.7689, Acc=0.7280, IoU=0.1022



Train Epoch 17/50: 100%|██████████| 12/12 [02:14<00:00, 11.25s/it, iou=0.17, loss=0.54]
Val Epoch 17/50: 100%|██████████| 4/4 [00:45<00:00, 11.43s/it, iou=0.15, loss=0.65]



Epoch 17: Train Loss=0.6778, Acc=0.7379, IoU=0.0968 | Val Loss=0.7620, Acc=0.7303, IoU=0.1041



Train Epoch 18/50: 100%|██████████| 12/12 [02:17<00:00, 11.45s/it, iou=0.17, loss=0.53]
Val Epoch 18/50: 100%|██████████| 4/4 [00:45<00:00, 11.48s/it, iou=0.15, loss=0.65]



Epoch 18: Train Loss=0.6658, Acc=0.7433, IoU=0.0990 | Val Loss=0.7604, Acc=0.7308, IoU=0.1039



Train Epoch 19/50: 100%|██████████| 12/12 [02:16<00:00, 11.38s/it, iou=0.18, loss=0.51]
Val Epoch 19/50: 100%|██████████| 4/4 [00:45<00:00, 11.47s/it, iou=0.15, loss=0.64]



Epoch 19: Train Loss=0.6547, Acc=0.7487, IoU=0.1013 | Val Loss=0.7530, Acc=0.7331, IoU=0.1055



Train Epoch 20/50: 100%|██████████| 12/12 [02:19<00:00, 11.62s/it, iou=0.18, loss=0.50]
Val Epoch 20/50: 100%|██████████| 4/4 [00:45<00:00, 11.45s/it, iou=0.15, loss=0.64]



Epoch 20: Train Loss=0.6405, Acc=0.7551, IoU=0.1039 | Val Loss=0.7480, Acc=0.7359, IoU=0.1058

Saved model checkpoint to model_checkpoint_epoch_20.pth


Train Epoch 21/50: 100%|██████████| 12/12 [02:16<00:00, 11.38s/it, iou=0.20, loss=0.49]
Val Epoch 21/50: 100%|██████████| 4/4 [00:44<00:00, 11.22s/it, iou=0.15, loss=0.63]



Epoch 21: Train Loss=0.6298, Acc=0.7597, IoU=0.1076 | Val Loss=0.7400, Acc=0.7390, IoU=0.1085



Train Epoch 22/50: 100%|██████████| 12/12 [02:17<00:00, 11.46s/it, iou=0.22, loss=0.48]
Val Epoch 22/50: 100%|██████████| 4/4 [00:45<00:00, 11.50s/it, iou=0.15, loss=0.63]



Epoch 22: Train Loss=0.6204, Acc=0.7634, IoU=0.1105 | Val Loss=0.7384, Acc=0.7403, IoU=0.1095



Train Epoch 23/50: 100%|██████████| 12/12 [02:16<00:00, 11.40s/it, iou=0.23, loss=0.47]
Val Epoch 23/50: 100%|██████████| 4/4 [00:46<00:00, 11.61s/it, iou=0.16, loss=0.63]



Epoch 23: Train Loss=0.6131, Acc=0.7670, IoU=0.1136 | Val Loss=0.7363, Acc=0.7407, IoU=0.1149



Train Epoch 24/50: 100%|██████████| 12/12 [02:16<00:00, 11.36s/it, iou=0.23, loss=0.46]
Val Epoch 24/50: 100%|██████████| 4/4 [00:45<00:00, 11.40s/it, iou=0.15, loss=0.63]



Epoch 24: Train Loss=0.6065, Acc=0.7702, IoU=0.1139 | Val Loss=0.7343, Acc=0.7417, IoU=0.1173



Train Epoch 25/50: 100%|██████████| 12/12 [02:15<00:00, 11.33s/it, iou=0.24, loss=0.45]
Val Epoch 25/50: 100%|██████████| 4/4 [00:45<00:00, 11.32s/it, iou=0.16, loss=0.63]



Epoch 25: Train Loss=0.5946, Acc=0.7761, IoU=0.1177 | Val Loss=0.7328, Acc=0.7428, IoU=0.1206

Saved model checkpoint to model_checkpoint_epoch_25.pth


Train Epoch 26/50: 100%|██████████| 12/12 [02:17<00:00, 11.46s/it, iou=0.24, loss=0.44]
Val Epoch 26/50: 100%|██████████| 4/4 [00:46<00:00, 11.50s/it, iou=0.15, loss=0.63]



Epoch 26: Train Loss=0.5910, Acc=0.7782, IoU=0.1160 | Val Loss=0.7250, Acc=0.7468, IoU=0.1205



Train Epoch 27/50: 100%|██████████| 12/12 [02:16<00:00, 11.37s/it, iou=0.24, loss=0.43]
Val Epoch 27/50: 100%|██████████| 4/4 [00:45<00:00, 11.43s/it, iou=0.16, loss=0.62]



Epoch 27: Train Loss=0.5756, Acc=0.7843, IoU=0.1209 | Val Loss=0.7252, Acc=0.7477, IoU=0.1226



Train Epoch 28/50: 100%|██████████| 12/12 [02:15<00:00, 11.31s/it, iou=0.24, loss=0.43]
Val Epoch 28/50: 100%|██████████| 4/4 [00:45<00:00, 11.46s/it, iou=0.15, loss=0.64]



Epoch 28: Train Loss=0.5832, Acc=0.7826, IoU=0.1172 | Val Loss=0.7214, Acc=0.7504, IoU=0.1197



Train Epoch 29/50: 100%|██████████| 12/12 [02:16<00:00, 11.41s/it, iou=0.24, loss=0.42]
Val Epoch 29/50: 100%|██████████| 4/4 [00:45<00:00, 11.34s/it, iou=0.16, loss=0.62]



Epoch 29: Train Loss=0.5655, Acc=0.7888, IoU=0.1220 | Val Loss=0.7215, Acc=0.7486, IoU=0.1235



Train Epoch 30/50: 100%|██████████| 12/12 [02:15<00:00, 11.25s/it, iou=0.24, loss=0.43]
Val Epoch 30/50: 100%|██████████| 4/4 [00:45<00:00, 11.40s/it, iou=0.15, loss=0.65]



Epoch 30: Train Loss=0.5757, Acc=0.7869, IoU=0.1186 | Val Loss=0.7260, Acc=0.7499, IoU=0.1179

Saved model checkpoint to model_checkpoint_epoch_30.pth


Train Epoch 31/50: 100%|██████████| 12/12 [02:16<00:00, 11.41s/it, iou=0.24, loss=0.42]
Val Epoch 31/50: 100%|██████████| 4/4 [00:45<00:00, 11.39s/it, iou=0.17, loss=0.62]



Epoch 31: Train Loss=0.5566, Acc=0.7946, IoU=0.1219 | Val Loss=0.7171, Acc=0.7501, IoU=0.1257



Train Epoch 32/50: 100%|██████████| 12/12 [02:16<00:00, 11.34s/it, iou=0.24, loss=0.42]
Val Epoch 32/50: 100%|██████████| 4/4 [00:46<00:00, 11.54s/it, iou=0.15, loss=0.64]



Epoch 32: Train Loss=0.5577, Acc=0.7937, IoU=0.1208 | Val Loss=0.7234, Acc=0.7529, IoU=0.1174



Train Epoch 33/50: 100%|██████████| 12/12 [02:17<00:00, 11.46s/it, iou=0.23, loss=0.44]
Val Epoch 33/50: 100%|██████████| 4/4 [00:44<00:00, 11.06s/it, iou=0.17, loss=0.63]



Epoch 33: Train Loss=0.5515, Acc=0.7989, IoU=0.1205 | Val Loss=0.7108, Acc=0.7547, IoU=0.1264



Train Epoch 34/50: 100%|██████████| 12/12 [02:17<00:00, 11.45s/it, iou=0.24, loss=0.41]
Val Epoch 34/50: 100%|██████████| 4/4 [00:45<00:00, 11.32s/it, iou=0.15, loss=0.63]



Epoch 34: Train Loss=0.5430, Acc=0.7991, IoU=0.1236 | Val Loss=0.7223, Acc=0.7552, IoU=0.1179



Train Epoch 35/50: 100%|██████████| 12/12 [02:18<00:00, 11.57s/it, iou=0.23, loss=0.47]
Val Epoch 35/50: 100%|██████████| 4/4 [00:46<00:00, 11.63s/it, iou=0.17, loss=0.65]



Epoch 35: Train Loss=0.5540, Acc=0.7995, IoU=0.1193 | Val Loss=0.7202, Acc=0.7524, IoU=0.1254

Saved model checkpoint to model_checkpoint_epoch_35.pth


Train Epoch 36/50: 100%|██████████| 12/12 [02:17<00:00, 11.49s/it, iou=0.25, loss=0.39]
Val Epoch 36/50: 100%|██████████| 4/4 [00:45<00:00, 11.43s/it, iou=0.16, loss=0.62]



Epoch 36: Train Loss=0.5286, Acc=0.8049, IoU=0.1279 | Val Loss=0.7233, Acc=0.7523, IoU=0.1197



Train Epoch 37/50: 100%|██████████| 12/12 [02:17<00:00, 11.49s/it, iou=0.24, loss=0.43]
Val Epoch 37/50: 100%|██████████| 4/4 [00:45<00:00, 11.41s/it, iou=0.17, loss=0.65]



Epoch 37: Train Loss=0.5409, Acc=0.8034, IoU=0.1234 | Val Loss=0.7132, Acc=0.7544, IoU=0.1252



Train Epoch 38/50: 100%|██████████| 12/12 [02:17<00:00, 11.48s/it, iou=0.25, loss=0.37]
Val Epoch 38/50: 100%|██████████| 4/4 [00:47<00:00, 11.81s/it, iou=0.17, loss=0.60]



Epoch 38: Train Loss=0.5044, Acc=0.8150, IoU=0.1331 | Val Loss=0.7205, Acc=0.7552, IoU=0.1214



Train Epoch 39/50: 100%|██████████| 12/12 [02:16<00:00, 11.39s/it, iou=0.25, loss=0.39]
Val Epoch 39/50: 100%|██████████| 4/4 [00:46<00:00, 11.64s/it, iou=0.17, loss=0.63]



Epoch 39: Train Loss=0.5295, Acc=0.8069, IoU=0.1277 | Val Loss=0.7002, Acc=0.7612, IoU=0.1259



Train Epoch 40/50: 100%|██████████| 12/12 [02:15<00:00, 11.25s/it, iou=0.25, loss=0.36]
Val Epoch 40/50: 100%|██████████| 4/4 [00:48<00:00, 12.04s/it, iou=0.17, loss=0.60]



Epoch 40: Train Loss=0.4943, Acc=0.8176, IoU=0.1345 | Val Loss=0.7163, Acc=0.7579, IoU=0.1213

Saved model checkpoint to model_checkpoint_epoch_40.pth


Train Epoch 41/50: 100%|██████████| 12/12 [02:18<00:00, 11.51s/it, iou=0.25, loss=0.39]
Val Epoch 41/50: 100%|██████████| 4/4 [00:47<00:00, 11.83s/it, iou=0.17, loss=0.64]



Epoch 41: Train Loss=0.5257, Acc=0.8089, IoU=0.1316 | Val Loss=0.7095, Acc=0.7581, IoU=0.1255



Train Epoch 42/50: 100%|██████████| 12/12 [02:19<00:00, 11.61s/it, iou=0.25, loss=0.36]
Val Epoch 42/50: 100%|██████████| 4/4 [00:45<00:00, 11.48s/it, iou=0.18, loss=0.61]



Epoch 42: Train Loss=0.4975, Acc=0.8166, IoU=0.1330 | Val Loss=0.7217, Acc=0.7512, IoU=0.1241



Train Epoch 43/50: 100%|██████████| 12/12 [02:17<00:00, 11.43s/it, iou=0.25, loss=0.37]
Val Epoch 43/50: 100%|██████████| 4/4 [00:46<00:00, 11.56s/it, iou=0.17, loss=0.65]



Epoch 43: Train Loss=0.5158, Acc=0.8129, IoU=0.1353 | Val Loss=0.7189, Acc=0.7559, IoU=0.1233



Train Epoch 44/50: 100%|██████████| 12/12 [02:15<00:00, 11.31s/it, iou=0.25, loss=0.34]
Val Epoch 44/50: 100%|██████████| 4/4 [00:45<00:00, 11.36s/it, iou=0.18, loss=0.61]



Epoch 44: Train Loss=0.4949, Acc=0.8202, IoU=0.1306 | Val Loss=0.6997, Acc=0.7597, IoU=0.1268



Train Epoch 45/50: 100%|██████████| 12/12 [02:16<00:00, 11.40s/it, iou=0.25, loss=0.35]
Val Epoch 45/50: 100%|██████████| 4/4 [00:46<00:00, 11.54s/it, iou=0.17, loss=0.64]



Epoch 45: Train Loss=0.4904, Acc=0.8226, IoU=0.1382 | Val Loss=0.7153, Acc=0.7622, IoU=0.1209

Saved model checkpoint to model_checkpoint_epoch_45.pth


Train Epoch 46/50: 100%|██████████| 12/12 [02:16<00:00, 11.33s/it, iou=0.25, loss=0.35]
Val Epoch 46/50: 100%|██████████| 4/4 [00:47<00:00, 11.89s/it, iou=0.18, loss=0.65]



Epoch 46: Train Loss=0.5044, Acc=0.8213, IoU=0.1278 | Val Loss=0.6963, Acc=0.7675, IoU=0.1281



Train Epoch 47/50: 100%|██████████| 12/12 [02:16<00:00, 11.34s/it, iou=0.26, loss=0.34]
Val Epoch 47/50: 100%|██████████| 4/4 [00:44<00:00, 11.16s/it, iou=0.18, loss=0.61]



Epoch 47: Train Loss=0.4726, Acc=0.8283, IoU=0.1402 | Val Loss=0.7028, Acc=0.7635, IoU=0.1236



Train Epoch 48/50: 100%|██████████| 12/12 [02:15<00:00, 11.27s/it, iou=0.25, loss=0.38]
Val Epoch 48/50: 100%|██████████| 4/4 [00:45<00:00, 11.41s/it, iou=0.18, loss=0.65]



Epoch 48: Train Loss=0.4898, Acc=0.8223, IoU=0.1313 | Val Loss=0.7050, Acc=0.7589, IoU=0.1285



Train Epoch 49/50: 100%|██████████| 12/12 [02:17<00:00, 11.44s/it, iou=0.26, loss=0.34]
Val Epoch 49/50: 100%|██████████| 4/4 [00:46<00:00, 11.63s/it, iou=0.19, loss=0.61]



Epoch 49: Train Loss=0.4698, Acc=0.8290, IoU=0.1418 | Val Loss=0.7153, Acc=0.7588, IoU=0.1229



Train Epoch 50/50: 100%|██████████| 12/12 [02:19<00:00, 11.62s/it, iou=0.25, loss=0.38]
Val Epoch 50/50: 100%|██████████| 4/4 [00:46<00:00, 11.51s/it, iou=0.18, loss=0.66]
[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.



Epoch 50: Train Loss=0.4834, Acc=0.8253, IoU=0.1354 | Val Loss=0.7097, Acc=0.7581, IoU=0.1277

Saved model checkpoint to model_checkpoint_epoch_50.pth


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/accuracy,▁▂▄▅▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
train/iou,▁▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇█▇███▇█▇█
train/loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/accuracy,▁▁▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇████▇██████
val/iou,▁▂▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█▇█▇▇██▇██▇███▇████
val/loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
train/accuracy,0.82526
train/iou,0.13539
train/loss,0.48342
val/accuracy,0.75808
val/iou,0.12774
val/loss,0.70965


In [None]:
# checkpoint = {
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(), 
#     'epoch': iter,
#     'train_loss': avg_train_loss,
#     'val_loss': avg_val_loss,
#     'train_accuracy': avg_train_accuracy,
#     'val_accuracy': avg_val_accuracy,
#     'train_iou': avg_train_iou,
#     'val_iou': avg_val_iou
# }

# torch.save(checkpoint, f'checkpoints/model_checkpoint_epoch_{iter+1}.pth')
# print(f"Saved model checkpoint to model_checkpoint_epoch_{iter+1}.pth")
