# Лабораторная работа №7: Исследования с моделями семантической сегментации

## 1. Выбор начальных условий

### Набор данных

Camvid dataset

Задача: сегментация дорожных сцен 

In [1]:
!pip install kaggle
!pip install pandas



In [3]:
!kaggle datasets download -d naureenmohammad/camvid-dataset -p data --unzip

Dataset URL: https://www.kaggle.com/datasets/naureenmohammad/camvid-dataset
License(s): unknown


In [15]:
%pip install pycocotools
%pip install segmentation_models_pytorch
%pip install opencv-python
%pip install --upgrade albumentations

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting albumentations
  Using cached albumentations-2.0.6-py3-none-any.whl.metadata (43 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Downloading pydantic-2.11.4-py3-none-any.whl.metadata (66 kB)
Collecting albucore==0.0.24 (from albumentations)
  Downloading albucore-0.0.24-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl.metadata (20 kB)
Collecting stringzilla>=3.10.4 (from albucore==0.0.24->albumentations)
  Downloading stringzilla-3.12.5-cp311-cp311-macosx_11_0_arm64.whl.metadata (80 kB)
Collecting simsimd>=5.9.2 (from albucore==0.0.24->albumentations)
  Downloading simsimd-6.2.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (66 kB)
Collecting annot

In [60]:
import os
import cv2
import torch
from torch.utils.data import Dataset
import numpy as np
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


device = torch.device("cpu")


In [61]:

class CamVidDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None):
        super().__init__()
        self.images_dir = images_dir
        self.masks_dir  = masks_dir
        self.images = sorted(os.listdir(images_dir))
        self.masks  = sorted(os.listdir(masks_dir))
        assert len(self.images) == len(self.masks), \
            "Число изображений и масок должно совпадать"
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path  = os.path.join(self.images_dir, self.images[idx])
        mask_path = os.path.join(self.masks_dir,  self.masks[idx])
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        mask  = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented['image'], augmented['mask']

        return image, mask.long()

### Выбор метрики

В качестве метрики качества будем использовать F1 score (и дополнительно IoU) - стандартные метрики для задач подобного типа

## 2. Создание бейзлайна и оценка качества

### Обучение сверточной модели

In [18]:
%pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118

Looking in indexes: https://download.pytorch.org/whl/cu118
Note: you may need to restart the kernel to use updated packages.


In [62]:
import torch
import torch.nn as nn
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.metrics.functional import get_stats, iou_score, f1_score
from torch.utils.data import DataLoader
from typing import Dict

def evaluate_segmentation(
    model: nn.Module,
    dataloader: DataLoader,
    device: torch.device,
    num_classes: int
) -> Dict[str, float]:
    model.eval()
    dice_criterion = smp.losses.DiceLoss(mode='multiclass')
    ce_criterion   = nn.CrossEntropyLoss()

    cumulative_loss = 0.0
    cumulative_iou  = 0.0
    cumulative_f1   = 0.0
    batch_count     = 0

    with torch.no_grad():
        for images, masks in dataloader:
            images = images.to(device)
            masks  = masks.to(device)

            logits = model(images)
            loss   = dice_criterion(logits, masks) + ce_criterion(logits, masks)
            cumulative_loss += loss.item()

            preds = logits.argmax(dim=1)
            tp, fp, fn, tn = get_stats(
                preds, masks,
                mode='multiclass',
                num_classes=num_classes
            )
            cumulative_iou += iou_score(tp, fp, fn, tn, reduction='micro').item()
            cumulative_f1  += f1_score(tp, fp, fn, tn, reduction='micro').item()

            batch_count += 1

    avg_loss   = cumulative_loss / batch_count
    avg_iou    = cumulative_iou  / batch_count
    avg_f1     = cumulative_f1   / batch_count

    return {
        'loss': avg_loss,
        'iou':  avg_iou,
        'f1_score': avg_f1
    }

In [63]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import segmentation_models_pytorch as smp

def train_and_validate(
    model: nn.Module,
    train_loader: torch.utils.data.DataLoader,
    val_loader: torch.utils.data.DataLoader,
    num_epochs: int,
    learning_rate: float = 1e-3,
    device: torch.device = torch.device('cpu')
) -> None:
    model.to(device)

    dice_loss_fn = smp.losses.DiceLoss(mode='multiclass')
    ce_loss_fn   = nn.CrossEntropyLoss()
    optimizer    = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(1, num_epochs + 1):
        model.train()
        total_train_loss = 0.0
        for images, masks in tqdm(train_loader, desc=f"Train Epoch {epoch}", leave=False):
            images = images.to(device)
            masks  = masks.to(device)

            optimizer.zero_grad()
            logits = model(images)
            loss   = dice_loss_fn(logits, masks) + ce_loss_fn(logits, masks)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        metrics = evaluate_segmentation(
            model, val_loader, device, num_classes=model.num_classes if hasattr(model, 'num_classes') else masks.max().item()+1
        )

        print(
            f"Epoch {epoch}/{num_epochs} | "
            f"Train Loss: {avg_train_loss:.4f} | "
            f"Val Loss: {metrics['loss']:.4f} | "
            f"IoU: {metrics['iou']:.4f} | "
            f"F1: {metrics['f1_score']:.4f}"
        )


In [64]:
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

train_transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.RandomCrop(224, 224),
    A.Normalize(),
    ToTensorV2()
])
val_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(),
    ToTensorV2()
])

train_ds = CamVidDataset(
    images_dir="data/train",
    masks_dir ="data/trainannot",
    transform=train_transform
)
val_ds = CamVidDataset(
    images_dir="data/val",
    masks_dir ="data/valannot",
    transform=val_transform
)

train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    pin_memory=True
)
val_loader = DataLoader(
    val_ds,
    batch_size=32,
    shuffle=False,
    pin_memory=True
)

print(f"Train samples: {len(train_ds)}")
print(f"Val   samples: {len(val_ds)}")

Train samples: 367
Val   samples: 101


In [66]:
NUM_CLASSES = 32
model_unet = smp.Unet(
   encoder_name="resnet18",
    encoder_weights="imagenet",
    classes=NUM_CLASSES
).to(device)

train_and_validate(model_unet, train_loader, val_loader, num_epochs=5, learning_rate=1e-3)

                                                              

Epoch 1/5 | Train Loss: 3.1451 | Val Loss: 9.8100 | IoU: 0.0326 | F1: 0.0631


                                                              

Epoch 2/5 | Train Loss: 1.9568 | Val Loss: 1.5185 | IoU: 0.6539 | F1: 0.7906


                                                              

Epoch 3/5 | Train Loss: 1.2732 | Val Loss: 1.2162 | IoU: 0.6490 | F1: 0.7870


                                                              

Epoch 4/5 | Train Loss: 0.9889 | Val Loss: 0.9939 | IoU: 0.6542 | F1: 0.7908


                                                              

Epoch 5/5 | Train Loss: 0.8551 | Val Loss: 0.9008 | IoU: 0.7228 | F1: 0.8385


### Оценка качества сверточной модели

Получаем хороший результат = 0.8385

### Обучение трансформерной модели

In [68]:
model_segformer = smp.Segformer(
    encoder_name="mit_b0",
    encoder_weights="imagenet",
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None
).to(device)

train_and_validate(model_segformer, train_loader, val_loader, num_epochs=5, learning_rate=1e-3)

                                                              

Epoch 1/5 | Train Loss: 1.7280 | Val Loss: 1.1746 | IoU: 0.6308 | F1: 0.7733


                                                              

Epoch 2/5 | Train Loss: 0.8911 | Val Loss: 0.7963 | IoU: 0.7098 | F1: 0.8296


                                                              

Epoch 3/5 | Train Loss: 0.7724 | Val Loss: 0.7107 | IoU: 0.7208 | F1: 0.8372


                                                              

Epoch 4/5 | Train Loss: 0.6837 | Val Loss: 0.7111 | IoU: 0.7087 | F1: 0.8288


                                                              

Epoch 5/5 | Train Loss: 0.6184 | Val Loss: 0.5679 | IoU: 0.7746 | F1: 0.8725


### Оценка качества трансформерной модели

Получаем хороший результат = 0.8725

## 3. Улучшение бейзлайна

### Гипотеза

Добавление цветовых искажений, случайных поворотов и обрезок — увеличат разнообразие обучающих примеров и улучшат обобщающую способность модели. Переход на оптимизатор AdamW в связке с планировщиком скорости обучения (scheduler) обеспечит более стабилизированную и адаптивную динамику обновления весов, что поможет избежать переобучения. 

### Обучение сверточной модели с использованием гипотез

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

improv_transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ColorJitter(p=0.5),
    A.RandomCrop(224, 224),
    A.Normalize(),
    ToTensorV2()
])

model_unet = smp.Unet(
   encoder_name="resnet18",
    encoder_weights="imagenet",
    classes=NUM_CLASSES
).to(device)

optimizer = optim.AdamW(model_unet.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
dice_loss = smp.losses.DiceLoss(mode='multiclass')
ce_loss   = nn.CrossEntropyLoss()

In [None]:
from typing import Optional

def train_and_validate(
    model: nn.Module,
    train_loader: torch.utils.data.DataLoader,
    val_loader:   torch.utils.data.DataLoader,
    num_epochs:   int,
    optimizer,
    scheduler:    Optional[optim.lr_scheduler._LRScheduler] = None,
    device:       torch.device = torch.device('cpu')
) -> None:
    model.to(device)

    dice_loss_fn = smp.losses.DiceLoss(mode='multiclass')
    ce_loss_fn   = nn.CrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        current_lr = optimizer.param_groups[0]['lr']
        print(f"\nEpoch {epoch}/{num_epochs} — LR: {current_lr:.2e}")

        model.train()
        total_train_loss = 0.0
        for images, masks in tqdm(train_loader, desc="  Train", leave=False):
            images = images.to(device)
            masks  = masks.to(device)

            optimizer.zero_grad()
            logits = model(images)
            loss   = dice_loss_fn(logits, masks) + ce_loss_fn(logits, masks)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        metrics = evaluate_segmentation(
            model,
            val_loader,
            device,
            num_classes=getattr(model, 'num_classes', masks.max().item() + 1)
        )

        print(
            f"  Train Loss: {avg_train_loss:.4f}  "
            f"| Val Loss: {metrics['loss']:.4f}  "
            f"| IoU: {metrics['iou']:.4f}  "
            f"| F1: {metrics['f1_score']:.4f}"
        )

        if scheduler is not None:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(metrics['loss'])
            else:
                scheduler.step()

In [73]:
train_and_validate(model_unet, train_loader, val_loader, num_epochs=5, learning_rate=1e-3, scheduler=scheduler)


Epoch 1/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.8596  | Val Loss: 1.0574  | IoU: 0.6430  | F1: 0.7823

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.7541  | Val Loss: 0.7783  | IoU: 0.7381  | F1: 0.8485

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.6973  | Val Loss: 0.8140  | IoU: 0.7183  | F1: 0.8356

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.6849  | Val Loss: 0.7010  | IoU: 0.7421  | F1: 0.8511

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.6637  | Val Loss: 0.8330  | IoU: 0.7115  | F1: 0.8294


### Оценка качества улучшенной сверточной модели

Получаем F1 = 0.8294, гипотезы не улучшили бейзлайн модели

### Обучение трансформерной модели с использованием гипотез

In [None]:
model_segformer = smp.Segformer(
    encoder_name="mit_b0",
    encoder_weights="imagenet",
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None
).to(device)

optimizer = optim.AdamW(model_unet.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
dice_loss = smp.losses.DiceLoss(mode='multiclass')
ce_loss   = nn.CrossEntropyLoss()

In [None]:
train_and_validate(model_segformer, train_loader, val_loader, num_epochs=5, optimizer=optimizer, scheduler=scheduler)


Epoch 1/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.7447  | Val Loss: 0.5606  | IoU: 0.7748  | F1: 0.8728

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.6014  | Val Loss: 0.5525  | IoU: 0.7748  | F1: 0.8727

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.5436  | Val Loss: 0.5150  | IoU: 0.7904  | F1: 0.8828

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.5140  | Val Loss: 0.5006  | IoU: 0.7952  | F1: 0.8856

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 0.4887  | Val Loss: 0.4862  | IoU: 0.8035  | F1: 0.8907


### Оценка качества улучшенной трансформерной модели

Получаем F1 = 0.8907, что лучше базовой модели

## 4. Имплементация алгоритма машинного обучения

### Самостоятельная имплементация модели

In [76]:
import torch
import torch.nn as nn
import torch.optim as optim
import segmentation_models_pytorch as smp
from tqdm import tqdm
from segmentation_models_pytorch.metrics.functional import get_stats, iou_score, f1_score


class DoubleConvBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.double_conv(x)


class CustomUNet(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.down1 = DoubleConvBlock(3, 64)
        self.down2 = DoubleConvBlock(64, 128)
        self.down3 = DoubleConvBlock(128, 256)
        self.bottleneck = DoubleConvBlock(256, 512)

        self.pool = nn.MaxPool2d(kernel_size=2)
        self.up3  = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.up2  = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.up1  = nn.ConvTranspose2d(128,  64, kernel_size=2, stride=2)

        self.upconv3 = DoubleConvBlock(512, 256)
        self.upconv2 = DoubleConvBlock(256, 128)
        self.upconv1 = DoubleConvBlock(128,  64)

        self.classifier = nn.Conv2d(64, num_classes, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1 = self.down1(x)
        x2 = self.down2(self.pool(x1))
        x3 = self.down3(self.pool(x2))
        x4 = self.bottleneck(self.pool(x3))

        u3 = self.upconv3(torch.cat([self.up3(x4), x3], dim=1))
        u2 = self.upconv2(torch.cat([self.up2(u3), x2], dim=1))
        u1 = self.upconv1(torch.cat([self.up1(u2), x1], dim=1))

        return self.classifier(u1)

class CustomSegmentationTransformer(nn.Module):
    def __init__(
        self,
        img_size: int = 224,
        patch_size: int = 32,
        in_channels: int = 3,
        embed_dim: int = 128,
        num_heads: int = 4,
        num_layers: int = 2,
        num_classes: int = 32
    ):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2

        self.patch_embed = nn.Conv2d(
            in_channels, embed_dim,
            kernel_size=patch_size, stride=patch_size
        )
        self.positional_embedding = nn.Parameter(
            torch.zeros(1, num_patches, embed_dim)
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=embed_dim * 2,
            dropout=0.1,
            activation='gelu',
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(embed_dim, embed_dim,
                               kernel_size=patch_size,
                               stride=patch_size),
            nn.ReLU(inplace=True),
            nn.Conv2d(embed_dim, embed_dim // 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(embed_dim // 2, num_classes, kernel_size=1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.patch_embed(x)
        B, C, H, W = x.shape

        x = x.flatten(2).transpose(1, 2) + self.positional_embedding
        x = self.transformer_encoder(x)
        x = x.transpose(1, 2).view(B, C, H, W)

        return self.decoder(x)


def evaluate_custom(
    model: nn.Module,
    loader: torch.utils.data.DataLoader,
    device: torch.device,
    num_classes: int
) -> dict[str, float]:
    model.eval()
    dice_fn = smp.losses.DiceLoss(mode='multiclass')
    ce_fn   = nn.CrossEntropyLoss()

    sum_loss = 0.0
    sum_iou  = 0.0
    sum_f1   = 0.0
    count    = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs  = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            loss   = dice_fn(logits, targets) + ce_fn(logits, targets)
            sum_loss += loss.item()

            preds = logits.argmax(dim=1)
            tp, fp, fn, tn = get_stats(
                preds, targets,
                mode='multiclass',
                num_classes=num_classes
            )
            sum_iou += iou_score(tp, fp, fn, tn, reduction='micro').item()
            sum_f1  += f1_score(tp, fp, fn, tn, reduction='micro').item()
            count  += 1

    return {
        'loss': sum_loss / count,
        'iou':  sum_iou  / count,
        'f1':   sum_f1   / count
    }

def train_custom_model(
    model: nn.Module,
    train_loader: torch.utils.data.DataLoader,
    val_loader:   torch.utils.data.DataLoader,
    num_epochs:   int,
    learning_rate: float                 = 1e-3,
    scheduler:    Optional[optim.lr_scheduler._LRScheduler] = None,
    device:       torch.device            = torch.device('cpu')
) -> None:
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    dice_loss_fn = smp.losses.DiceLoss(mode='multiclass')
    ce_loss_fn   = nn.CrossEntropyLoss()

    for epoch in range(1, num_epochs + 1):
        current_lr = optimizer.param_groups[0]['lr']
        print(f"\nEpoch {epoch}/{num_epochs} — LR: {current_lr:.2e}")

        model.train()
        total_train_loss = 0.0
        for inputs, targets in tqdm(train_loader, desc="  Train", leave=False):
            inputs  = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            logits = model(inputs)
            loss   = dice_loss_fn(logits, targets) + ce_loss_fn(logits, targets)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        metrics = evaluate_custom(
            model, val_loader, device,
            num_classes=getattr(model.classifier, 'out_channels', targets.max().item()+1)
        )

        print(
            f"  Train Loss: {avg_train_loss:.4f}  "
            f"| Val Loss: {metrics['loss']:.4f}  "
            f"| IoU: {metrics['iou']:.4f}  "
            f"| F1: {metrics['f1']:.4f}"
        )

        if scheduler is not None:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(metrics['loss'])
            else:
                scheduler.step()

### Обучение самостоятельной сверточной модели

In [77]:
unet_custom = CustomUNet(num_classes=NUM_CLASSES).to(device)

train_and_validate(unet_custom, train_loader, val_loader, num_epochs=5, learning_rate=1e-3)


Epoch 1/5 — LR: 1.00e-03


                                                        

  Train Loss: 3.6430  | Val Loss: 2.9532  | IoU: 0.1015  | F1: 0.1842

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 2.5770  | Val Loss: 2.2999  | IoU: 0.2084  | F1: 0.3439

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 2.1316  | Val Loss: 2.1289  | IoU: 0.3056  | F1: 0.4671

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 2.0523  | Val Loss: 2.0926  | IoU: 0.2079  | F1: 0.3432

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.9555  | Val Loss: 2.0175  | IoU: 0.2515  | F1: 0.4013


### Оценка качества самостоятельной сверточной модели

Получаем accuracy = 0.4013, что довольно неплохо, но ощутимо ниже готовой модели 

### Обучение самостоятельной сверточной модели

In [78]:
transformer_custom = CustomSegmentationTransformer(
    img_size=224,
    patch_size=32,
    in_channels=3,
    embed_dim=128,
    num_heads=4,
    num_layers=2,
    num_classes=NUM_CLASSES
).to(device)

train_and_validate(transformer_custom, train_loader, val_loader, num_epochs=5, learning_rate=1e-3)


Epoch 1/5 — LR: 1.00e-03


                                                        

  Train Loss: 3.0350  | Val Loss: 2.2780  | IoU: 0.1961  | F1: 0.3268

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.9886  | Val Loss: 2.1153  | IoU: 0.2433  | F1: 0.3909

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.8527  | Val Loss: 1.9976  | IoU: 0.2761  | F1: 0.4322

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.7617  | Val Loss: 2.0210  | IoU: 0.2827  | F1: 0.4401

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.6984  | Val Loss: 1.8558  | IoU: 0.3649  | F1: 0.5336


### Оценка качества самостоятельной трансформерной модели

Получаем accuracy = 0.5336, что тоже довольно неплохо, но ощутимо ниже готовой модели 

### Обучение самостоятельной сверточной модели с использованием гипотез 

In [79]:
optimizer = optim.AdamW(unet_custom.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
dice_loss = smp.losses.DiceLoss(mode='multiclass')
ce_loss   = nn.CrossEntropyLoss()

train_and_validate(unet_custom, train_loader, val_loader, num_epochs=5, learning_rate=1e-3, scheduler=scheduler)


Epoch 1/5 — LR: 1.00e-03




  Train Loss: 2.6802  | Val Loss: 2.0666  | IoU: 0.2467  | F1: 0.3952

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.8444  | Val Loss: 2.0031  | IoU: 0.2997  | F1: 0.4608

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.6470  | Val Loss: 1.7545  | IoU: 0.3792  | F1: 0.5496

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.7548  | Val Loss: 1.9516  | IoU: 0.3353  | F1: 0.5018

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.7235  | Val Loss: 1.8066  | IoU: 0.3609  | F1: 0.5302


### Оценка качества улучшенной самостоятельной сверточной модели

Получаем F1 = 0.5302 - примененные гипотезы помогли улучшить бейзлайн

### Обучение самостоятельной трансформерной модели с использованием гипотез 

In [80]:
optimizer = optim.AdamW(transformer_custom.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
dice_loss = smp.losses.DiceLoss(mode='multiclass')
ce_loss   = nn.CrossEntropyLoss()

train_and_validate(transformer_custom, train_loader, val_loader, num_epochs=5, learning_rate=1e-3, scheduler=scheduler)


Epoch 1/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.8159  | Val Loss: 1.9252  | IoU: 0.3284  | F1: 0.4936

Epoch 2/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.6770  | Val Loss: 1.7578  | IoU: 0.3748  | F1: 0.5446

Epoch 3/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.6087  | Val Loss: 1.7069  | IoU: 0.3791  | F1: 0.5489

Epoch 4/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.5697  | Val Loss: 1.6825  | IoU: 0.3864  | F1: 0.5567

Epoch 5/5 — LR: 1.00e-03


                                                        

  Train Loss: 1.5374  | Val Loss: 1.6990  | IoU: 0.3695  | F1: 0.5390


### Оценка качества улучшенной самостоятельной трансформеной модели

Получаем F1 = 0.5390 - примененные гипотезы помогли немного улучшить бейзлайн