# 2 версия

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from glob import glob

TRAIN_IMG_DIR = "/content/drive/MyDrive/data/train/image"
TRAIN_MASK_DIR = "/content/drive/MyDrive/data/train/label"

VAL_IMG_DIR = "/content/drive/MyDrive/data/val/image"
VAL_MASK_DIR = "/content/drive/MyDrive/data/val/label"

print("Train images:", len(glob(TRAIN_IMG_DIR + "/*.npy")))
print("Train masks:", len(glob(TRAIN_MASK_DIR + "/*.npy")))
print("Val images:", len(glob(VAL_IMG_DIR + "/*.npy")))
print("Val masks:", len(glob(VAL_MASK_DIR + "/*.npy")))

# Пример вывода нескольких файлов для проверки
print("Train images sample:", glob(TRAIN_IMG_DIR + "/*.npy")[:5])
print("Train masks sample:", glob(TRAIN_MASK_DIR + "/*.npy")[:5])

Train images: 2975
Train masks: 2975
Val images: 445
Val masks: 500
Train images sample: ['/content/drive/MyDrive/data/train/image/2788.npy', '/content/drive/MyDrive/data/train/image/2784.npy', '/content/drive/MyDrive/data/train/image/2782.npy', '/content/drive/MyDrive/data/train/image/2778.npy', '/content/drive/MyDrive/data/train/image/2774.npy']
Train masks sample: ['/content/drive/MyDrive/data/train/label/2783.npy', '/content/drive/MyDrive/data/train/label/2790.npy', '/content/drive/MyDrive/data/train/label/2789.npy', '/content/drive/MyDrive/data/train/label/2777.npy', '/content/drive/MyDrive/data/train/label/279.npy']


In [None]:
import torch
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from glob import glob
import os
from tqdm import tqdm
import segmentation_models_pytorch as smp

In [None]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 4
NUM_CLASSES = 19
LR = 1e-3
EPOCHS = 25

Шаг 4. Определение аугментаций и датасета

In [None]:
train_transform = A.Compose([
    A.Resize(256, 512),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(256, 512),
    ToTensorV2(),
])

class SegmentationDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, mask_dir, transform):
        self.image_paths = sorted(glob(os.path.join(image_dir, '*.npy')))
        self.mask_paths = sorted(glob(os.path.join(mask_dir, '*.npy')))
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = np.load(self.image_paths[idx]).astype(np.uint8)
        mask = np.load(self.mask_paths[idx]).astype(np.uint8)
        mask[mask == 255] = 0  # 🛠️ заменяем "ignore" на фон

        augmented = self.transform(image=image, mask=mask)
        image = augmented['image'].float() / 255.0  # 🔄 нормализация в [0,1]
        mask = augmented['mask']

        return image, mask.long()

Шаг 5. Создание датасетов и загрузчиков с проверкой размеров

In [None]:
train_dataset = SegmentationDataset(TRAIN_IMG_DIR, TRAIN_MASK_DIR, train_transform)
val_dataset = SegmentationDataset(VAL_IMG_DIR, VAL_MASK_DIR, val_transform)

print("Train dataset size:", len(train_dataset))
print("Validation dataset size:", len(val_dataset))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=0)

Train dataset size: 2975
Validation dataset size: 445


Шаг 6. Определение модели, оптимизатора, функции потерь и lr_scheduler

In [None]:
model = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", classes=NUM_CLASSES, activation=None).to(DEVICE)
loss_fn = smp.losses.DiceLoss(mode='multiclass')
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

Шаг 7. Определение функций обучения и валидации

In [None]:
def train_one_epoch(loader, model, optimizer, loss_fn):
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(loader):
        print(images.dtype, images.shape)
        print(masks.dtype, masks.shape)
        images = images.to(DEVICE)
        masks = masks.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(loader)


def evaluate(loader, model, loss_fn):
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, masks in loader:
            images = images.to(DEVICE)
            masks = masks.to(DEVICE).long()  # и тут тоже
            outputs = model(images)
            loss = loss_fn(outputs, masks)
            val_loss += loss.item()
    return val_loss / len(loader)

Шаг 8. Запуск обучения

In [None]:
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(train_loader, model, optimizer, loss_fn)
    val_loss = evaluate(val_loader, model, loss_fn)
    scheduler.step()
    print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# После окончания обучения сохраняем модель
torch.save(model.state_dict(), "esnet_segmentation.pth")

  0%|          | 0/744 [00:00<?, ?it/s]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  0%|          | 1/744 [00:15<3:15:01, 15.75s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  0%|          | 2/744 [00:30<3:05:37, 15.01s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  0%|          | 3/744 [00:46<3:14:15, 15.73s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 4/744 [01:01<3:10:52, 15.48s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 5/744 [01:16<3:05:14, 15.04s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 6/744 [01:30<3:02:39, 14.85s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 7/744 [01:45<3:01:34, 14.78s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 8/744 [01:59<2:57:23, 14.46s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|          | 9/744 [02:13<2:55:16, 14.31s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|▏         | 10/744 [02:27<2:54:30, 14.26s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  1%|▏         | 11/744 [02:41<2:55:55, 14.40s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 12/744 [02:56<2:56:59, 14.51s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 13/744 [03:10<2:54:28, 14.32s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 14/744 [03:24<2:52:51, 14.21s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 15/744 [03:39<2:53:47, 14.30s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 16/744 [03:51<2:46:41, 13.74s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 17/744 [04:04<2:45:26, 13.65s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  2%|▏         | 18/744 [04:18<2:43:35, 13.52s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 19/744 [04:32<2:46:52, 13.81s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 20/744 [04:49<2:56:23, 14.62s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 21/744 [05:03<2:56:26, 14.64s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 22/744 [05:21<3:06:53, 15.53s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 23/744 [05:35<3:02:16, 15.17s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 24/744 [05:49<2:58:26, 14.87s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 25/744 [06:03<2:55:18, 14.63s/it]

torch.float32 torch.Size([4, 3, 256, 512])
torch.int64 torch.Size([4, 256, 512])


  3%|▎         | 25/744 [06:18<3:01:13, 15.12s/it]


KeyboardInterrupt: 

# Тут будет слишком долгое обучение, облегчим параметры

In [None]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 2
NUM_CLASSES = 19
LR = 1e-3
EPOCHS = 3

2. Аугментации (с уменьшенным разрешением)

In [None]:
train_transform = A.Compose([
    A.Resize(128, 256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(128, 256),
    ToTensorV2(),
])

3. Dataset с ограничением на 300 файлов

In [None]:
class SegmentationDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, mask_dir, transform):
        self.image_paths = sorted(glob(os.path.join(image_dir, '*.npy')))[:300]
        self.mask_paths = sorted(glob(os.path.join(mask_dir, '*.npy')))[:300]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = np.load(self.image_paths[idx]).astype(np.uint8)
        mask = np.load(self.mask_paths[idx]).astype(np.uint8)
        mask = np.clip(mask, 0, NUM_CLASSES - 1)  # важно
        augmented = self.transform(image=image, mask=mask)
        image = augmented['image']
        mask = augmented['mask']
        return image.float(), mask.long()

In [None]:
train_dataset = SegmentationDataset(TRAIN_IMG_DIR, TRAIN_MASK_DIR, train_transform)
val_dataset = SegmentationDataset(VAL_IMG_DIR, VAL_MASK_DIR, val_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=2)

In [None]:
model = smp.FPN(encoder_name="resnet18", classes=NUM_CLASSES, activation=None).to(DEVICE)
loss_fn = smp.losses.DiceLoss(mode='multiclass')
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

In [None]:
def train_one_epoch(loader, model, optimizer, loss_fn):
    model.train()
    epoch_loss = 0
    for i, (images, masks) in enumerate(tqdm(loader)):
        images, masks = images.to(DEVICE), masks.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        if i % 50 == 0:
            print(f"[Batch {i}/{len(loader)}] Loss: {loss.item():.4f}")
    return epoch_loss / len(loader)

def evaluate(loader, model, loss_fn):
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, masks in loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            outputs = model(images)
            loss = loss_fn(outputs, masks)
            val_loss += loss.item()
    return val_loss / len(loader)


In [None]:
for epoch in range(EPOCHS):
    train_loss = train_one_epoch(train_loader, model, optimizer, loss_fn)
    val_loss = evaluate(val_loader, model, loss_fn)
    scheduler.step()
    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

torch.save(model.state_dict(), "segmentation_model.pth")

  1%|          | 1/150 [00:03<09:17,  3.74s/it]

[Batch 0/150] Loss: 0.7161


 34%|███▍      | 51/150 [06:35<01:55,  1.17s/it]

[Batch 50/150] Loss: 0.5330


 67%|██████▋   | 101/150 [07:23<00:46,  1.06it/s]

[Batch 100/150] Loss: 0.6716


100%|██████████| 150/150 [08:11<00:00,  3.27s/it]


Epoch 1/3 | Train Loss: 0.5886 | Val Loss: 0.5693


  1%|          | 1/150 [00:01<04:25,  1.78s/it]

[Batch 0/150] Loss: 0.5151


 34%|███▍      | 51/150 [00:48<01:26,  1.15it/s]

[Batch 50/150] Loss: 0.6020


 67%|██████▋   | 101/150 [01:37<00:55,  1.14s/it]

[Batch 100/150] Loss: 0.5840


100%|██████████| 150/150 [02:23<00:00,  1.04it/s]


Epoch 2/3 | Train Loss: 0.5740 | Val Loss: 0.5685


  1%|          | 1/150 [00:01<04:01,  1.62s/it]

[Batch 0/150] Loss: 0.6082


 34%|███▍      | 51/150 [00:49<01:44,  1.05s/it]

[Batch 50/150] Loss: 0.5046


 67%|██████▋   | 101/150 [01:36<00:45,  1.08it/s]

[Batch 100/150] Loss: 0.5740


100%|██████████| 150/150 [02:21<00:00,  1.06it/s]


Epoch 3/3 | Train Loss: 0.5638 | Val Loss: 0.5613
