# Установка полезностей 

In [None]:
! pip install segmentation_models_pytorch

In [None]:
! git clone https://github.com/Bjarten/early-stopping-pytorch.git

In [None]:
! mv ./early-stopping-pytorch ./lib

# Импорты

In [None]:
# Торч
import torch
import torchvision
import segmentation_models_pytorch as smp
from lib.pytorchtools import *
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# Данные
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Картинки
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.pyplot as mpplot
from PIL import Image

# Разное
import math
import sys
import cv2
import time
import copy
import random
import os 
from tqdm.notebook import tqdm

# Ход работы

In [None]:
# Fix every random number generator so repeated runs give the same results.
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seeds every visible GPU, not only the current one; silently ignored
# when CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)

# Deterministic cuDNN kernels, with the autotuner disabled so that the same
# algorithm is selected on every run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Data preparation: pair every training image with its mask and split the
# pairs into training and validation tables.
data_folder = "/kaggle/input/ultrasound-nerve-segmentation/"
train_folder = os.path.join(data_folder, "train")
test_folder = os.path.join(data_folder, "test")

# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the split below independent of the filesystem.
mask_names = sorted(
    name for name in os.listdir(train_folder) if name.endswith("_mask.tif")
)

# The image that belongs to "<id>_mask.tif" is "<id>.tif"; the suffix is removed
# from the file name only, never from the directory part of the path.
masks = [os.path.join(train_folder, name) for name in mask_names]
imgs = [os.path.join(train_folder, name.replace("_mask", "")) for name in mask_names]

df = pd.DataFrame({"images": imgs, "masks": masks})

# An explicit random_state keeps the split identical no matter which cells
# were executed beforehand.
train_df, val_df = train_test_split(df, test_size=0.20, random_state=42)

In [None]:
# Preview the first ten rows of the training split.
train_df.head(10)

In [None]:
# Preview the first ten rows of the validation split.
val_df.head(10)

In [None]:
# Dataset class definition
class UltrasoundDataset(Dataset):
    """Dataset of images paired with their segmentation masks.

    Args:
        df: table with an ``images`` column and a ``masks`` column holding
            the file path of every image and of its mask.
        transform: optional callable applied to each loaded image.
        mask_transform: optional callable applied to each loaded mask.
            When omitted, ``transform`` is applied to the mask as well,
            which keeps the two-argument constructor behaving as before.
            Passing a dedicated mask pipeline (for example one that resizes
            with nearest-neighbour interpolation) keeps mask values from
            being blended at object borders.
    """

    def __init__(self, df, transform=None, mask_transform=None):
        self.images = df.images.tolist()
        self.masks = df.masks.tolist()
        self.transform = transform
        self.mask_transform = transform if mask_transform is None else mask_transform

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair stored at ``idx``."""
        # Samplers may hand over the index as a tensor.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = Image.open(self.images[idx])
        mask = Image.open(self.masks[idx])

        if self.transform is not None:
            image = self.transform(image)
        if self.mask_transform is not None:
            mask = self.mask_transform(mask)

        return image, mask

In [None]:
# Build the datasets and the data loaders
transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor()
])

batch_size = 32
# The number of loader processes follows the CPUs of the machine rather than
# the batch size: more workers than cores only adds start-up cost and memory.
num_workers = os.cpu_count() or 1

train_dataset = UltrasoundDataset(train_df, transform)
val_dataset = UltrasoundDataset(val_df, transform)

# Only the training data is shuffled; validation keeps a fixed order.
train_dataloader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_dataloader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Используемое устройство: ", device)
print('Тренировочный сет:\n', "\tКоличество батчей: ", len(train_dataloader), "\n\tКоличество изображений в датасете: ", len(train_dataset))
print('Валидационный сет:\n', "\tКоличество батчей: ", len(val_dataloader), "\n\tКоличество изображений в датасете: ", len(val_dataset))

In [None]:
# Drawing images
def show_overlayed(data, n, cols=5):
    """Draw the first ``n`` samples of ``data`` with the mask laid over the image.

    Args:
        data: indexable collection whose items are ``(image, mask)`` pairs of
            tensors; each tensor is permuted with ``(1, 2, 0)`` before
            drawing, so it is expected to be three-dimensional.
        n: number of samples to draw, taken from index 0 upwards.
        cols: number of subplot columns in the grid.

    Raises:
        ValueError: if ``cols`` is smaller than 1.
    """
    if cols < 1:
        raise ValueError("cols must be at least 1")
    if n <= 0:
        # Nothing to draw; avoid opening an empty figure.
        return

    fig = plt.figure(figsize=(25, 25))
    rows = -(-n // cols)  # ceiling division

    for index in range(n):
        ax = fig.add_subplot(rows, cols, index + 1)

        # Fetch the sample once so the files behind it are read a single time.
        image, mask = data[index]

        ax.imshow(image.permute(1, 2, 0).numpy(), cmap='gray')
        ax.imshow(mask.permute(1, 2, 0).numpy(), alpha=0.5, cmap='gray')

    # pyplot's show works on every backend, whereas Figure.show warns when
    # the backend has no GUI.
    plt.show()

In [None]:
# Draw the first 25 training samples with their masks overlaid.
show_overlayed(train_dataset, 25)

In [None]:
# Model setup
encoder = 'densenet161'
encoder_w = 'imagenet'
activation = 'sigmoid'

# U-Net with one input channel and one output class.
model = smp.Unet(encoder_name=encoder, encoder_weights=encoder_w,
                 in_channels=1, classes=1, activation=activation)

# Objective and the metric reported next to it.
loss_function = smp.utils.losses.DiceLoss()
metrics = [smp.utils.metrics.IoU()]

# Optimisation hyper-parameters.
learning_rate = 0.001
epochs = 50

stopper = EarlyStopping(patience=3)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# The cosine schedule spans the whole planned run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# Arguments shared by the training and the validation runner.
runner_options = dict(loss=loss_function, metrics=metrics, device=device, verbose=True)

train_epoch = smp.utils.train.TrainEpoch(model, optimizer=optimizer, **runner_options)
val_epoch = smp.utils.train.ValidEpoch(model, **runner_options)

In [None]:
# Training
model.to(device)

train_loss_history = []
val_loss_history = []
train_iou_history = []
val_iou_history = []

# Key under which the epoch logs store the loss value.
loss_name = loss_function.__name__

for epoch in range(epochs):
    print('\nEpoch: {}'.format(epoch))
    train_log = train_epoch.run(train_dataloader)
    val_log = val_epoch.run(val_dataloader)

    scheduler.step()

    train_loss_history.append(train_log[loss_name])
    val_loss_history.append(val_log[loss_name])

    train_iou_history.append(train_log['iou_score'])
    val_iou_history.append(val_log['iou_score'])

    # Stop once the validation loss has not improved for `patience` epochs.
    stopper(val_log[loss_name], model)
    if stopper.early_stop:
        break

# EarlyStopping saves the weights of the best validation epoch to its
# checkpoint file ('checkpoint.pt' unless configured otherwise). Reload them so
# that later cells use the best model instead of the weights of the last epoch.
checkpoint_path = getattr(stopper, 'path', 'checkpoint.pt')
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

In [None]:
# Inspect the training history
def visualize_history(train, val, title):
    """Plot the per-epoch ``train`` and ``val`` series on one chart.

    ``title`` is used as the label of the y axis; the x axis counts epochs.
    """
    for label, series in (('Train', train), ('Val', val)):
        epochs_axis = range(len(series))
        plt.plot(epochs_axis, series, label=label)

    plt.ylabel(title)
    plt.xlabel('epoch')

    plt.legend()
    plt.show()

In [None]:
# Plot the training and validation loss recorded for every epoch.
visualize_history(train_loss_history, val_loss_history, 'Loss')

In [None]:
# Plot the training and validation IoU recorded for every epoch.
visualize_history(train_iou_history, val_iou_history, 'IoU')

In [None]:
# Run-length encoding
def rle_encoding(x):
    """Run-length encode the entries of ``x`` that are equal to 1.

    The array is transposed and flattened, so for a 2-D mask the pixels are
    numbered column by column, starting from 1.

    Args:
        x: array-like mask; only entries equal to 1 count as foreground.

    Returns:
        A flat list of plain ints ``[start_1, length_1, start_2, length_2, ...]``
        with one ``(start, length)`` pair per run of consecutive foreground
        pixels, or an empty list when there is no foreground.
    """
    pixels = np.flatnonzero(np.asarray(x).T.flatten() == 1)
    if pixels.size == 0:
        return []

    # A new run begins wherever two neighbouring foreground positions are
    # more than one apart.
    breaks = np.flatnonzero(np.diff(pixels) > 1) + 1
    starts = pixels[np.concatenate(([0], breaks))]
    ends = pixels[np.concatenate((breaks - 1, [pixels.size - 1]))]

    # Interleave the 1-based run starts with the run lengths.
    return np.column_stack((starts + 1, ends - starts + 1)).ravel().tolist()

In [None]:
# Predict the masks, run-length encode them and write the submission CSV
imgs = sorted(os.listdir(test_folder), key=lambda s: int(s.split('.')[0]))

model.to(device)
model.eval()

rows = []
for img in tqdm(imgs):
    # The context manager closes the file as soon as the tensor is built.
    with Image.open(os.path.join(test_folder, img)) as source:
        original_size = (source.height, source.width)
        x = transform(source)

    x = x.unsqueeze(0).to(device)
    prediction = model.predict(x).cpu()

    # Scale the prediction back to the resolution of the source image.
    prediction = transforms.Resize(size=original_size)(prediction)

    # The model outputs sigmoid probabilities, while rle_encoding keeps only
    # pixels equal to 1, so the prediction is binarised at 0.5 and reduced to
    # a 2-D array first.
    mask = (prediction[0, 0] > 0.5).numpy().astype(np.uint8)

    pixels = ' '.join(map(str, rle_encoding(mask)))
    # The image id is the file name without its extension.
    rows.append((img.split('.')[0], pixels))

# Building the frame once avoids growing it row by row.
submission_df = pd.DataFrame(rows, columns=['img', 'pixels'])
submission_df.to_csv('submission.csv', index=False)