## 0. Подготовка данных и импорт библиотек

In [None]:
# NOTE(review): this installs whatever commit is currently at the head of the GitHub
# repository (no tag/commit pin), so a re-run may pull a different library version.
# Consider pinning to a specific tag or commit for reproducibility.
!pip install -U git+https://github.com/qubvel/segmentation_models.pytorch

Collecting git+https://github.com/qubvel/segmentation_models.pytorch
  Cloning https://github.com/qubvel/segmentation_models.pytorch to /tmp/pip-req-build-gvpjx393
  Running command git clone -q https://github.com/qubvel/segmentation_models.pytorch /tmp/pip-req-build-gvpjx393
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting efficientnet-pytorch==0.6.3
  Downloading efficientnet_pytorch-0.6.3.tar.gz (16 kB)
Collecting pretrainedmodels==0.7.4
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[K     |████████████████████████████████| 58 kB 3.2 MB/s 
[?25hCollecting timm==0.4.12
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[K     |████████████████████████████████| 376 kB 11.2 MB/s 
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Building wheels for collected packages: segmentation-models-pytorch, efficientnet-pytorch, pretrainedmode

In [None]:
import os
import json
import nibabel as nib
import numpy as np
from tqdm.notebook import tqdm
import torchvision
import random

import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader
import albumentations as A # Будем использовать для аугментации данных
import segmentation_models_pytorch as smp

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# File locations
core_path = "./drive/MyDrive/covid_ct/"  # project root on the mounted drive
path = core_path + "data/data/"           # directory that contains "images" and "labels"

# List the regular files in the images directory.
# `os` is used directly here because the bare names listdir/isfile/join are only
# imported in a later cell, which made this line fail with NameError on a fresh kernel.
images_dir = path + "images"
onlyfiles = [f for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]

## 1. Работа с датасетом

In [None]:
from os import listdir
from os.path import isfile, join
# Collect the names of all regular files in the images directory and preview the first five.
images_dir = path + "images"
onlyfiles = [name for name in listdir(images_dir) if isfile(join(images_dir, name))]
onlyfiles[:5]

['study_0255.nii',
 'study_0256.nii',
 'study_0257.nii',
 'study_0258.nii',
 'study_0260.nii']

In [None]:
class CovidDataset(Dataset):
    """Per-slice dataset built from CT volumes and their segmentation masks.

    Every entry of ``training_data.json`` whose image name is listed in ``X_data``
    is loaded, split into slices along the last volume axis and stored in memory.
    Slices with a non-empty mask are always kept; slices with an empty mask are
    kept only until ``without_covid_max`` of them have been collected.

    Args:
        X_data: collection of image file names (as they appear on disk) that
            belong to this split.
        without_covid_max: maximum number of slices with an empty mask to keep.
        augment: when True (default, the original behaviour) ``__getitem__``
            applies random augmentation; pass False for a deterministic dataset
            (e.g. for validation).

    ``__getitem__`` returns ``(image, mask)`` float tensors of shape ``[1, H, W]``;
    the image is divided by 255.
    """

    # Rotation angles (degrees) the augmentation picks from.
    ROTATION_DEGREES = (-35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15, 20, 25, 30, 35)

    def __init__(self, X_data, without_covid_max=9999999, augment=True):
        path_images = os.path.join(path, 'images')
        path_labels = os.path.join(path, 'labels')

        # JSON file describing the (image, label) pairs
        with open(core_path + 'training_data.json', 'r') as f:
            dict_training = json.load(f)

        selected = set(X_data)  # O(1) membership test
        self.augment = augment
        self.X = []
        self.Y = []
        without_covid = 0
        for entry in tqdm(dict_training):
            # The last three characters of the JSON name are dropped to get the file name on disk
            # (presumably a ".gz" suffix -- TODO confirm against training_data.json).
            image_name = entry['image'][:-3]
            if image_name not in selected:
                # Skip before touching the disk: volumes of the other split are never needed.
                continue

            image = nib.load(os.path.join(path_images, image_name))
            label = nib.load(os.path.join(path_labels, entry['label'][:-3]))
            # Move the last (slice) axis to the front: [A, B, S] -> [S, A, B].
            # NOTE(review): values are cast to uint8; this assumes the stored intensities
            # already fit 0..255 -- confirm, otherwise they wrap around.
            image = torch.tensor(image.get_fdata(), dtype=torch.uint8).permute(2, 0, 1)
            label = torch.tensor(label.get_fdata(), dtype=torch.uint8).permute(2, 0, 1)

            for i in range(len(image)):
                if label[i].sum() == 0:
                    # Empty mask: keep only a limited number of such slices.
                    if without_covid >= without_covid_max:
                        continue
                    without_covid += 1
                self.X.append(image[i])
                self.Y.append(label[i])

    def __len__(self):
        return len(self.X)

    def _augment(self, X, y):
        """Apply the same random geometric transform to the image and its mask."""
        tvf = torchvision.transforms.functional

        # Random flip: 50% chance, then vertical or horizontal with equal probability.
        if random.random() > 0.5:
            flip = tvf.vflip if random.random() > 0.5 else tvf.hflip
            X, y = flip(X), flip(y)

        # Random rotation by one of the predefined angles (90% chance).
        if random.random() >= 0.1:
            degree = random.choice(self.ROTATION_DEGREES)
            X = tvf.rotate(X, degree)
            y = tvf.rotate(y, degree)

        # Random perspective. The warp parameters are sampled ONCE and applied to both
        # tensors; sampling them separately for image and mask (as two independent
        # RandomPerspective transforms do) misaligns the mask with the image.
        # 0.25 keeps the original effective probability (outer 0.5 * inner p=0.5).
        if random.random() > 0.75:
            height, width = X.shape[-2:]
            startpoints, endpoints = torchvision.transforms.RandomPerspective.get_params(
                width, height, 0.15
            )
            modes = torchvision.transforms.InterpolationMode
            X = tvf.perspective(X, startpoints, endpoints, interpolation=modes.BILINEAR, fill=0)
            # Nearest-neighbour keeps the mask binary.
            y = tvf.perspective(y, startpoints, endpoints, interpolation=modes.NEAREST, fill=0)

        # Gaussian blur on the image only; the mask must not be smoothed.
        # NOTE(review): a kernel size of 1 leaves the pixels unchanged -- increase it
        # if real blurring is intended.
        if random.random() > 0.5:
            X = torchvision.transforms.GaussianBlur(1)(X)

        return X, y

    def __getitem__(self, idx):
        # The network expects [channels, height, width], so a channel axis is added:
        # a [H, W] slice becomes [1, H, W].
        X = self.X[idx].type(torch.float).unsqueeze(0) / 255
        y = self.Y[idx].type(torch.float).unsqueeze(0)
        if getattr(self, "augment", True):
            X, y = self._augment(X, y)
        return X, y
                                            

In [None]:
batch_size = 8
SPLIT_SEED = 42        # fixed seed so the train/validation split is reproducible
N_TRAIN_STUDIES = 33   # studies used for training; the remaining ones go to validation
MAX_EMPTY_SLICES = 50  # cap on slices with an empty mask per dataset

# Shuffle the study file names for the train/validation split.
# Sorting first and using a freshly seeded generator makes this cell idempotent:
# re-running it (or restarting the kernel) always yields the same split.
onlyfiles = np.random.default_rng(SPLIT_SEED).permutation(sorted(onlyfiles)).tolist()

train_dataset = CovidDataset(onlyfiles[:N_TRAIN_STUDIES], MAX_EMPTY_SLICES)
valid_dataset = CovidDataset(onlyfiles[N_TRAIN_STUDIES:], MAX_EMPTY_SLICES)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation order does not need to be random; a fixed order keeps per-batch metrics comparable.
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

In [None]:
# Sanity check: print the number of batches and the tensor sizes of the first two batches.
for batch_number, (images, masks) in enumerate(train_loader, start=1):
    print(len(train_loader))
    print(images.size())
    print(masks.size())

    if batch_number >= 2:
        break
    

  "Argument interpolation should be of type InterpolationMode instead of int. "


61
torch.Size([8, 1, 512, 512])
torch.Size([8, 1, 512, 512])
61
torch.Size([8, 1, 512, 512])
torch.Size([8, 1, 512, 512])


## 2. Модель Unet

In [None]:
import torch.nn as nn

class Unet(nn.Module):
    """U-Net style encoder/decoder for single-channel images.

    The encoder has four stages of two conv blocks followed by 2x2 max pooling
    (32, 64, 128, 256 channels) and a 512-channel bottleneck. The decoder
    upsamples by 2, halves the channels, concatenates the matching encoder
    output along the channel axis and applies two more conv blocks.
    The output is a single-channel map passed through a sigmoid.

    Input height and width must be divisible by 16 (four poolings), otherwise
    the concatenation with the skip tensors fails on a shape mismatch.

    Attribute names are kept as-is because they are the keys of the model's
    ``state_dict``.
    """

    @staticmethod
    def _conv_block(in_features, out_features):
        """3x3 convolution (padding 1) -> ReLU -> batch normalisation."""
        return nn.Sequential(
            nn.Conv2d(in_features, out_features, (3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_features),
        )

    def block_down(self, in_features, out_features):
        """Conv block used on the encoder path."""
        return self._conv_block(in_features, out_features)

    def block_up(self, in_features, out_features):
        """Conv block used on the decoder path (same layout as ``block_down``)."""
        return self._conv_block(in_features, out_features)

    def __init__(self):
        super(Unet, self).__init__()
        # Encoder stage 1
        self.block_up11 = self.block_down(1, 32)
        self.block_up12 = self.block_down(32, 32)
        self.max_pooling11 = nn.MaxPool2d((2, 2), stride=(2, 2))

        # Encoder stage 2
        self.block_up21 = self.block_down(32, 64)
        self.block_up22 = self.block_down(64, 64)
        self.max_pooling22 = nn.MaxPool2d((2, 2), stride=(2, 2))

        # Encoder stage 3
        self.block_up31 = self.block_down(64, 128)
        self.block_up32 = self.block_down(128, 128)
        self.max_pooling33 = nn.MaxPool2d((2, 2), stride=(2, 2))

        # Encoder stage 4
        self.block_up41 = self.block_down(128, 256)
        self.block_up42 = self.block_down(256, 256)
        self.max_pooling44 = nn.MaxPool2d((2, 2), stride=(2, 2))

        # Bottleneck
        self.block_up51 = self.block_down(256, 512)
        self.block_up52 = self.block_down(512, 512)

        # Decoder stage 1: upsample, reduce channels, merge with skip (256 + 256), refine
        self.block_up61 = nn.Upsample(scale_factor=2)
        self.block_up62 = self.block_up(512, 256)
        self.block_up63 = self.block_up(512, 256)
        self.block_up64 = self.block_up(256, 256)

        # Decoder stage 2
        self.block_up71 = nn.Upsample(scale_factor=2)
        self.block_up72 = self.block_up(256, 128)
        self.block_up73 = self.block_up(256, 128)
        self.block_up74 = self.block_up(128, 128)

        # Decoder stage 3
        self.block_up81 = nn.Upsample(scale_factor=2)
        self.block_up82 = self.block_up(128, 64)
        self.block_up83 = self.block_up(128, 64)
        self.block_up84 = self.block_up(64, 64)

        # Decoder stage 4
        self.block_up91 = nn.Upsample(scale_factor=2)
        self.block_up92 = self.block_up(64, 32)
        self.block_up93 = self.block_up(64, 32)
        self.block_up94 = self.block_up(32, 32)

        # Output head: 32 -> 1 channel
        self.block_up100 = self.block_up(32, 1)

    def forward(self, x):
        """Return a ``[N, 1, H, W]`` map with values in [0, 1] for a ``[N, 1, H, W]`` input."""
        # Encoder. The stage outputs are reused as skip connections; no copy is needed
        # because nothing downstream modifies them in place.
        skip1 = self.block_up12(self.block_up11(x))
        skip2 = self.block_up22(self.block_up21(self.max_pooling11(skip1)))
        skip3 = self.block_up32(self.block_up31(self.max_pooling22(skip2)))
        skip4 = self.block_up42(self.block_up41(self.max_pooling33(skip3)))

        # Bottleneck
        out = self.block_up52(self.block_up51(self.max_pooling44(skip4)))

        # Decoder: (upsample, reduce, merge-with-skip, refine) for every stage
        decoder_stages = (
            (self.block_up61, self.block_up62, self.block_up63, self.block_up64, skip4),
            (self.block_up71, self.block_up72, self.block_up73, self.block_up74, skip3),
            (self.block_up81, self.block_up82, self.block_up83, self.block_up84, skip2),
            (self.block_up91, self.block_up92, self.block_up93, self.block_up94, skip1),
        )
        for upsample, reduce, merge, refine, skip in decoder_stages:
            out = reduce(upsample(out))
            out = merge(torch.cat((out, skip), 1))
            out = refine(out)

        return torch.sigmoid(self.block_up100(out))

In [None]:
import torch
import torch.nn.functional as F

In [None]:
class TverskyLoss(nn.Module):
    """Tversky loss for binary segmentation.

    ``loss = 1 - (TP + smooth) / (TP + alpha * FN + (1 - alpha) * FP + smooth)``
    where TP, FN and FP are soft counts computed over all elements of the batch.
    ``alpha`` weights false negatives and ``1 - alpha`` weights false positives.

    Args:
        alpha: weight of the false-negative term (default 0.7).
    """

    def __init__(self, alpha=0.7):
        super(TverskyLoss, self).__init__()
        self.alpha = alpha

    def forward(self, inputs, targets, smooth=1):
        """Return the scalar loss.

        Args:
            inputs: predicted probabilities, any shape.
            targets: ground-truth mask with the same number of elements as ``inputs``.
            smooth: additive constant that avoids division by zero.
        """
        # reshape (not view) so that non-contiguous tensors, e.g. the result of a
        # transpose/permute, are accepted as well.
        y_pred_pos = inputs.reshape(-1)
        y_true_pos = targets.reshape(-1)

        true_pos = torch.sum(y_true_pos * y_pred_pos)
        false_neg = torch.sum(y_true_pos * (1 - y_pred_pos))
        false_pos = torch.sum((1 - y_true_pos) * y_pred_pos)

        denominator = true_pos + self.alpha * false_neg + (1 - self.alpha) * false_pos + smooth
        return 1 - (true_pos + smooth) / denominator

## 3. Обучение модели

In [None]:
import segmentation_models_pytorch as smp

use_previous_versions = False
previous_i = 0  # offset added to the epoch number when naming saved checkpoints
path_to_model = "output/kaggle/working/"

# Use the GPU when it is available instead of unconditionally requesting cuda:0.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _checkpoint_version(file_name):
    """Return N for a file named like 'lungs_ct_model_N.h5', or None if it does not match."""
    if "lungs_ct_model" not in file_name:
        return None
    try:
        return int(file_name.split("_")[-1].split(".")[0])
    except ValueError:
        return None


model = None

# Check whether an already trained model can be loaded
if use_previous_versions:
    checkpoints = []  # (version, full path)
    for put, papki, files in os.walk("."):
        for el in files:
            version = _checkpoint_version(el)
            if version is not None:
                # Keep the directory: os.walk also visits subdirectories.
                checkpoints.append((version, os.path.join(put, el)))

    if checkpoints:
        # Load the NEWEST checkpoint and report that same version.
        latest_version, latest_path = max(checkpoints)
        # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
        model = torch.load(latest_path, map_location=device)
        # Continue numbering after the loaded checkpoint so it is not overwritten.
        previous_i = latest_version + 1
        print("Загружена прошлая модель: {}".format(str(latest_version)))

if model is None:
    # Sigmoid output in both code paths: the loss and metrics expect values in [0, 1].
    # NOTE(review): no encoder_weights argument is passed, so the library default is used;
    # the recorded cell output shows ResNet-18 weights being downloaded, so the encoder
    # appears to be pretrained despite the message below -- confirm.
    model = smp.UnetPlusPlus(encoder_name='resnet18', in_channels=1, classes=1, activation="sigmoid")
    print("Загружен непредобученный Unet++")

model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

Загружен непредобученный Unet++


#### Метрики


In [None]:
def recall(output_batch, correct_batch, threshold=0.99):
    """Recall of the thresholded prediction over the whole batch.

    Args:
        output_batch: tensor of predicted probabilities.
        correct_batch: tensor with the ground-truth mask (positives are exactly 1).
        threshold: a pixel is predicted positive when its probability is above this value.

    Returns:
        TP / (number of ground-truth positives), or ``np.nan`` when the batch
        contains no ground-truth positives.
    """
    output_numpy = output_batch.detach().cpu().numpy()
    correct_numpy = correct_batch.detach().cpu().numpy()

    # Binarise the prediction first; adding raw probabilities to the mask only
    # equals 2 when the probability is exactly 1.0.
    predicted_positive = output_numpy > threshold
    actual_positive = correct_numpy == 1

    amount_of_correct = np.count_nonzero(predicted_positive & actual_positive)
    amount_all = np.count_nonzero(actual_positive)
    if amount_all == 0:
        return np.nan
    return amount_of_correct / amount_all

def precision(output_batch, correct_batch, threshold=0.99):
    """Precision of the thresholded prediction over the whole batch.

    Args:
        output_batch: tensor of predicted probabilities.
        correct_batch: tensor with the ground-truth mask (positives are exactly 1).
        threshold: a pixel is predicted positive when its probability is above this value.

    Returns:
        TP / (number of predicted positives), or ``np.nan`` when nothing is
        predicted positive.
    """
    output_numpy = output_batch.detach().cpu().numpy()
    correct_numpy = correct_batch.detach().cpu().numpy()

    # Both the numerator and the denominator use the thresholded prediction;
    # counting only probabilities that are exactly 1.0 ignores most predictions.
    predicted_positive = output_numpy > threshold
    actual_positive = correct_numpy == 1

    amount_of_correct = np.count_nonzero(predicted_positive & actual_positive)
    amount_all = np.count_nonzero(predicted_positive)
    if amount_all == 0:
        return np.nan
    return amount_of_correct / amount_all
    
def f1_score(precision, recall):
    """Harmonic mean of precision and recall.

    Returns 0.0 when both values are 0 instead of raising ZeroDivisionError.
    NaN inputs propagate to a NaN result.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2 * (precision * recall) / denominator

In [None]:
# Training hyperparameters
num_epoch = 36
lr = 5e-4

# Loss: Tversky with alpha=0.7
tverskoy_loss = TverskyLoss(alpha=0.7)

# Adam optimizer; the learning rate is multiplied by 0.9825 on every scheduler step.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.MultiplicativeLR(
    optimizer,
    lr_lambda=lambda epoch: 0.9825,
)

In [None]:
losses = []  # mean training loss per epoch

for epoch in tqdm(range(num_epoch)):
    epoch_losses = []

    # ---- Training ----
    model.train()  # batch norm / dropout in training mode
    for X, Y in train_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        output = model(X)

        loss = tverskoy_loss(output, Y)
        loss.backward()
        # NOTE(review): with a max norm of 99999 this is effectively a no-op; lower it
        # if gradient clipping is actually wanted.
        clip_grad_norm_(model.parameters(), 99999)

        optimizer.step()
        epoch_losses.append(loss.item())

    # Mean loss of the epoch
    common_loss = sum(epoch_losses) / len(epoch_losses)
    losses.append(common_loss)

    # ---- Validation metrics ----
    valid_precision = []
    valid_recall = []
    model.eval()  # use running batch-norm statistics, no dropout
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for X, Y in valid_loader:
            # A single forward pass is shared by both metrics.
            output = model(X.to(device)).cpu()
            batch_recall = recall(output, Y)
            batch_precision = precision(output, Y)
            # np.isnan instead of an identity check against np.nan
            if not np.isnan(batch_precision):
                valid_precision.append(batch_precision)
            if not np.isnan(batch_recall):
                valid_recall.append(batch_recall)

    # Averages over the batches that produced a defined value (None when there were none)
    mean_precision = sum(valid_precision) / len(valid_precision) if valid_precision else None
    mean_recall = sum(valid_recall) / len(valid_recall) if valid_recall else None

    mean_f1 = None
    if mean_precision is not None and mean_recall is not None:
        try:
            mean_f1 = f1_score(mean_precision, mean_recall)
        except ZeroDivisionError:
            # precision + recall == 0
            mean_f1 = None

    # ---- Report ----
    print("--" * 15)
    print("Epoch: {}".format(str(epoch)))
    print("Loss:\t\t {:7.5f}".format(common_loss))
    print("Learning rate: {:9.8f}".format(float(optimizer.state_dict()["param_groups"][0]["lr"])))

    if mean_precision is not None:
        print("Precision:\t {:7.3%}".format(mean_precision))
    else:
        print("Precision:\t No info")

    if mean_recall is not None:
        print("Recall:\t\t {:7.3%}".format(mean_recall))
    else:
        print("Recall:\t\t No info")

    if mean_f1 is not None:
        print("F1-score: \t {:7.3f}".format(mean_f1))
    else:
        print("F1-score: No info")

    # Save the model and advance the learning-rate scheduler
    torch.save(model, "lungs_ct_model_" + str(epoch + previous_i) + ".h5")
    scheduler.step()

  0%|          | 0/125 [00:00<?, ?it/s]

  "Argument interpolation should be of type InterpolationMode instead of int. "


------------------------------
Epoch: 0
Loss:		 0.92792
Learning rate: 0.00050000
Precision:	 15.781%
Recall:		  3.959%
F1-score: 	   0.063
------------------------------
Epoch: 1
Loss:		 0.74122
Learning rate: 0.00049125
Precision:	 42.237%
Recall:		 17.581%
F1-score: 	   0.248
------------------------------
Epoch: 2
Loss:		 0.57727
Learning rate: 0.00048265
Precision:	 40.517%
Recall:		 12.874%
F1-score: 	   0.195
------------------------------
Epoch: 3
Loss:		 0.54698
Learning rate: 0.00047421
Precision:	 42.364%
Recall:		 25.073%
F1-score: 	   0.315
------------------------------
Epoch: 4
Loss:		 0.53944
Learning rate: 0.00046591
Precision:	 37.223%
Recall:		 26.181%
F1-score: 	   0.307
------------------------------
Epoch: 5
Loss:		 0.51866
Learning rate: 0.00045775
Precision:	 38.676%
Recall:		 27.744%
F1-score: 	   0.323
------------------------------
Epoch: 6
Loss:		 0.51270
Learning rate: 0.00044974
Precision:	 36.451%
Recall:		 27.064%
F1-score: 	   0.311
--------------------