In [None]:
# import the required libraries
import math
import sys
import random
import os
import cv2


# data loading and manipulation
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.pyplot as mpplot

# neural network
import torch
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
import torchvision
from PIL import Image


In [None]:
tif_path = "../input/ultrasound-nerve-segmentation/train"

# Collect every image file name (X) together with the name of its mask (Y).
# Mask files ("<stem>_mask.tif") are skipped while listing and derived from
# the image name instead.
X = []
Y = []
for root, _, files in os.walk(tif_path):
    # os.walk yields names in arbitrary filesystem order; sort them so the
    # id -> file mapping is the same on every run.
    for f in sorted(files):
        if f.split("_")[-1] == "mask.tif":
            continue
        X.append(f)
        Y.append(f.split(".")[0] + "_mask.tif")

ids = list(range(len(X)))

# Dedicated, seeded generator: the split is reproducible and does not depend
# on (or disturb) the state of the global `random` module.
split_rng = random.Random(42)

# 70% of the ids go to training; the remainder is halved into validation and test.
train_ids = split_rng.sample(ids, int(len(ids) * 0.7))
train_id_set = set(train_ids)  # set membership keeps the complement linear-time
val_and_test_ids = [i for i in ids if i not in train_id_set]
valid_ids = split_rng.sample(val_and_test_ids, int(len(val_and_test_ids) * 0.5))
valid_id_set = set(valid_ids)
test_ids = [i for i in val_and_test_ids if i not in valid_id_set]

train_data = {"id": train_ids, "X": [X[i] for i in train_ids], "Y": [Y[i] for i in train_ids]}
val_data = {"id": valid_ids, "X": [X[i] for i in valid_ids], "Y": [Y[i] for i in valid_ids]}
test_data = {"id": test_ids, "X": [X[i] for i in test_ids], "Y": [Y[i] for i in test_ids]}

train_df = pd.DataFrame(train_data)
val_df = pd.DataFrame(val_data)
test_df = pd.DataFrame(test_data)

# Persist the splits as CSV files in the working directory.
train_df.to_csv("train.csv")
val_df.to_csv("val.csv")
test_df.to_csv("test.csv")

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Image / mask pairs listed in a split CSV file.

    Every CSV row names an image file (column ``"X"``) and its mask file
    (column ``"Y"``); both are looked up directly inside ``root_dir``.
    Items are returned as ``(image, mask)`` float32 arrays of shape
    ``(1, height, width)`` whose pixel values are divided by 255.

    Args:
        csv_file: Path of the CSV file, read with ``pd.read_csv``.
        root_dir: Directory that contains the image and mask files.
        transform: Stored on the instance; ``__getitem__`` does not apply it.
        image_size: ``(width, height)`` every image and mask is resized to.
    """

    def __init__(self, csv_file, root_dir, transform=None, image_size=(224, 224)):
        self.class_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.image_size = tuple(image_size)

    def __len__(self):
        """Return the number of rows (image/mask pairs) in the CSV."""
        return len(self.class_frame)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and mask.

        Raises:
            FileNotFoundError: If the image or mask file cannot be read.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Positional lookup, so the result does not depend on the frame's index labels.
        row = self.class_frame.iloc[idx]
        image = self._load_gray(os.path.join(self.root_dir, row["X"]))
        mask = self._load_gray(os.path.join(self.root_dir, row["Y"]))
        return (image, mask)

    def _load_gray(self, path):
        """Read one file as grayscale, resize it and scale it to float32 (1, H, W)."""
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread reports a missing or unreadable file by returning None.
            raise FileNotFoundError(f"Could not read image file: {path}")
        width, height = self.image_size
        # cv2.resize takes (width, height); the resulting array is (height, width).
        pixels = cv2.resize(pixels, (width, height)) / 255
        return np.reshape(pixels, (1, height, width)).astype(np.float32)

In [None]:
# One dataset per split CSV; all three read their files from the training folder.
train_dataset, valid_dataset, test_dataset = (
    Dataset(split_csv, tif_path)
    for split_csv in ("train.csv", "val.csv", "test.csv")
)

In [None]:
def printResult(Ys, legend, title ="Title", Y='value'):
    """Plot one dashed, circle-marked curve per series against the epoch index.

    Args:
        Ys: Iterable of sequences; each is drawn as its own curve with
            x values ``0 .. len(series) - 1``.
        legend: Labels passed to ``plt.legend``, in the same order as ``Ys``.
        title: Text shown above the plot.
        Y: Label of the vertical axis.
    """
    for series in Ys:
        plt.plot(range(len(series)), series, ls='--', marker='o')
    # The title argument used to be accepted but never drawn.
    plt.title(title)
    plt.ylabel(Y)
    plt.xlabel('epoch')
    plt.grid()
    plt.legend(legend, loc='best')
    plt.show()

In [None]:
!pip install segmentation_models_pytorch

In [None]:
import segmentation_models_pytorch as smp

In [None]:
!git clone https://github.com/Bjarten/early-stopping-pytorch.git

In [None]:
!mv early-stopping-pytorch tools

In [None]:
# Import only the name this notebook uses rather than everything the module exports.
from tools.pytorchtools import EarlyStopping

In [None]:
# Hyper-parameters
epochs = 15
learning_rate = 0.001
batch_size = 16
# Use the GPU when one is present; otherwise run on the CPU instead of failing.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Per-epoch history, stacked into `graphic_data` after training for plotting.
lossesTrain = []
lossesVal = []

scoresTrain = []
scoresVal = []

# Early stopping on the validation loss with a patience of 3.
stopper = EarlyStopping(patience=3)

metrics = [smp.utils.metrics.IoU(threshold=0.5)]

# U-Net with a ResNet-34 encoder initialised from ImageNet weights;
# one input channel (grayscale) and one sigmoid-activated output channel.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=1,
    classes=1,
    activation="sigmoid",
)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Cosine annealing over `epochs` steps; stepped once per epoch in the loop below.
exp_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
loss_fn = smp.utils.losses.DiceLoss()

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

trainepoch = smp.utils.train.TrainEpoch(model,
                                        loss=loss_fn,
                                        optimizer=optimizer,
                                        metrics=metrics,
                                        device=device,
                                        verbose=True)
validepoch = smp.utils.train.ValidEpoch(model,
                                        loss=loss_fn,
                                        metrics=metrics,
                                        device=device,
                                        verbose=True)

for epoch in range(epochs):
    # one training pass followed by one validation pass
    print('\nEpoch: {}'.format(epoch))
    train_log = trainepoch.run(train_loader)
    val_log = validepoch.run(val_loader)

    exp_lr_scheduler.step()

    # The logs are keyed by the loss name and by 'iou_score'.
    lossesTrain.append(train_log[loss_fn.__name__])
    lossesVal.append(val_log[loss_fn.__name__])

    scoresTrain.append(train_log['iou_score'])
    scoresVal.append(val_log['iou_score'])

    stopper(val_log[loss_fn.__name__], model)
    if stopper.early_stop:
        break

# Restore the best weights so later cells do not run on the last-epoch model.
# NOTE(review): assumes EarlyStopping saves the model's state_dict to its
# `path` attribute (upstream default 'checkpoint.pt') whenever the validation
# loss improves - confirm against the cloned tools/pytorchtools.py.
best_checkpoint = getattr(stopper, "path", "checkpoint.pt")
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))

# Rows: train loss, validation loss, train IoU, validation IoU.
graphic_data = np.array([lossesTrain, lossesVal, scoresTrain, scoresVal])

In [None]:
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that are equal to 1.

    The array is scanned column by column (its transpose is flattened) and
    pixel positions are counted from 1.

    Returns:
        A flat list ``[start_1, length_1, start_2, length_2, ...]`` with one
        (start, length) pair per run of consecutive pixels; empty when no
        pixel equals 1.
    """
    runs = []  # one [start, length] pair per run
    for position in np.flatnonzero(x.T.ravel() == 1):
        # A run with 1-based start s and length n ends at 0-based index s + n - 2,
        # so the next pixel extends it exactly when it sits at s + n - 1.
        extends_last = bool(runs) and position == runs[-1][0] + runs[-1][1] - 1
        if extends_last:
            runs[-1][1] += 1
        else:
            runs.append([position + 1, 1])
    return [value for run in runs for value in run]

In [None]:
testpath = "../input/ultrasound-nerve-segmentation/test"
# Process the test files in numeric order of the part of the name before the first dot.
imgs = sorted(os.listdir(testpath), key=lambda s: int(s.split('.')[0]))

from tqdm.notebook import tqdm

encodings = []  # one run-length encoding per test image, in the order of `imgs`

for file_name in tqdm(imgs):
    gray = cv2.imread(os.path.join(testpath, file_name), cv2.IMREAD_GRAYSCALE)

    # Resize to 224x224, divide by 255 and add a leading channel axis.
    scaled = cv2.resize(gray, (224, 224)) / 255
    net_input = scaled.reshape(1, 224, 224).astype(np.float32)

    # Add the batch axis and run the model on the configured device.
    batch = torch.from_numpy(net_input).to(device).unsqueeze(0)
    prediction = model.predict(batch)

    # Round the prediction to the nearest integer, then resize it to 580x420 (width x height).
    small_mask = prediction.squeeze().cpu().numpy().round().astype(np.uint8)
    full_mask = cv2.resize(small_mask, (580, 420), interpolation=cv2.INTER_CUBIC)

    encodings.append(rle_encoding(full_mask))

In [None]:
# Build the whole table in one step: adding rows one at a time with .loc
# enlarges (and copies) the frame on every insert.
# "img" is the 1-based position of the encoding; "pixels" is the run-length
# encoding joined with single spaces.
df_submission = pd.DataFrame(
    {
        "img": [str(position + 1) for position in range(len(encodings))],
        "pixels": [' '.join(map(str, encoding)) for encoding in encodings],
    },
    columns=["img", "pixels"],
)

df_submission.to_csv('submission.csv', index=False)

In [None]:
# Loss curves: rows 0 and 1 of graphic_data are passed as the two series.
printResult(
    Ys=[graphic_data[0], graphic_data[1]],
    legend=["Train loss", "Val loss"],
    title="Train and valid Losses",
    Y='loss value',
)

In [None]:
# Score curves: rows 2 and 3 of graphic_data are passed as the two series.
printResult(
    Ys=[graphic_data[2], graphic_data[3]],
    legend=["Train score", "Val score"],
    title="Train and valid scores",
    Y='score value',
)