In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Useful imports

In [None]:
!pip install segmentation_models_pytorch
! git clone https://github.com/Bjarten/early-stopping-pytorch.git
! mv ./early-stopping-pytorch ./lib

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, sampler
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import torchvision
import cv2
import re
import segmentation_models_pytorch as smp
from lib.pytorchtools import *

# **Creating Dataset**

### Creating a custom PyTorch dataset

In [None]:
class NerveDataset(Dataset):
    """Dataset of ultrasound images and their binary nerve masks.

    Every file in ``directory`` whose name does not contain ``'mask'`` is
    treated as an image; its mask is looked up next to it as
    ``<image stem>_mask.tif``. Images and masks are resized to 224x224 with
    nearest-neighbour interpolation when they are opened.

    Args:
        directory: ``pathlib.Path`` of the folder that holds the files.
        pytorch: When True, ``__getitem__`` returns the image channel-first.
        is_test: When True, ``__getitem__`` returns only the image tensor
            (no mask is opened).
    """

    def __init__(self, directory, pytorch=True, is_test=False):
        super().__init__()

        # Pair every image with its mask path. The filter looks at the file
        # name only, so a parent folder containing "mask" cannot hide all images.
        self.files = [
            self.combine_files(file_name)
            for file_name in directory.iterdir()
            if 'mask' not in file_name.name
        ]

        # Numeric ordering by the numbers in the file name (1, 2, ..., 10 rather than 1, 10, 2).
        self.files.sort(key=self._sort_key)

        self.pytorch = pytorch
        self.resize = torchvision.transforms.Resize((224,224),interpolation=Image.NEAREST)
        self.is_test = is_test

    @staticmethod
    def _sort_key(entry):
        """Return the numbers found in the image's file name as a tuple of ints."""
        # Only the file name is inspected: digits in parent folders must not
        # influence the ordering.
        return tuple(int(number) for number in re.findall(r'\d+', Path(entry['image']).name))

    def combine_files(self, file_name: Path):
        """Return a dict with the image path and the path of its mask file.

        The mask path is the image path with ``_mask.tif`` replacing the
        extension; it is returned as a string.
        """
        file_name = Path(file_name)

        files = {
            'image': file_name,
            # Derived with pathlib so it works for any path, not only for
            # relative paths that start with '..'.
            'mask': str(file_name.with_name(file_name.stem + '_mask.tif')),
        }

        return files

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.files)

    def open_as_array(self, idx, invert=False):
        """Load image ``idx`` as a float array scaled by its integer dtype's maximum.

        A trailing channel axis is added to the resized image; with
        ``invert=True`` the axes are reordered so that axis comes first.
        """
        # The context manager closes the file handle once the resized copy exists.
        with Image.open(self.files[idx]['image']) as source:
            raw_image = np.array(self.resize(source))

        raw_image = np.stack([raw_image], axis=2)

        if invert:
            raw_image = raw_image.transpose((2,0,1))

        # normalize
        return (raw_image / np.iinfo(raw_image.dtype).max)

    def open_mask(self, idx, add_dims=False):
        """Load mask ``idx`` as a 0/1 array (1 where the stored pixel equals 255).

        With ``add_dims=True`` a leading axis is added.
        """
        with Image.open(self.files[idx]['mask']) as source:
            raw_mask = np.array(self.resize(source))

        raw_mask = np.where(raw_mask==255, 1, 0)

        return np.expand_dims(raw_mask, 0) if add_dims else raw_mask

    def __getitem__(self, idx):
        """Return ``(image, mask)`` float32 tensors, or only ``image`` when ``is_test``."""
        x = torch.tensor(self.open_as_array(idx, invert=self.pytorch), dtype=torch.float32)

        if not self.is_test:
            y = torch.tensor(self.open_mask(idx, add_dims=True), dtype=torch.float32)
            return x, y

        return x

    def open_as_pil(self, idx):
        """Return image ``idx`` (resized) as an 8-bit PIL image."""
        # Drop the channel axis (PIL expects a 2-D array for a grayscale image) and
        # scale back with 255: scaling with 256 would wrap full-intensity pixels to 0.
        arr = np.rint(255 * self.open_as_array(idx)[:, :, 0])

        return Image.fromarray(arr.astype(np.uint8))

    def __repr__(self):
        return 'Dataset class with {} files'.format(len(self))


In [None]:
# Build the training dataset from the competition's train folder and report its size.
images_path = Path('../input/ultrasound-nerve-segmentation/train')
data = NerveDataset(directory=images_path)
len(data)

In [None]:
# Show one training sample next to its ground-truth mask.
sample_idx = 5  # arbitrary example index

fig, ax = plt.subplots(1,2, figsize=(10,9))
ax[0].imshow(data.open_as_array(sample_idx))
ax[0].set_title('Ultrasound image')
ax[1].imshow(data.open_mask(sample_idx))
ax[1].set_title('Nerve mask');

### Split data into train and validation sets

In [None]:
# Fraction of the samples used for training; the remainder is used for validation.
split_rate = 0.7
train_ds_len = int(len(data) * split_rate)
valid_ds_len = len(data) - train_ds_len

# Seed the split so the train/validation partition is identical on every run
# (otherwise each kernel restart produces a different split).
split_generator = torch.Generator().manual_seed(42)
train_ds, valid_ds = torch.utils.data.random_split(
    data, (train_ds_len, valid_ds_len), generator=split_generator
)

print(f'Train dataset length: {len(train_ds)}\n')
print(f'Validation dataset length: {len(valid_ds)}\n')
print(f'All data length: {len(data)}\n')

# Model configuring

In [None]:
# NOTE(review): some segmentation_models_pytorch releases do not load the `utils`
# subpackage on a bare `import segmentation_models_pytorch`; importing it explicitly
# makes `smp.utils` available either way. Confirm against the installed version.
import segmentation_models_pytorch.utils

# Model: U-Net with a ResNet-34 encoder, one input channel and one sigmoid output channel.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=1,
    classes=1,
    activation="sigmoid"
)
# Second name for the same model object, used by the inference cells.
unet = model

# Device type
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Params
learning_rate = 0.001
epochs = 50
metrics = [smp.utils.metrics.IoU()]

# Loss & optimizer
loss_function = smp.utils.losses.DiceLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Scheduler & stopper
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
stopper = EarlyStopping(patience=3)

# Train & validation runners
train_epoch = smp.utils.train.TrainEpoch(
    model,
    loss=loss_function,
    optimizer=optimizer,
    metrics=metrics,
    device=device,
    verbose=True,
)
val_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss_function,
    metrics=metrics,
    device=device,
    verbose=True,
)

# Data loaders: only the training data is shuffled. Keeping the validation order
# fixed makes the validation batches (and the previews drawn from them) repeatable.
train_dl = DataLoader(train_ds, batch_size=16, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=16, shuffle=False)

# Model training

In [None]:
# Per-epoch history of the loss and of the IoU metric.
train_loss = []
val_loss = []

train_acc = []
val_acc = []

# Key under which the epoch logs report the loss.
loss_name = loss_function.__name__

for epoch in range(epochs):
    # training process: one pass over the training data, then one over the validation data
    print('\nEpoch: {}'.format(epoch))
    train_log = train_epoch.run(train_dl)
    val_log = val_epoch.run(valid_dl)

    scheduler.step()

    train_loss.append(train_log[loss_name])
    val_loss.append(val_log[loss_name])

    train_acc.append(train_log['iou_score'])
    val_acc.append(val_log['iou_score'])

    # Stop once the validation loss has not improved for `patience` epochs.
    stopper(val_log[loss_name], model)
    if stopper.early_stop:
        break

# Restore the best weights: when training stops early, the model in memory has been
# trained for several epochs past its best validation loss.
# NOTE(review): assumes EarlyStopping stores the best state_dict at `stopper.path`
# ('checkpoint.pt' by default) - confirm against the cloned helper.
best_checkpoint = Path(getattr(stopper, 'path', 'checkpoint.pt'))
if best_checkpoint.is_file():
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))

# Train results

In [None]:
# Loss curves over the epochs that were actually run.
plt.figure(figsize=(10, 10))
plt.plot(range(len(train_loss)), train_loss, label='train_loss')  # label typo 'tain_loss' fixed
plt.plot(range(len(val_loss)), val_loss, label='val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Train and validation losses for each epoch', fontdict={'fontsize': 30,}, pad=20);

In [None]:
# Metric curves (the logged 'iou_score' values) for the training and validation sets.
plt.figure(figsize=(10, 10))
for history, curve_label in ((train_acc, 'train_acc'), (val_acc, 'val_acc')):
    epochs_run = range(len(history))
    plt.plot(epochs_run, history, label=curve_label)
plt.legend()
plt.title('Train and validation accuracy for each epoch', fontdict={'fontsize': 30,}, pad=20)

In [None]:
# Test images are served one at a time and in dataset order (no shuffling).
test_images_path = Path('../input/ultrasound-nerve-segmentation/test')
test_data = NerveDataset(directory=test_images_path, is_test=True)
test_dl = DataLoader(dataset=test_data, batch_size=1, shuffle=False)
len(test_data)

In [None]:
# Switch to evaluation mode for inference.
unet.train(False)
# Run the forward passes on whichever device the model weights currently live on.
model_device = next(unet.parameters()).device

dataiter = iter(valid_dl)

# Rows: input image, ground-truth mask, predicted mask. One column per validation batch.
fx, ax = plt.subplots(3, 5, figsize=(10,10))

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    for i in range(5):
        # DataLoader iterators are advanced with the builtin next();
        # the `.next()` method is not available on current PyTorch iterators.
        images, masks = next(dataiter)

        # Show the first sample of the batch, channel-last for imshow.
        ax[0][i].imshow(images[0].permute(1, 2, 0).numpy())
        ax[1][i].imshow(masks[0].permute(1, 2, 0).numpy())

        pred = unet(images.to(model_device)).cpu().numpy()
        ax[2][i].imshow(np.transpose(pred[0], (1, 2, 0)))
    
    
    

In [None]:
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that are equal to 1.

    The array is traversed column by column (transpose, then flatten). The
    result is a flat list ``[start, length, start, length, ...]`` where each
    ``start`` is the 1-based position of a run's first pixel.
    """
    positions = np.where(x.T.flatten() == 1)[0]

    encoded = []
    last = -2  # sentinel: never adjacent to the first hit
    for pos in positions:
        if pos - last > 1:
            # Gap since the previous hit: open a new run of length 1.
            encoded += [pos + 1, 1]
        else:
            # Adjacent to the previous hit: extend the current run.
            encoded[-1] = encoded[-1] + 1
        last = pos

    return encoded

In [None]:
encodings = []
counter = 0

# Make sure the model is in evaluation mode even when this cell is run on its own,
# and run on whichever device its weights currently live on.
unet.eval()
inference_device = next(unet.parameters()).device

# Inference only: disabling autograd avoids building a graph for every test image.
with torch.no_grad():
    for image in test_dl:
        # The loader yields one image per batch; take that single prediction.
        pr_mask = unet(image.to(inference_device))[0]

        # Round the sigmoid output to a 0/1 mask, then resize it; cv2.resize takes
        # the target size as (width, height).
        pr_mask = pr_mask.squeeze().cpu().numpy().round().astype(np.uint8)
        pr_mask = cv2.resize(pr_mask,(580,420), interpolation=cv2.INTER_CUBIC)

        encodings.append(rle_encoding(pr_mask))

        print(f'Image: {counter} ')

        counter += 1


In [None]:
# Number of run-length encodings collected (one is appended per batch of the test loader).
len(encodings)

In [None]:
# Build the whole table in one go: growing a DataFrame row by row with .loc
# re-allocates on every insert and gets slow for thousands of rows.
# Image ids are the 1-based positions of the encodings.
df_submission = pd.DataFrame(
    {
        "img": [str(position + 1) for position in range(len(encodings))],
        "pixels": [' '.join(map(str, encoding)) for encoding in encodings],
    },
    columns=["img", "pixels"],
)

df_submission.to_csv('./submission.csv', index=False)
print('Done!')