In [None]:
!pip install segmentation-models-pytorch
!pip install albumentations
import segmentation_models_pytorch as smp


import pandas as pd
import numpy as np

import os
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

# Plotting and image display
from PIL import Image
import matplotlib.pyplot as plt


CSV preprocessing

In [None]:
path = '/kaggle/input/understanding_cloud_organization'
train = pd.read_csv(f'{path}/train.csv')
sub = pd.read_csv(f'{path}/sample_submission.csv')

# Image_Label has the form "<image file name>_<class label>".
train['label'] = train['Image_Label'].str.split('_').str[1]
train['im_id'] = train['Image_Label'].str.split('_').str[0]
sub['label'] = sub['Image_Label'].str.split('_').str[1]
sub['im_id'] = sub['Image_Label'].str.split('_').str[0]

# Number of non-empty masks per image. The column names are set explicitly
# (rename_axis + reset_index(name=...)) because the default names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x.
id_mask_count = (
    train.loc[train['EncodedPixels'].notnull(), 'im_id']
    .value_counts()
    .rename_axis('img_id')
    .reset_index(name='count')
)

# Hold out 10% of the images for validation, stratified by mask count.
train_ids, valid_ids = train_test_split(id_mask_count['img_id'].values,
                                        random_state=42,
                                        stratify=id_mask_count['count'],
                                        test_size=0.1)
test_ids = sub['im_id'].drop_duplicates().values

train_ids[0:10]

In [None]:
# Helper for drawing pictures: load an image from disk.
def get_img(x,
            path: str = '../input/understanding_cloud_organization',
            folder: str = 'train_images'):
    """
    Read the image file named ``x`` located in ``<path>/<folder>``.

    :param x: image file name
    :param path: root directory of the dataset
    :param folder: sub-directory of ``path`` that holds the image
    :return: whatever ``cv2.imread`` returns for the resulting file path
    """
    return cv2.imread(os.path.join(f"{path}/{folder}", x))

def dice(img1, img2):
    """
    Dice coefficient between two binary masks.

    Both inputs are converted to boolean arrays (any non-zero value counts as
    foreground) and the score is ``2 * |A & B| / (|A| + |B|)``.

    :param img1: first mask (array-like)
    :param img2: second mask (array-like), same shape as ``img1``
    :return: Dice score; 1.0 when both masks are empty
    """
    # The builtin `bool` is used because the `np.bool` alias was removed from NumPy.
    img1 = np.asarray(img1).astype(bool)
    img2 = np.asarray(img2).astype(bool)

    total = img1.sum() + img2.sum()
    if total == 0:
        # Two empty masks agree perfectly; this also avoids a 0/0 division (NaN).
        return 1.0

    intersection = np.logical_and(img1, img2)

    return 2. * intersection.sum() / total


def soft_dice_score(
    output: torch.Tensor, target: torch.Tensor, smooth: float = 0.0, eps: float = 1e-7, dims=None
) -> torch.Tensor:
    """
    Soft Dice score ``(2 * sum(output * target) + smooth) / (sum(output + target) + smooth)``.

    :param output: predicted values, same shape as ``target``
    :param target: reference values
    :param smooth: constant added to both numerator and denominator
    :param eps: lower bound applied to the denominator to avoid division by zero
    :param dims: dimension(s) to sum over; when ``None`` every element is summed
    :return: a scalar tensor when ``dims`` is ``None``, otherwise a tensor with
        the ``dims`` axes reduced
    """
    assert output.size() == target.size()

    # Only pass `dim` through when the caller asked for a partial reduction.
    reduce_kwargs = {} if dims is None else {"dim": dims}
    overlap = torch.sum(output * target, **reduce_kwargs)
    total = torch.sum(output + target, **reduce_kwargs)

    denominator = (total + smooth).clamp_min(eps)
    return (2.0 * overlap + smooth) / denominator



def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Turn a run-length encoded string into a binary mask.

    :param mask_rle: space separated "start length start length ..." pairs,
        where starts are 1-based positions in the column-major flattened mask
    :param shape: (height, width) of the mask to return
    Returns a uint8 numpy array of the given shape, 1 - mask, 0 - background
    '''
    numbers = np.asarray(mask_rle.split(), dtype=int)
    begins = numbers[0::2] - 1  # convert 1-based starts to 0-based offsets
    stops = begins + numbers[1::2]

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(begins, stops):
        flat[begin:stop] = 1

    # Runs are counted down the columns, hence Fortran (column-major) order.
    return flat.reshape(shape, order='F')

def make_mask(df: pd.DataFrame, image_name: str = 'img.jpg', shape: tuple = (1400, 2100)):
    """
    Build the 4-channel mask array of one image from its run-length encodings.

    :param df: frame with an ``im_id`` column (image name) and an
        ``EncodedPixels`` column (RLE string, or a missing value for "no mask")
    :param image_name: value of ``im_id`` selecting the rows of the image
    :param shape: (height, width) of the masks
    :return: float32 array of shape ``(height, width, 4)``; channel ``k`` holds
        the mask decoded from the k-th matching row of ``df``, and channels
        with a missing encoding stay all-zero
    """
    encoded_masks = df.loc[df['im_id'] == image_name, 'EncodedPixels']
    masks = np.zeros((shape[0], shape[1], 4), dtype=np.float32)

    for idx, label in enumerate(encoded_masks.values):
        # pd.isna recognises every missing-value marker (NaN, None), whereas an
        # identity test against np.nan only matches that one object.
        if not pd.isna(label):
            # Forward `shape` so non-default sizes decode to a matching mask.
            masks[:, :, idx] = rle_decode(label, shape=shape)

    return masks

# Convert from the cv2 (H, W, C) layout to the layout torch consumes.
def to_tensor(x, **kwargs):
    """
    Move the last axis of an image or mask to the front and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose(2, 0, 1)
    return channels_first.astype('float32')

def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns "start length start length ..." as a space separated string, with
    1-based starts counted over the column-major flattened mask
    '''
    column_major = img.T.flatten()
    # Pad with background on both sides so runs touching the border are closed.
    padded = np.concatenate([[0], column_major, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second boundary is a run end; turn it into a run length.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))


# Pictures: image next to its four class masks.
def visualize(image, mask, original_image=None, original_mask=None):
    """
    Plot an image together with the four channels of its mask.

    When neither ``original_image`` nor ``original_mask`` is given, a single
    row is drawn. Otherwise two rows are drawn: the original pair on top and
    the (transformed) ``image``/``mask`` pair below.
    """
    title_size = 14
    class_names = ('Fish', 'Flower', 'Gravel', 'Sugar')

    if original_image is None and original_mask is None:
        _, axes = plt.subplots(1, 5, figsize=(24, 24))
        axes[0].imshow(image)
        for col, name in enumerate(class_names, start=1):
            axes[col].imshow(mask[:, :, col - 1])
            axes[col].set_title(f'Mask {name}', fontsize=title_size)
        return

    _, axes = plt.subplots(2, 5, figsize=(24, 12))
    rows = (
        (original_image, original_mask, 'Original image', 'Original mask'),
        (image, mask, 'Transformed image', 'Transformed mask'),
    )
    for row, (picture, channels, picture_title, mask_title) in enumerate(rows):
        axes[row, 0].imshow(picture)
        axes[row, 0].set_title(picture_title, fontsize=title_size)
        for col, name in enumerate(class_names, start=1):
            axes[row, col].imshow(channels[:, :, col - 1])
            axes[row, col].set_title(f'{mask_title} {name}', fontsize=title_size)
            
# Pictures plus the raw (unprocessed) predicted mask.
def visualize_with_raw(image, mask, original_image=None, original_mask=None, raw_image=None, raw_mask=None):
    """
    Plot three rows of an image followed by the four channels of a mask:
    the original pair, the raw prediction pair and the post-processed pair.

    All six arguments are drawn unconditionally, so each of them has to be
    supplied despite the ``None`` defaults.
    """
    title_size = 14
    class_names = ('Fish', 'Flower', 'Gravel', 'Sugar')

    _, axes = plt.subplots(3, 5, figsize=(24, 12))

    # NOTE(review): the raw row reuses the title 'Original image' - confirm intended.
    rows = (
        (original_image, original_mask, 'Original image', 'Original mask'),
        (raw_image, raw_mask, 'Original image', 'Raw predicted mask'),
        (image, mask, 'Transformed image', 'Predicted mask with processing'),
    )
    for row, (picture, channels, picture_title, mask_title) in enumerate(rows):
        axes[row, 0].imshow(picture)
        axes[row, 0].set_title(picture_title, fontsize=title_size)
        for col, name in enumerate(class_names, start=1):
            axes[row, col].imshow(channels[:, :, col - 1])
            axes[row, col].set_title(f'{mask_title} {name}', fontsize=title_size)
            
# Pictures + augmentation.
def plot_with_augmentation(image, mask, augment):
    """
    Apply ``augment`` to the image/mask pair and plot the result under the
    original pair using `visualize`.

    ``augment`` is called as ``augment(image=..., mask=...)`` and must return a
    mapping with ``'image'`` and ``'mask'`` entries.
    """
    result = augment(image=image, mask=mask)
    visualize(result['image'], result['mask'], original_image=image, original_mask=mask)
    
    
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise via NumPy."""
    return 1 / (1 + np.exp(-x))



#
def post_process(probability, threshold, min_size):
    """
    Binarise a probability map and drop small connected components.

    :param probability: 2-D array of per-pixel probabilities
    :param threshold: pixels strictly above this value become foreground
    :param min_size: components with at most this many pixels are discarded
    :return: ``(predictions, num)`` - a float32 mask with the same shape as the
        input holding 1 on the kept components, and the number of kept components
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Size the output from the label image instead of a fixed (350, 525), so
    # the function works for any input resolution.
    predictions = np.zeros(component.shape, np.float32)
    num = 0
    # Label 0 is the background, so real components start at 1.
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num

In [None]:
class CloudDataset(Dataset):
    """
    Dataset yielding ``(image, mask)`` pairs for the cloud segmentation data.

    Images are read from ``<path>/test_images`` when ``datatype == 'test'`` and
    from ``<path>/train_images`` otherwise. Masks are built with `make_mask`
    from the rows of ``df`` that belong to the image.
    """

    def __init__(self, df: pd.DataFrame = None,
                 datatype: str = 'train',
                 img_ids: np.array = None,
                 transforms=None,
                 preprocessing=None,
                 path='../input/understanding_cloud_organization'):
        """
        :param df: frame passed to `make_mask` for every item
        :param datatype: ``'test'`` selects the test image folder, anything
            else the train image folder
        :param img_ids: sequence of image file names; defines length and order
        :param transforms: optional callable invoked as
            ``transforms(image=..., mask=...)`` returning a mapping with
            ``'image'`` and ``'mask'``
        :param preprocessing: optional callable with the same calling
            convention, applied after ``transforms``
        :param path: dataset root directory
        """
        self.df = df
        if datatype != 'test':
            self.data_folder = f"{path}/train_images"
        else:
            self.data_folder = f"{path}/test_images"
        self.img_ids = img_ids
        self.transforms = transforms
        self.preprocessing = preprocessing

    def __getitem__(self, idx):
        """
        Load item ``idx`` and return ``(img, mask)`` after the optional
        transforms and preprocessing.

        :raises FileNotFoundError: if the image file cannot be read
        """
        image_name = self.img_ids[idx]
        mask = make_mask(self.df, image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # `transforms` defaults to None, so it must not be called unconditionally.
        if self.transforms is not None:
            augmented = self.transforms(image=img, mask=mask)
            img = augmented['image']
            mask = augmented['mask']
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']
        return img, mask

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_ids)


In [None]:
ENCODER = 'resnet18'
ENCODER_WEIGHTS = 'imagenet'
DEVICE = 'cuda'
ACTIVATION = None

# U-Net with one output channel per cloud class.
model = smp.Unet(
    classes=4,
    activation=ACTIVATION,
    encoder_weights=ENCODER_WEIGHTS,
    encoder_name=ENCODER,
)
model.to(DEVICE)

In [None]:
# Input preprocessing function for the chosen encoder and pretrained weights.
# The same assignment is repeated in the augmentation cell further down.
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

In [None]:
# Training pipeline: random flips and geometric distortions, then a fixed-size resize.
transform_t = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.5, rotate_limit=0, border_mode=0, p=0.5),
    A.GridDistortion(p=0.5),
    A.OpticalDistortion(distort_limit=2, shift_limit=0.5, p=0.5),
    A.Resize(height=320, width=640),
])

# Validation / test pipeline: resize only.
transform_v = A.Resize(height=320, width=640)

# Encoder-specific input preprocessing, followed by HWC -> CHW float32 conversion.
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
preprocess = A.Compose([
    A.Lambda(image=preprocessing_fn),
    A.Lambda(mask=to_tensor, image=to_tensor),
])

In [None]:
bs = 34
num_workers = 4

# The earlier transform-less `valid_dataset` was removed: it was overwritten
# right away and could not be indexed without transforms.
train_dataset = CloudDataset(df=train,
                             datatype='train',
                             img_ids=train_ids,
                             transforms=transform_t,
                             preprocessing=preprocess)
valid_dataset = CloudDataset(df=train,
                             datatype='valid',
                             img_ids=valid_ids,
                             transforms=transform_v,
                             preprocessing=preprocess)

# Only the training data is shuffled; validation order must stay fixed because
# later cells pair predictions with `valid_dataset` items by position.
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size=bs, shuffle=False, num_workers=num_workers)


In [None]:
# Preview the first sample of each of the first four training batches:
# the image alone, then the image overlaid with each of its four class masks.
for i, (image, mask) in enumerate(train_loader):
    # Stop before creating a figure, so no empty figure is left behind.
    if i == 4:
        break
    fig = plt.figure(figsize=(25, 6))
    fig.add_subplot(1, 5, 1, xticks=[], yticks=[])

    # First sample of the batch, CHW -> HWC for matplotlib.
    # The builtin `int` replaces the `np.int` alias that was removed from NumPy.
    # NOTE(review): images have already gone through `preprocess`; confirm that
    # scaling by 255 gives a sensible display range.
    image = np.array(255 * image[0].numpy().transpose(1, 2, 0), dtype=int)
    mask = np.array(mask[0], dtype=int)
    plt.imshow(image)

    for j in range(4):
        fig.add_subplot(1, 5, 2 + j, xticks=[], yticks=[])
        plt.imshow(image)
        plt.imshow(mask[j], alpha=0.5, cmap='gray')
    plt.axis('off')
    plt.show()

In [None]:
# Two learning rates: a small one for the pretrained encoder and a larger one
# for everything else. "Everything else" is gathered by exclusion, so modules
# outside `model.decoder` (e.g. a separate segmentation head, in smp releases
# that have one) are optimised too instead of being silently left out.
encoder_params = list(model.encoder.parameters())
encoder_param_ids = {id(p) for p in encoder_params}
non_encoder_params = [p for p in model.parameters() if id(p) not in encoder_param_ids]

optimizer = torch.optim.Adam([
    {'params': non_encoder_params, 'lr': 1e-2},
    {'params': encoder_params, 'lr': 1e-3},
])
scheduler = ReduceLROnPlateau(optimizer, factor=0.15, patience=2)
# Plain PyTorch loss on raw logits; avoids depending on the deprecated
# `smp.utils` sub-package.
criterion = nn.BCEWithLogitsLoss()


In [None]:
# num_epochs = 25

# # model, criterion, optimizer
# optimizer = torch.optim.Adam(model.parameters(), 1e-3)

# scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0)
# criterion = smp.utils.losses.BCEWithLogitsLoss()

# probabilites = nn.Sigmoid()
# best_dice = 0

# for epoch in range(num_epochs):
#     model.train()
#     scheduler.step(epoch)

#     train_loss = 0
#     num_samples = 0
#     train_dice = 0

#     for i, (img, mask) in enumerate(train_loader):
#         i += 1
#         img = img.cuda()
#         mask = mask.cuda()

#         optimizer.zero_grad()

#         prediction = model(img)
#         loss = criterion(prediction, mask)
#         loss.backward()
#         optimizer.step()
#         loss = loss.cpu().detach().numpy()
#         train_loss += loss * img.size(0)
#         num_samples += img.size(0)

#         dice = soft_dice_score(probabilites(prediction), mask)
#         train_dice += dice.cpu().detach().numpy() * img.size(0)

#         if i % 100 == 0 or i % len(train_loader) == 0:
#             print('Epoch:', epoch, 'Train_loss:', train_loss/num_samples, 'Train_dice:', train_dice/num_samples,
#                   'Sample/Total:', i, '/', len(train_loader))

#     model.eval()
#     val_dice = 0
#     num_samples = 0
#     with torch.no_grad():
#         for img, mask in valid_loader:
#             img = img.cuda()
#             mask = mask.cuda()

#             pred = model(img)
#             dice = soft_dice_score(probabilites(pred), mask)
#             val_dice += dice.cpu().detach().numpy() * img.size(0)
#             num_samples += img.size(0)

#     print('')
#     print('VAL_DICE:', val_dice/num_samples)
#     if val_dice/num_samples > best_dice:
#         best_dice = val_dice/num_samples
#         torch.save(model.state_dict(), 'unet_eff.pth')


In [None]:
# NOTE(review): this runs before the next cell installs and configures the Kaggle CLI,
# and the same command is repeated there with a destination path - confirm this cell is still needed.
!kaggle kernels output cinnamonpalette/notebookf75a957689 

In [None]:
# Install and configure the Kaggle CLI, then download the output of an earlier kernel run.
# NOTE(review): kaggle.json is an API credential file and is copied here out of a cloned
# GitHub repository - confirm that repository is private, or load the token from a secret store.
# NOTE(review): `/path/to/dest` looks like a placeholder destination - confirm the intended path.
!git clone https://github.com/arabel1a/hg-set.git
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp ./hg-set/kaggle.json ~/.kaggle/
!kaggle kernels output cinnamonpalette/notebookf75a957689 -p /path/to/dest


In [None]:
# Restore trained weights into the model built above.
# NOTE(review): the checkpoint file name mentions resnet50 while ENCODER is 'resnet18' -
# confirm the weights were saved from the same architecture.
model.load_state_dict(torch.load('../input/aaaaaa/unet_resnet50(4).pth'))

In [None]:
import tqdm

# Run the model over the validation set and keep the raw outputs (logits),
# one NumPy array per batch.
model.eval()
predicts = []
with torch.no_grad():
    # The masks are not needed here, so they are not copied to the GPU.
    for img, _mask in tqdm.tqdm(valid_loader):
        pred = model(img.cuda())
        predicts.append(pred.detach().cpu().numpy())


In [None]:
# Shape of the first batch of validation outputs.
predicts[0].shape

In [None]:
# Flatten the per-batch outputs into one array per validation image.
# Iterating a batch along its first axis yields each sample without the batch
# dimension, so no hard-coded batch size or in-place resize is needed.
p_predicts = [sample for batch in predicts for sample in batch]


In [None]:
# Bring ground-truth masks and predicted logits to 350x525 and store them
# flat: entry `4 * image_index + class_index`.
valid_masks = []
probabilities = np.zeros((4 * len(valid_dataset), 350, 525))
n_pairs = min(len(valid_dataset), len(p_predicts))
for i, ((_image, mask), output) in enumerate(tqdm.tqdm(zip(valid_dataset, p_predicts),
                                                       total=n_pairs)):
    for m in mask:
        if m.shape != (350, 525):
            m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        valid_masks.append(m)

    for j, probability in enumerate(output):
        if probability.shape != (350, 525):
            probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        probabilities[i * 4 + j, :, :] = probability

In [None]:
# Grid search, per class, over the probability threshold and the minimum
# connected-component size, keeping the pair with the best mean Dice score
# on the validation set.
class_params = {}
thresholds = [t / 100 for t in range(0, 100, 5)]
min_sizes = [5000, 10000, 25000, 50000, 100000]

for class_id in range(4):
    print(class_id)
    # Entries of one class sit at every 4th position. The sigmoid does not
    # depend on the threshold or size being tried, so compute it once per class
    # instead of once per grid point.
    class_probabilities = sigmoid(probabilities[class_id::4])
    class_truth = valid_masks[class_id::4]

    attempts = []
    for t in tqdm.tqdm(thresholds):
        for ms in min_sizes:
            d = []
            for probability, truth in zip(class_probabilities, class_truth):
                predict, _num_predict = post_process(probability, t, ms)
                # Two empty masks count as a perfect match.
                if predict.sum() == 0 and truth.sum() == 0:
                    d.append(1)
                else:
                    d.append(dice(predict, truth))

            attempts.append((t, ms, np.mean(d)))

    attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])

    attempts_df = attempts_df.sort_values('dice', ascending=False)
    print(attempts_df.head())
    best_threshold = attempts_df['threshold'].values[0]
    best_size = attempts_df['size'].values[0]

    class_params[class_id] = (best_threshold, best_size)

In [None]:
# Selected (threshold, minimum component size) for each class id.
print(class_params)

In [None]:
# Plot ground truth, raw model output and post-processed prediction for the
# first three validation images.
# The loop variable is called `sample` so the builtin `input` is not shadowed.
for i, (sample, output) in enumerate(zip(valid_dataset, p_predicts)):
    image, mask = sample

    image_vis = image.transpose(1, 2, 0)
    mask = mask.astype('uint8').transpose(1, 2, 0)
    raw_mask = output.transpose(1, 2, 0)

    pr_mask = np.zeros((350, 525, 4))
    for j in range(4):
        probability = cv2.resize(raw_mask[:, :, j], dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
        pr_mask[:, :, j], _ = post_process(sigmoid(probability), class_params[j][0], class_params[j][1])

    visualize_with_raw(image=image_vis, mask=pr_mask,
                       original_image=image_vis, original_mask=mask,
                       raw_image=image_vis, raw_mask=raw_mask)

    if i >= 2:
        break

In [None]:
# Test data goes through the same resize and preprocessing as validation data.
test_dataset = CloudDataset(
    df=sub,
    datatype='test',
    img_ids=test_ids,
    transforms=transform_v,
    preprocessing=preprocess,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=20,
    shuffle=False,
    num_workers=6,
)


In [None]:
# Predict the test set and run-length encode every (image, class) mask.
# Entries are appended image by image with the four classes in channel order.
encoded_pixels = []
model.eval()
with torch.no_grad():
    # The masks yielded by the test dataset are not used.
    for img, _mask in tqdm.tqdm(test_loader):
        # One device-to-host copy per batch instead of one per channel.
        pred = model(img.cuda()).detach().cpu().numpy()
        for sample in pred:
            # The channel index is the class id, so take it from enumerate
            # rather than from a running counter modulo 4.
            for class_id, probability in enumerate(sample):
                if probability.shape != (350, 525):
                    probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                threshold, min_size = class_params[class_id]
                predict, num_predict = post_process(sigmoid(probability), threshold, min_size)
                if num_predict == 0:
                    # No component survived post-processing: submit an empty mask.
                    encoded_pixels.append('')
                else:
                    encoded_pixels.append(mask2rle(predict))

In [None]:
# Attach the encoded predictions and write the two submission columns.
sub['EncodedPixels'] = encoded_pixels
submission_columns = ['Image_Label', 'EncodedPixels']
sub[submission_columns].to_csv('submission.csv', index=False)
len(sub)