In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib as mpl

In [None]:
# Enlarge the default figure canvas so images are easier to inspect
mpl.rcParams.update({'figure.figsize': [6.0, 6.0]})

In [None]:
# Read the training annotations; the trailing expression renders the frame
df = pd.read_csv(
    "../input/airbus-ship-detection/train_ship_segmentations_v2.csv"
)
df

In [None]:
# General info about dataset

import math
from pprint import pprint

# A row whose EncodedPixels is missing (NaN) marks an image without ships.
df["ShipPresent"] = df.EncodedPixels.notna()

print("Empty images (no ships in frame)", int((~df.ShipPresent).sum()))

# Drop empty images to reduce weight of dataset
# df = df[df.ShipPresent]

# Count ships, not rows, per image: an empty image still occupies one row, so
# counting rows would report it as a one-ship image and inflate the average.
ships_per_image = df.groupby("ImageId", sort=False)["ShipPresent"].sum()
ship_occurances = {name: int(count) for name, count in ships_per_image.items()}

# Histogram: number of ships -> number of images containing that many ships.
number_of_occurances = {
    int(num): int(images)
    for num, images in ships_per_image.value_counts().items()
}

print("Number of ships in image: ")
pprint(number_of_occurances)  # pprint sorts dictionaries by key

# Guard the division so an empty frame reports NaN instead of raising.
average_ships = (
    sum(ship_occurances.values()) / len(ship_occurances)
    if ship_occurances
    else float("nan")
)
print(f"Average number of ships in image = {average_ships}")

In [None]:
# Re-display the frame to inspect the ShipPresent column added above
df

In [None]:
# Preview one training image (the file referenced by row 2 of the frame)
sample_path = f'../input/airbus-ship-detection/train_v2/{df.ImageId[2]}'
img = mpimg.imread(sample_path)

fig, ax = plt.subplots()
ax.imshow(img)
plt.show()

In [None]:
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary 2-D array.

    Args:
        mask_rle: Space-separated string of ``start length`` pairs, where
            ``start`` is a 1-based offset into the flattened mask. Any
            non-string value (e.g. the NaN pandas stores for a missing mask)
            or an empty string decodes to an all-zero mask.
        shape: Two-element sequence. A flat buffer of ``shape[0] * shape[1]``
            pixels is reshaped to ``shape`` and then transposed.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape ``(shape[1], shape[0])``
        holding 1 inside the encoded runs and 0 elsewhere.

    Raises:
        ValueError: If the string holds an odd number of tokens, i.e. a start
            without a matching length.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Missing masks arrive as float NaN, which has no .split(); treat them as
    # "nothing to draw" instead of crashing.
    if isinstance(mask_rle, str):
        tokens = mask_rle.split()
        if len(tokens) % 2:
            # Without this check numpy would broadcast the mismatched arrays
            # and silently draw wrong (or no) runs.
            raise ValueError(
                f"RLE string must contain start/length pairs, got {len(tokens)} tokens"
            )

        starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
        ends = starts + np.asarray(tokens[1::2], dtype=int)

        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1

    return img.reshape(shape).T

In [None]:
# Decode and render the segmentation mask stored in row 2
encoded_mask = df["EncodedPixels"][2]
mask = rle_decode(encoded_mask, (768, 768))

fig, ax = plt.subplots()
ax.imshow(mask)
plt.show()

In [None]:
df = df.head(2000)  # Shorten dataset

# One label per image, keyed in order of first appearance (the last row of an
# image decides its label, as before). A dict keeps insertion order, so the
# resulting frame - and any seeded split taken from it - is identical on every
# run; iterating a set of strings is not.
contains_ship = {
    image_id: bool(present)
    for image_id, present in zip(df.ImageId, df.ShipPresent)
}
names = list(contains_ship)

# Build the frame in a single call: DataFrame.append was removed in pandas 2.0
# and copied the whole frame on every iteration.
dsf = pd.DataFrame({
    "X": names,
    "Y": pd.Series([contains_ship[name] for name in names], dtype=bool),
})

In [None]:
################################################################################################################################################################
##   Training classifier
################################################################################################################################################################

In [None]:
!pip install mplcyberpunk

In [None]:
import os
import torch
from matplotlib import pyplot as plt
from skimage import io, transform
from sklearn.datasets import load_digits
from torch import optim
import torch.nn.functional as F
import torch.nn as nn
from torch.utils import data
import numpy as np
from IPython.display import clear_output
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

from torch.nn import MSELoss

from sklearn.metrics import accuracy_score

import mplcyberpunk

In [None]:
class ImageDataset(Dataset):
    """Image-level classification dataset backed by a DataFrame.

    Each sample pairs an image read from ``root_dir`` with the integer label
    stored in the second column of the supplied frame.
    """

    def __init__(self, csv_file, root_dir, df):
        """
        Args:
            csv_file (string): Not used by this class; kept so existing call
                sites keep working.
            root_dir (string): Directory with all the images.
            df (pandas.DataFrame): Column 0 holds the image file name,
                column 1 the label.
        """
        self.landmarks_frame = df
        self.root_dir = root_dir
        # Build the preprocessing pipeline once rather than on every sample.
        # NOTE(review): for uint8 HWC input, ToTensor is documented to yield
        # float CHW in [0, 1], which Normalize(0.5, 0.5) then maps to [-1, 1].
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(0.5, 0.5)
        ])

    def __len__(self):
        """Return the number of rows (samples) in the backing frame."""
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the row at position ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        x = io.imread(img_name)

        # Scalar positional lookup; the former ``iloc[idx, 1:][0]`` relied on
        # Series positional fallback, which pandas has deprecated.
        y = int(self.landmarks_frame.iloc[idx, 1])

        return self.transforms(x), y

In [None]:
# Dataloader
IMAGE_SIZE = 768

ds = ImageDataset(csv_file='../input/airbus-ship-detection/sample_submission_v2.csv', root_dir='../input/airbus-ship-detection/train_v2/', df=dsf)

In [None]:
from torch.utils.data import random_split

BATCH_SIZE = 8
train_len = int(len(ds) / 100 * 80)  # 80 % of the samples go to training

print(len(ds))

# Train/test split with a fixed generator seed
split_sizes = [train_len, len(ds) - train_len]
split_rng = torch.Generator().manual_seed(1303)
ds_train, ds_test = random_split(ds, split_sizes, generator=split_rng)

# Both loaders share the same batch size and keep the default (unshuffled) order
train_loader, test_loader = (
    DataLoader(subset, batch_size=BATCH_SIZE) for subset in (ds_train, ds_test)
)

In [None]:
from matplotlib import pyplot as plt


print(len(train_loader))
print(len(test_loader))

# Show the first image of each of the first 3 training batches as a sanity
# check. zip(range(3), ...) stops before a fourth batch is loaded from disk.
for _, batch in zip(range(3), train_loader):
    x, y = batch

    # The dataset applies Normalize(0.5, 0.5), so pixel values lie in [-1, 1].
    # Undo that to get back to [0, 1], the range imshow expects for float RGB
    # data; scaling by 256 instead makes imshow clip nearly every pixel.
    img = (x[0] * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).cpu().numpy()

    plt.imshow(img)

    plt.xticks([])
    plt.yticks([])

    plt.show()

In [None]:
# Tally how many training samples are labelled as containing a ship
present, none = 0, 0

for _, labels in train_loader:
    ship_hits = int((labels == 1).sum())
    present += ship_hits
    none += len(labels) - ship_hits

print(f"Present ship in train: {present} / {present + none}")

In [None]:
# Tally how many held-out samples are labelled as containing a ship
present, none = 0, 0

for _, labels in test_loader:
    ship_hits = int((labels == 1).sum())
    present += ship_hits
    none += len(labels) - ship_hits

print(f"Present ship in test: {present} / {present + none}")

In [None]:
# Classifier down below

EPOCH_NUMBER = 15

plt.style.use("cyberpunk")

# Use the GPU when one is visible, otherwise fall back to CPU so the cell
# still runs (slowly) on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Net structure: three conv/pool stages (each halves the spatial size, hence
# IMAGE_SIZE // 8 below) followed by a fully connected head with two outputs.
net = nn.Sequential(
    nn.Conv2d(3, 32, 3, stride=1, padding=1, padding_mode="circular"),
    nn.ReLU(),
    nn.MaxPool2d(2),

    nn.Conv2d(32, 64, 3, stride=1, padding=1, padding_mode="circular"),
    nn.ReLU(),
    nn.MaxPool2d(2),

    nn.Conv2d(64, 128, 3, stride=1, padding=1, padding_mode="circular"),
    nn.ReLU(),
    nn.MaxPool2d(2),

    nn.Flatten(),

    nn.Linear((IMAGE_SIZE // 8) ** 2 * 128, 256),
    nn.LeakyReLU(0.1),

    nn.Linear(256, 256),
    nn.LeakyReLU(0.1),

    nn.Linear(256, 256),
    nn.LeakyReLU(0.1),
    nn.Linear(256, 2),

    nn.Sigmoid()
).to(device)


opt = optim.SGD(net.parameters(), lr=1e-1)
criterion = MSELoss()  # From now on error = criterion(pred, y)


train_loss, test_loss = [], []
for epoch in range(EPOCH_NUMBER):
    # ---- training pass ----
    net.train()
    train_errors, train_accuracy = [], []

    for x, y in train_loader:
        pred = net(x.to(device))

        # MSE is computed against one-hot targets, so expand the class index.
        y = F.one_hot(y, 2).to(torch.float32).to(device)

        opt.zero_grad()
        err = criterion(pred, y)
        err.backward()
        opt.step()

        pred_conv = torch.argmax(pred, dim=1).detach().cpu().numpy()
        y_conv = torch.argmax(y, dim=1).detach().cpu().numpy()

        train_accuracy.append(accuracy_score(y_conv, pred_conv))
        train_errors.append(err.item())

    # ---- evaluation pass ----
    net.eval()
    test_errors, test_accuracy = [], []
    # no_grad: evaluation needs no autograd graph, which saves memory and time.
    with torch.no_grad():
        for x, y in test_loader:
            pred = net(x.to(device))
            y = F.one_hot(y, 2).to(torch.float32).to(device)

            # Compute the loss on THIS batch; previously the stale loss of the
            # last training batch was recorded as the test error.
            err = criterion(pred, y)

            pred_conv = torch.argmax(pred, dim=1).cpu().numpy()
            y_conv = torch.argmax(y, dim=1).cpu().numpy()

            test_accuracy.append(accuracy_score(y_conv, pred_conv))
            test_errors.append(err.item())

    train_loss.append(sum(train_errors) / len(train_errors))
    test_loss.append(sum(test_errors) / len(test_errors))

    # Output: replace the previous epoch's report and plots
    clear_output(True)

    print(f'Epoch: {epoch} train: error: {sum(train_errors) / len(train_errors)}, accuracy: {sum(train_accuracy) / len(train_accuracy)}')
    print(f'Epoch: {epoch} test: error: {sum(test_errors) / len(test_errors)}, accuracy: {sum(test_accuracy) / len(test_accuracy)}')
    print()

    plt.plot(train_loss, label='train')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

    plt.plot(test_loss, label='test')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.show()
    