In [1]:
# Show the contents of the offline package bundle attached to the notebook.
!ls /kaggle/input/pyvips-python-and-deb-package-gpu
# Install the deb packages; `yes` answers any prompt and --force-depends lets dpkg
# proceed even when dependency checks complain.
!yes | dpkg -i --force-depends /kaggle/input/pyvips-python-and-deb-package-gpu/linux_packages/archives/*.deb
# Install the Python wrapper from the local directory only (--no-index disables the package index).
!pip install pyvips -f /kaggle/input/pyvips-python-and-deb-package-gpu/python_packages/ --no-index
# Confirm that pyvips is now listed among the installed packages.
!pip list | grep pyvips

linux_packages	python_packages
Selecting previously unselected package apparmor.
(Reading database ... 113818 files and directories currently installed.)
Preparing to unpack .../apparmor_3.0.4-2ubuntu2.2_amd64.deb ...
Unpacking apparmor (3.0.4-2ubuntu2.2) ...
Selecting previously unselected package autoconf.
Preparing to unpack .../autoconf_2.71-2_all.deb ...
Unpacking autoconf (2.71-2) ...
Selecting previously unselected package automake.
Preparing to unpack .../automake_13a1.16.5-1.3_all.deb ...
Unpacking automake (1:1.16.5-1.3) ...
Selecting previously unselected package autotools-dev.
Preparing to unpack .../autotools-dev_20220109.1_all.deb ...
Unpacking autotools-dev (20220109.1) ...
Selecting previously unselected package bzip2-doc.
Preparing to unpack .../bzip2-doc_1.0.8-5build1_all.deb ...
Unpacking bzip2-doc (1.0.8-5build1) ...
Selecting previously unselected package file.
Preparing to unpack .../file_13a5.41-3ubuntu0.1_amd64.deb ...
Unpacking file (1:5.41-3

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import os
from tqdm.notebook import tqdm

from PIL import Image
import pyvips
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.utils.data.sampler import WeightedRandomSampler
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.optim import Adam
import timm
import seaborn as sns

from sklearn.preprocessing import LabelEncoder

from collections import Counter

from joblib import Parallel, delayed

import gc

path = '/kaggle/input/UBC-OCEAN'
Image.MAX_IMAGE_PIXELS = 20641052620
device = torch.device('cuda:0') if torch.cuda.is_available else torch.device('cpu')
df = pd.read_csv(f'{path}/train.csv')
!nvidia-smi



Mon Nov  6 18:41:26 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |
| N/A   38C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Defaul

### Crop duplicates and add image paths to train.csv (just copy-pasted)

In [3]:
from scipy.signal import find_peaks, savgol_filter
import cv2

def smooth_signal(signal, window_length=5, polyorder=3):
    """Smooth ``signal`` with a Savitzky-Golay filter.

    Args:
        signal: Sequence/array of samples to smooth.
        window_length: Filter window size passed to ``savgol_filter``.
        polyorder: Order of the fitted polynomial passed to ``savgol_filter``.

    Returns:
        The filtered signal as returned by ``scipy.signal.savgol_filter``.
    """
    smoothed = savgol_filter(signal, window_length=window_length, polyorder=polyorder)
    return smoothed

def detect_peaks(signal, prominence=150):
    """Return the indices of the peaks in ``signal``.

    Args:
        signal: 1-D sequence of samples.
        prominence: Minimum prominence a peak must have to be reported;
            adjust as needed for the signal at hand.

    Returns:
        Array of peak indices (the first element of ``find_peaks``' result).
    """
    peak_indices = find_peaks(signal, prominence=prominence)[0]
    return peak_indices

def crop_image(input_image):
    """Cut an image into the bounding boxes of its non-dark regions.

    Args:
        input_image: Path of the image file; it is read with ``cv2.imread``.

    Returns:
        list of ``numpy.ndarray`` crops, one per external contour whose pixel
        sum exceeds 100000. The crops are slices of the array produced by
        ``cv2.imread`` and therefore keep its BGR channel order.

    Raises:
        OSError: If ``cv2.imread`` could not read ``input_image``.
    """
    image = cv2.imread(input_image)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise OSError(f'cv2.imread could not read image: {input_image}')

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Threshold the image to create a binary image (gray values above 1 become 255)
    _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

    # Find the outer contours (regions) in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep the bounding-box crop of every contour that carries enough signal;
    # the sum threshold filters out tiny or almost black regions.
    cropped_images = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cropped_image = image[y:y+h, x:x+w]
        if np.sum(cropped_image) > 100000:
            cropped_images.append(cropped_image)
    return cropped_images

def scale_signal(signal, min_range, max_range):
    """Linearly rescale ``signal`` so that it spans ``[min_range, max_range]``.

    Args:
        signal: Array-like of numeric samples (lists are accepted as well).
        min_range: Value the smallest sample is mapped to.
        max_range: Value the largest sample is mapped to.

    Returns:
        ``numpy.ndarray`` of floats with the same shape as ``signal``. A
        constant signal has no spread to stretch, so every element is mapped
        to ``min_range`` instead of producing NaN from a division by zero.
    """
    signal = np.asarray(signal, dtype=float)

    # Find the minimum and maximum of the signal
    min_value = np.min(signal)
    max_value = np.max(signal)
    span = max_value - min_value

    if span == 0:
        # Flat signal: avoid 0/0 and return a well-defined constant output.
        return np.full_like(signal, min_range, dtype=float)

    # Scale the signal
    return min_range + (signal - min_value) / span * (max_range - min_range)

def get_path(image_id):
    """Return the training image path for ``image_id``.

    The thumbnail under ``train_thumbnails`` is used when that file exists;
    otherwise the path of the full image under ``train_images`` is returned.
    """
    thumbnail_path = f'{path}/train_thumbnails/{image_id}_thumbnail.png'
    if not os.path.exists(thumbnail_path):
        return f'{path}/train_images/{image_id}.png'
    return thumbnail_path
    
df['img_path'] = df['image_id'].apply(get_path)

# Replace files that appear to contain more than one image by a single crop.
for i in range(len(df)):
    image_path = df.loc[i, 'img_path']
    # Close the file handle as soon as the pixel data has been read.
    with Image.open(image_path) as image:
        # Column-wise mean of the first channel, used as a 1-D intensity profile.
        column_profile = np.mean(np.array(image)[:, :, 0], axis=0)
    peaks = detect_peaks(smooth_signal(column_profile))
    if len(peaks) > 1:  # Check if there is more than one image
        images = crop_image(image_path)
        if not images:
            # No region passed crop_image's filter; keep pointing at the original file.
            continue
        # crop_image returns arrays read by cv2.imread (BGR), while Pillow
        # interprets 3-channel arrays as RGB. Swap the channels so red and blue
        # are not exchanged in the saved file.
        im = Image.fromarray(cv2.cvtColor(images[0], cv2.COLOR_BGR2RGB))
        im_id = df.loc[i, 'image_id']
        im.save(f'/kaggle/working/{im_id}.png')
        df.loc[i, 'img_path'] = f'/kaggle/working/{im_id}.png'

# Record the size of the image each row finally points to.
for i in range(len(df)):
    with Image.open(df.loc[i, 'img_path']) as im:
        df.loc[i, 'image_width'], df.loc[i, 'image_height'] = im.size

### Dataset

In [4]:
# Class vocabulary: every label present in the training table plus an extra 'Other' entry.
labels = [*df['label'].unique(), 'Other']
# Fit the encoder that maps label strings to integer class indices.
encoder = LabelEncoder()
encoder.fit(labels)

In [5]:
class UBCDadaset(Dataset):
    """Training dataset that yields ``(image, label_index)`` pairs.

    Args:
        df: DataFrame with an ``img_path`` and a ``label`` column.
        transforms: Optional callable applied to the resized PIL image.

    Instance attributes:
        df: The DataFrame passed in.
        paths: Array of image paths taken from ``df['img_path']``.
        labels: Integer class indices produced by the module-level ``encoder``.
        transforms: The callable passed in (or ``None``).

    Note:
        ``labels`` is a plain attribute. A method with the same name cannot be
        reached on instances because the attribute assigned in ``__init__``
        shadows it, so no such method is defined.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.paths = df['img_path'].values
        self.labels = encoder.transform(df['label'].values)
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load, resize and transform the image at position ``idx``."""
        img_path = self.paths[idx]
        img_label = self.labels[idx]

        # Force three channels (the transform pipeline normalises with 3-channel
        # statistics) and release the file handle once the pixels are loaded.
        with Image.open(img_path) as src:
            img = src.convert('RGB')

        # Resize so that the longer side is ~1024 px; clamp to at least 1 px so
        # an extreme aspect ratio cannot truncate a dimension to 0.
        w, h = img.size
        longest = max(w, h)
        img = img.resize((max(1, int(w*1024 / longest)),
                          max(1, int(h*1024 / longest))))

        if self.transforms:
            img = self.transforms(img)
        return img, int(img_label)

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)
    
# Training-time preprocessing: fixed-size resize, random flips, then conversion
# to a normalised float tensor.
train_transforms = transforms.Compose([
    transforms.Resize(size=(512, 512)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    # PIL image -> float32 array scaled by 1/255
    transforms.Lambda(lambda pil_img: np.array(pil_img, dtype="float32") / 255),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = UBCDadaset(df, transforms=train_transforms)

### Model

In [6]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    The exponent ``p`` is stored as a learnable one-element parameter; ``eps``
    is the lower clamp applied to the input before it is raised to ``p``.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.p = nn.Parameter(p * torch.ones(1))

    def forward(self, x):
        """Pool ``x`` using the module's own ``p`` and ``eps``."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Clamp, raise to ``p``, average over the full spatial window, take the ``1/p`` root."""
        height, width = x.size(-2), x.size(-1)
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, (height, width))
        return pooled.pow(1. / p)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f'{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})'
    
class MyModel(nn.Module):
    """timm backbone followed by GeM pooling and a linear classification layer.

    Args:
        model_name: Architecture name handed to ``timm.create_model``.
        pretrained: Passed to ``timm.create_model`` as its second argument.
        n_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, model_name, pretrained, n_classes):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained)

        # Remember the classifier's input width, then replace the backbone's own
        # pooling and classifier with identities.
        feature_dim = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        backbone.global_pool = nn.Identity()

        self.net = backbone
        self.gem = GeM()
        self.lin = nn.Linear(feature_dim, n_classes)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        features = self.net(x)
        pooled = self.gem(features).flatten(1)
        return self.lin(pooled)

In [7]:
# Backbone created without pretrained weights; one output per class known to the encoder.
model = MyModel('tf_efficientnet_b0', pretrained=False, n_classes=len(encoder.classes_)).to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-4)
# Each scheduler step multiplies the learning rate by gamma.
sheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

### Train

In [8]:
# Inverse-frequency weight per class.
class_count = Counter(train_dataset.labels)
class_weights = {i: 1/c for i, c in class_count.items()}

# Look the weights up directly from the dataset's label array. Iterating the
# dataset itself would open, resize and transform every image only to read
# its label.
sample_weights = [class_weights[label] for label in train_dataset.labels]

N = max(class_count.values()) * len(class_count)  # fit to max
# Draw N samples per epoch with replacement, weighted per sample.
train_sampler = WeightedRandomSampler(sample_weights, num_samples=N, replacement=True)
train_loader = DataLoader(train_dataset, batch_size=16, sampler=train_sampler, num_workers=2)

In [9]:
def criterion(outputs, labels):
    """Cross-entropy loss between the model outputs and the target class indices.

    A fresh ``nn.CrossEntropyLoss`` with default settings is built on every call.
    """
    loss_fn = nn.CrossEntropyLoss()
    return loss_fn(outputs, labels)

n_epochs = 20
train_loss = []  # running-average loss at the end of each epoch
train_acc = []   # running-average accuracy at the end of each epoch

for n in range(n_epochs):
    gc.collect()
    running_loss = 0.0
    running_acc = 0
    process_data = 0
    model.train()
    bar = tqdm(enumerate(train_loader), total=len(train_loader))

    for _, data in bar:
        x_batch, y_batch = data
        x_batch, y_batch = x_batch.to(device, dtype=torch.float), y_batch.to(device, dtype=torch.long)
        optimizer.zero_grad()
        target = model(x_batch)
        loss = criterion(target, y_batch)
        loss.backward()
        optimizer.step()

        # softmax is monotonic, so argmax over the raw scores picks the same class.
        pred = target.argmax(-1)
        batch_size = x_batch.size(0)
        process_data += batch_size
        # Accumulate (+=) the summed batch loss; a plain assignment would report
        # only the last batch divided by all samples seen so far.
        running_loss += loss.item() * batch_size
        running_acc += (pred == y_batch).sum().item()

        epoch_loss = running_loss / process_data
        epoch_acc = running_acc / process_data

        bar.set_postfix(Epoch=n, Train_Loss=epoch_loss, Train_Acc=epoch_acc,
                        LR=optimizer.param_groups[0]['lr'])

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    # The learning rate is decayed only on every third epoch (0, 3, 6, ...).
    if n % 3 == 0:
        sheduler.step()

In [10]:
# Persist the whole model object (not only a state_dict) in the working directory.
torch.save(model, '/kaggle/working/tf_efficientnet_b0_pretrained.pth')

### Load model

In [11]:
model_path = '/kaggle/input/pretrained-model/tf_efficientnet_b0_pretrained (1).pth'
# NOTE(security): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which rejects
# a fully pickled model; pass weights_only=False there. TODO confirm for the installed version.
# map_location remaps the stored tensors onto the device selected for this session,
# so a checkpoint written on a GPU also loads on a CPU-only machine.
model = torch.load(model_path, map_location=device)

### Test dataset and loader

In [12]:
def get_path_test(image_id):
    """Return the path of the test image that belongs to ``image_id``."""
    return f'{path}/test_images/{image_id}.png'


# Load the test table and attach the image path of every row.
df_test = pd.read_csv(f'{path}/test.csv')
test_image_paths = df_test['image_id'].apply(get_path_test)
df_test['img_path'] = test_image_paths

In [13]:
class UBCTestDadaset(Dataset):
    """Inference dataset that yields transformed images only (no labels).

    Args:
        df: DataFrame with an ``img_path`` column.
        transforms: Optional callable applied to the PIL thumbnail.

    Note:
        No ``labels`` accessor is provided: this dataset stores no labels, and a
        ``labels`` method returning ``list(self.labels)`` would only try to
        iterate over the method itself and raise ``TypeError``.
    """

    def __init__(self, df, transforms=None):
        self.df = df
        self.paths = df['img_path'].values
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as a thumbnail and transform it."""
        img_path = self.paths[idx]

        # resize to ~1024*1024 using pyvips' thumbnail loader
        image_thumbnail = pyvips.Image.thumbnail(img_path, 1024)
        im_np = np.ndarray(
            buffer=image_thumbnail.write_to_memory(),
            dtype=np.uint8,
            shape=[image_thumbnail.height, image_thumbnail.width, image_thumbnail.bands])
        if im_np.shape[2] == 1:
            # Image.fromarray expects a 2-D array for single-band data.
            im_np = im_np[:, :, 0]
        # Force three channels so that images with an alpha band or a single
        # band still match the 3-channel normalisation in the transforms.
        img = Image.fromarray(im_np).convert('RGB')

        if self.transforms:
            img = self.transforms(img)
        return img

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

# Inference-time preprocessing: same resize and normalisation constants as the
# training pipeline, without the random flips.
test_transforms = transforms.Compose([
    transforms.Resize(size=(512, 512)),
    # PIL image -> float32 array scaled by 1/255
    transforms.Lambda(lambda pil_img: np.array(pil_img, dtype="float32") / 255),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [14]:
# Inference pipeline; shuffling stays off so that batch order follows the rows of df_test.
test_dataset = UBCTestDadaset(df=df_test, transforms=test_transforms)
test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False, num_workers=2)

### Prediction and submission

In [15]:
def predict(model, test_loader):
    """Predict a class label for every sample served by ``test_loader``.

    Args:
        model: Network that returns one score per class for a batch of inputs.
        test_loader: Iterable yielding input batches (tensors), in output order.

    Returns:
        Array of label strings obtained by passing the predicted class indices
        through the module-level ``encoder.inverse_transform``.
    """
    # Switch to evaluation mode once, not once per batch.
    model.eval()
    batch_preds = []

    with torch.no_grad():
        for inputs in test_loader:
            inputs = inputs.to(device)
            scores = model(inputs)
            # softmax is monotonic, so argmax over the raw scores picks the same class.
            batch_preds.append(scores.argmax(-1).cpu().numpy())

    # Concatenate once at the end instead of re-copying the accumulator per batch;
    # an empty loader yields an empty integer array rather than a float one.
    if batch_preds:
        class_ids = np.concatenate(batch_preds).astype(int)
    else:
        class_ids = np.array([], dtype=int)

    ans = encoder.inverse_transform(class_ids)
    return ans

# Run inference on the selected device in evaluation mode.
model.to(device).eval()
pred = predict(model, test_loader)

# Attach the predictions to the test table and write the two submission columns.
df_test['label'] = pred
sub = df_test.loc[:, ["image_id", "label"]]
sub.to_csv("submission.csv", index=False)