**Code copy and edit from 


**https://www.kaggle.com/code/khailashsanthakumar/ubc-pytorch-effb0-with-classweights-inference

**https://www.kaggle.com/code/motono0223/ubc-infer-efficientnetb0-crop-resize-2048pix

**https://www.kaggle.com/code/yinankaggle/ensemble-inference-resnet50


Please upvote if it helps you.

In [1]:
import os
import gc
import cv2
import math
import copy
import time
import random
import glob
from PIL import Image
from matplotlib import pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



In [2]:
CONFIG = {
    "seed": 42,
    "img_size": 2048,
    "model_name": "tf_efficientnetv2_s_in21ft1k",
    "num_classes": 5,
    "valid_batch_size":4,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}

In [3]:
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [4]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'
ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'
LABEL_ENCODER_BIN = "/kaggle/input/ubcpytorchwith-classweights-training-fold1of5/label_encoder.pkl"
BEST_WEIGHT = "/kaggle/input/baseline-0-36/Acc0.70_Loss1.0140_epoch29_tf_efficientnetv2_s_in21ft1k_0.36.bin"
BEST_WEIGHT2 = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/Recall0.9178_Acc0.9437_Loss0.1685_epoch9.bin"
BEST_WEIGHT3 = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/Recall0.8858_Acc0.9155_Loss0.2106_epoch1.bin"
BEST_WEIGHT4 = "/kaggle/input/ver-21-10/Acc0.50_Loss1.2095_epoch4.bin"

In [5]:
def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"

In [6]:
df = pd.read_csv(f"{ROOT_DIR}/test.csv")
df['file_path'] = df['image_id'].apply(get_test_file_path)
df['label'] = 0 # dummy
df

Unnamed: 0,image_id,image_width,image_height,file_path,label
0,41,28469,16987,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,0


In [7]:
df_sub = pd.read_csv(f"{ROOT_DIR}/sample_submission.csv")
df_sub

Unnamed: 0,image_id,label
0,41,HGSC


In [8]:
encoder = joblib.load( LABEL_ENCODER_BIN )

In [9]:
def get_cropped_images(file_path, image_id, th_area = 1000):
    image = Image.open(file_path)
    # Aspect ratio
    as_ratio = image.size[0] / image.size[1]
    
    sxs, exs, sys, eys = [],[],[],[]
    if as_ratio >= 1.5:
        # Crop
        mask = np.max( np.array(image) > 0, axis=-1 ).astype(np.uint8)
        retval, labels = cv2.connectedComponents(mask)
        if retval >= as_ratio:
            x, y = np.meshgrid( np.arange(image.size[0]), np.arange(image.size[1]) )
            for label in range(1, retval):
                area = np.sum(labels == label)
                if area < th_area:
                    continue
                xs, ys= x[ labels == label ], y[ labels == label ]
                sx, ex = np.min(xs), np.max(xs)
                cx = (sx + ex) // 2
                crop_size = image.size[1]
                sx = max(0, cx-crop_size//2)
                ex = min(sx + crop_size - 1, image.size[0]-1)
                sx = ex - crop_size + 1
                sy, ey = 0, image.size[1]-1
                sxs.append(sx)
                exs.append(ex)
                sys.append(sy)
                eys.append(ey)
        else:
            crop_size = image.size[1]
            for i in range(int(as_ratio)):
                sxs.append( i * crop_size )
                exs.append( (i+1) * crop_size - 1 )
                sys.append( 0 )
                eys.append( crop_size - 1 )
    else:
        # Not Crop (entire image)
        sxs, exs, sys, eys = [0,],[image.size[0]-1],[0,],[image.size[1]-1]

    df_crop = pd.DataFrame()
    df_crop["image_id"] = [image_id] * len(sxs)
    df_crop["file_path"] = [file_path] * len(sxs)
    df_crop["sx"] = sxs
    df_crop["ex"] = exs
    df_crop["sy"] = sys
    df_crop["ey"] = eys
    return df_crop

In [10]:
dfs = []
for (file_path, image_id) in zip(df["file_path"], df["image_id"]):
    dfs.append( get_cropped_images(file_path, image_id) )

df_crop = pd.concat(dfs)
df_crop["label"] = 0 # dummy
df_crop

Unnamed: 0,image_id,file_path,sx,ex,sy,ey,label
0,41,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,604,2393,0,1789,0


In [11]:
df_crop = df_crop.drop_duplicates(subset=["image_id", "sx", "ex", "sy", "ey"]).reset_index(drop=True)
df_crop

Unnamed: 0,image_id,file_path,sx,ex,sy,ey,label
0,41,/kaggle/input/UBC-OCEAN/test_thumbnails/41_thu...,604,2393,0,1789,0


In [12]:
class UBCDataset(Dataset):
    def __init__(self, df, transforms=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['label'].values
        self.transforms = transforms
        self.sxs = df["sx"].values
        self.exs = df["ex"].values
        self.sys = df["sy"].values
        self.eys = df["ey"].values
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        img_path = self.file_names[index]
        sx = self.sxs[index]
        ex = self.exs[index]
        sy = self.sys[index]
        ey = self.eys[index]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[index]
        
        img = img[ sy:ey, sx:ex, : ]
        
        if self.transforms:
            img = self.transforms(image=img)["image"]
            
        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }

In [13]:
data_transforms = {
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),
        A.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
        ToTensorV2()], p=1.)
}

In [14]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

In [15]:
class UBCModel(nn.Module):
    def __init__(self, model_name, num_classes, pretrained=False, checkpoint_path=None):
        super(UBCModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.linear = nn.Linear(in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

    
model = UBCModel('tf_efficientnetv2_s_in21ft1k', CONFIG['num_classes'])
model2 = UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])
model3 = UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])
model4 = UBCModel('tf_efficientnetv2_l', CONFIG['num_classes'])
model.load_state_dict(torch.load( BEST_WEIGHT ))
model.to(CONFIG['device']);
model2.load_state_dict(torch.load( BEST_WEIGHT2 ))
model2.to(CONFIG['device']);
model3.load_state_dict(torch.load( BEST_WEIGHT3 ))
model3.to(CONFIG['device']);
model4.load_state_dict(torch.load( BEST_WEIGHT4 ))
model4.to(CONFIG['device'])

UBCModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (drop_path): Identity()
        )
        (1): ConvBnAct(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
 

In [16]:
test_dataset = UBCDataset(df_crop, transforms=data_transforms["valid"])
test_loader = DataLoader(test_dataset, batch_size=CONFIG['valid_batch_size'], 
                          num_workers=2, shuffle=False, pin_memory=True)

In [17]:
preds = []
with torch.no_grad():
    bar = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, data in bar:        
        images = data['image'].to(CONFIG["device"], dtype=torch.float)        
        
        outputs1 = model(images)
        outputs2 = model2(images)
        outputs3 = model3(images)
        outputs4 = model4(images)
        outputs = 0.65*(0.35*outputs3+0.65*outputs2)+0.35*(0.55*outputs1+0.45*outputs4)
        outputs = model.softmax(outputs)
        preds.append( outputs.detach().cpu().numpy() )

preds = np.vstack(preds)
print(preds.shape)

100%|██████████| 1/1 [00:06<00:00,  6.27s/it]

(1, 5)





In [18]:
for i in range(preds.shape[-1]):
    df_crop[f"cat{i}"] = preds[:, i]

dict_label = {}
for image_id, gdf in df_crop.groupby("image_id"):
    dict_label[image_id] = np.argmax( gdf[ [f"cat{i}" for i in range(preds.shape[-1])] ].values.max(axis=0) )
    #dict_label[image_id] = np.argmax( gdf[ [f"cat{i}" for i in range(preds.shape[-1])] ].values.mean(axis=0) )
preds = np.array( [ dict_label[image_id] for image_id in df["image_id"].values ] )

In [19]:
pred_labels = encoder.inverse_transform( preds )
df_sub["label"] = pred_labels
df_sub.to_csv("submission.csv", index=False)

In [20]:
df_sub

Unnamed: 0,image_id,label
0,41,HGSC
