## 📂 Importing necessary libraries


Fork from https://www.kaggle.com/code/zulqarnainali/fork-of-lb-0-57/notebook

# import library

In [1]:
import os

import gc

import cv2

import math

import copy

import time

import random

import glob

from PIL import Image

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import torchvision

import joblib
from tqdm import tqdm
from collections import defaultdict

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2

from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"




# set config

## model is pretrained

In [2]:
CONFIG = {
    "seed": 777,
    "img_size": 2054,
    "model_name": "tf_efficientnetv2_s_in21ft1k",
    "num_classes": 5,
    "valid_batch_size": 4,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
}


# set random_seed

In [3]:
def set_seed(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
set_seed(CONFIG['seed'])

# directory

In [4]:
ROOT_DIR = '/kaggle/input/UBC-OCEAN'  # 📁 Root directory containing the dataset
TEST_DIR = '/kaggle/input/UBC-OCEAN/test_thumbnails'  # 📁 Test thumbnails directory
ALT_TEST_DIR = '/kaggle/input/UBC-OCEAN/test_images'  # 📁 Alternative test images directory
Model_predict = '/kaggle/input/check-ponit007'  # 📁 Model prediction directory
LABEL_ENCODER_BIN = "/kaggle/input/ubcpytorchwith-classweights-training-fold1of5/label_encoder.pkl"  # 📄 Label encoder binary file
BEST_WEIGHT = "/kaggle/input/baseline-0-36/Acc0.70_Loss1.0140_epoch29_tf_efficientnetv2_s_in21ft1k_0.36.bin"  # 📄 Best weight file 1
BEST_WEIGHT2 = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/Recall0.9178_Acc0.9437_Loss0.1685_epoch9.bin"  # 📄 Best weight file 2
BEST_WEIGHT3 = "/kaggle/input/ubc-efficienetnetb0-fold1of10-2048pix-thumbnails/Recall0.8858_Acc0.9155_Loss0.2106_epoch1.bin"  # 📄 Best weight file 3
BEST_WEIGHT4 = "/kaggle/input/ver-21-10/Acc0.50_Loss1.2095_epoch4.bin" 
BEST_WEIGHT5="/kaggle/input/ubc-fine-tuning-with-loss-for-class-imbalance/Recall0.9022_Acc0.9067_Loss0.1111_epoch5.bin"

# transform path and make path

In [5]:
def get_test_file_path(image_id):
    if os.path.exists(f"{TEST_DIR}/{image_id}_thumbnail.png"):
        return f"{TEST_DIR}/{image_id}_thumbnail.png"
    else:
        return f"{ALT_TEST_DIR}/{image_id}.png"


In [6]:
df = pd.read_csv(f"{ROOT_DIR}/test.csv")

df['file_path'] = df['image_id'].apply(get_test_file_path)

df['label'] = 0

In [7]:
df_sub = pd.read_csv(f"{ROOT_DIR}/sample_submission.csv")

# class labels ⇔ predicted

In [8]:
encoder = joblib.load(LABEL_ENCODER_BIN)

    labels == label は、labels 配列内で label に等しい要素がある場所で True となり、それ以外は False となります。
    np.sum(labels == label) は、True の要素の数を合計します。これにより、指定された label に属するピクセルの数（面積）が計算されます。

# method of adjust image

In [9]:
# トリミング画像の取得
def get_cropped_images(file_path, image_id, th_area=1000):
    image = Image.open(file_path)

    as_ratio = image.size[0] / image.size[1]

    sxs, exs, sys, eys = [], [], [], []

    if as_ratio >= 1.5:
        # アスペクト比が1.5>=の時（横長の時にトリミング）

        #非0のピクセルのマスクの作製
        mask = np.max(np.array(image) > 0, axis=-1).astype(np.uint8)

        # マスクから結合の要素を抽出（連結成分は、隣接する非ゼロのピクセルが同じラベルでマークされた領域）
        retval, labels = cv2.connectedComponents(mask)

        if retval >= as_ratio:
           #連結部分がアスペクト比以上の時、境界ボックスを計算
            x, y = np.meshgrid(np.arange(image.size[0]), np.arange(image.size[1]))
            for label in range(1, retval):
               #連結部分の面積の計算
                area = np.sum(labels == label)
                #面積がth_area未満の場合にはスキップ
                if area < th_area:
                    continue

                #連結部分の座標の取得
                xs, ys = x[labels == label], y[labels == label]

                #連結部分のx座標の最小値と最大値の取得
                sx, ex = np.min(xs), np.max(xs)
                #中心座標
                cx = (sx + ex) // 2
                #トリミング領域のサイズ
                crop_size = image.size[1]
                #トリミング領域開始x座標
                sx = max(0, cx - crop_size // 2)
                #トリミング領域終了x座標
                ex = min(sx + crop_size - 1, image.size[0] - 1)
                #トリミング領域開始x座標再調整
                sx = ex - crop_size + 1
                #トリミング領域y座標
                sy, ey = 0, image.size[1] - 1

                # リストに座標を追加
                sxs.append(sx)
                exs.append(ex)
                sys.append(sy)
                eys.append(ey)
        else:
            #連結部分がない場合、等分割する
            crop_size = image.size[1]
            for i in range(int(as_ratio)):
                sxs.append(i * crop_size)
                exs.append((i + 1) * crop_size - 1)
                sys.append(0)
                eys.append(crop_size - 1)
    else:
        # アスペクト比が1.5未満の時、トリミング領域をしない
        sxs, exs, sys, eys = [0,], [image.size[0] - 1], [0,], [image.size[1] - 1]

    # df_cropに画像データ、パス、座標を追加
    df_crop = pd.DataFrame()
    df_crop["image_id"] = [image_id] * len(sxs)
    df_crop["file_path"] = [file_path] * len(sxs)
    df_crop["sx"] = sxs
    df_crop["ex"] = exs
    df_crop["sy"] = sys
    df_crop["ey"] = eys

    return df_crop


# adjust image

In [10]:
dfs = []

for (file_path, image_id) in zip(df["file_path"], df["image_id"]):
    dfs.append(get_cropped_images(file_path, image_id))

df_crop = pd.concat(dfs)

df_crop["label"] = 0

# delete duplication

In [11]:
df_crop = df_crop.drop_duplicates(subset=["image_id", "sx", "ex", "sy", "ey"]).reset_index(drop=True)

# class of load dataset

In [12]:
class UBCDataset(Dataset):
    def __init__(self, df, transforms=None):
        self.df = df
        self.file_names = df['file_path'].values
        self.labels = df['label'].values
        self.transforms = transforms
        self.sxs = df["sx"].values
        self.exs = df["ex"].values
        self.sys = df["sy"].values
        self.eys = df["ey"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_path = self.file_names[index]
        sx, ex, sy, ey = self.sxs[index], self.exs[index], self.sys[index], self.eys[index]

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img = img[sy:ey, sx:ex, :]

        label = self.labels[index]

        if self.transforms:
            img = self.transforms(image=img)["image"]

        return {
            'image': img,
            'label': torch.tensor(label, dtype=torch.long)
        }


# adjust image

In [13]:
data_transforms = {
    "valid": A.Compose([
        A.Resize(CONFIG['img_size'], CONFIG['img_size']),

        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),

        ToTensorV2()
    ], p=1.)
}

# GeM

In [14]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)

    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

# UBC model

In [15]:
class UBCModel(nn.Module):
    def __init__(self, model_name, num_classes, pretrained=False, checkpoint_path=None):
        super(UBCModel, self).__init__()

        self.model = timm.create_model(model_name, pretrained=pretrained)

        in_features = self.model.classifier.in_features

        #デルの出力は特徴マップとして取得
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()

        self.pooling = GeM()

        self.linear = nn.Linear(in_features, num_classes)

        #出力を確率分布に変換
        self.softmax = nn.Softmax(dim=1)

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        output = self.linear(pooled_features)
        return output

#model = UBCModel('tf_efficientnetv2_s_in21ft1k', CONFIG['num_classes'])
model = UBCModel(CONFIG["model_name"], CONFIG['num_classes'])
model2 = UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])
model3 = UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])
model4 = UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])
#model5= UBCModel('tf_efficientnet_b0_ns', CONFIG['num_classes'])

model.load_state_dict(torch.load(BEST_WEIGHT))
model2.load_state_dict(torch.load(BEST_WEIGHT2))
model3.load_state_dict(torch.load(BEST_WEIGHT3))
model4.load_state_dict(torch.load(BEST_WEIGHT3))
#model5.load_state_dict(torch.load(BEST_WEIGHT5))

model.to(CONFIG['device'])
model2.to(CONFIG['device'])
model3.to(CONFIG['device'])
model4.to(CONFIG['device'])
#model5.to(CONFIG['device'])



UBCModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNormAct2d(
            32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 

# load test dataset

In [16]:
test_dataset = UBCDataset(df_crop, transforms=data_transforms["valid"])

test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG['valid_batch_size'],
    num_workers=2,
    shuffle=False,
    pin_memory=True
)

# prediction

In [17]:
preds = []

with torch.no_grad():
    bar = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, data in bar:
        images = data['image'].to(CONFIG["device"], dtype=torch.float)

        outputs1 = model(images)
        outputs2 = model2(images)
        outputs3 = model3(images)
        outputs4 = model4(images)
        #outputs5 = model5(images)

        outputs = 0.66 * (0.34 * outputs4 + 0.7 * outputs2) + 0.322 * (0.442 * outputs1 + 0.558 * outputs3)

        #outputs = model.softmax(outputs)

        #勾配情報を切り離し、NumPy 配列に変換
        preds.append(outputs.detach().cpu().numpy())

#行方向に積み重ねる
preds = np.vstack(preds)
print(preds.shape)
print(preds)

100%|██████████| 1/1 [00:06<00:00,  6.65s/it]

(1, 5)
[[-1.2605823  -0.09188002  2.160406   -1.8183439  -2.7500184 ]]





# compare 4 models prediction

In [18]:
#DataFrame へのカテゴリ列の追加
for i in range(preds.shape[-1]):
    df_crop[f"cat{i}"] = preds[:, i]

dict_label = {}

#"image_id" 列でグループ化し、各グループにおいて各カテゴリで最大の確率を持つ列のインデックス（最終的な予測ラベル）を取得
for image_id, gdf in df_crop.groupby("image_id"):
    dict_label[image_id] = np.argmax(gdf[[f"cat{i}" for i in range(preds.shape[-1])]].values.max(axis=0))

preds = np.array([dict_label[image_id] for image_id in df["image_id"].values])

# submit csv

In [19]:
pred_labels = encoder.inverse_transform(preds)

df_sub["label"] = pred_labels

df_sub.to_csv("submission.csv", index=False)

In [20]:
df_sub

Unnamed: 0,image_id,label
0,41,HGSC
