In [1]:
# %pip installs into the environment of the running kernel.
%pip install pandas
%pip install torch
%pip install torchvision
%pip install scikit-learn
%pip install lpips
# timm is imported by the model cell below (timm.create_model).
%pip install timm

Collecting pandas
  Downloading pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl (11.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.1-py2.py3-none-any.whl (505 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m505.5/505.5 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.4/345.4 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.2 pytz-2024.1 tzdata-2024.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;

In [None]:
# model
import timm
from torch import nn

class MaxViTModel(nn.Module):
    """Classifier backed by timm's ``maxvit_xlarge_tf_384`` model.

    Args:
        y_dim: Number of output classes, forwarded to timm as ``num_classes``.
    """

    def __init__(self, y_dim: int = 10):
        super().__init__()
        # pretrained=True asks timm for pretrained weights; the classifier
        # head is sized by num_classes.
        backbone = timm.create_model(
            "maxvit_xlarge_tf_384", pretrained=True, num_classes=y_dim
        )
        self.model = backbone

    def forward(self, x):
        """Return the wrapped timm model's output for ``x``."""
        logits = self.model(x)
        return logits


class Swinv2Model(nn.Module):
    """Classifier backed by timm's ``swinv2_large_window12to16_192to256`` model.

    Args:
        y_dim: Number of output classes, forwarded to timm as ``num_classes``.
    """

    def __init__(self, y_dim: int = 10):
        super().__init__()
        timm_options = {"pretrained": True, "num_classes": y_dim}
        self.model = timm.create_model(
            "swinv2_large_window12to16_192to256", **timm_options
        )

    def forward(self, x):
        """Delegate the forward pass to the wrapped timm model."""
        prediction = self.model(x)
        return prediction
    
class EvaV2Model(nn.Module):
    """Classifier backed by timm's ``eva02_large_patch14_224.mim_in22k`` model.

    Args:
        y_dim: Number of output classes, forwarded to timm as ``num_classes``.
    """

    def __init__(self, y_dim: int = 10):
        super().__init__()
        architecture = "eva02_large_patch14_224.mim_in22k"
        self.model = timm.create_model(
            architecture, pretrained=True, num_classes=y_dim
        )

    def forward(self, x):
        """Run ``x`` through the wrapped timm model and return its output."""
        out = self.model(x)
        return out

class EdsrModel(nn.Module):
    """EDSR-style upscaler: input conv, residual trunk, upsampler, output conv.

    Args:
        num_channels: Channel count of the input image and of the output image.
        num_features: Width of the feature maps used inside the network.
        num_res_blocks: Number of ``EdsrResidualBlock`` layers in the trunk.
        res_scale: Scale applied to every residual branch.
        upscale_factor: Passed straight through to ``EdsrUpsampleBlock``.
        **kwargs: Accepted and ignored.
    """

    def __init__(
        self,
        num_channels,
        num_features,
        num_res_blocks,
        res_scale=0.1,
        upscale_factor=2,
        **kwargs
    ):
        super().__init__()
        conv_opts = dict(kernel_size=3, padding=1)

        self.input_conv = nn.Conv2d(num_channels, num_features, **conv_opts)

        trunk = [
            EdsrResidualBlock(num_features, res_scale)
            for _ in range(num_res_blocks)
        ]
        self.res_blocks = nn.Sequential(*trunk)

        self.upsample = EdsrUpsampleBlock(num_features, upscale_factor)
        self.output_conv = nn.Conv2d(num_features, num_channels, **conv_opts)

    def forward(self, x):
        """Upscale ``x``; the input-conv features are re-added after the trunk."""
        shallow = self.input_conv(x)
        deep = self.res_blocks(shallow)
        # Global skip connection around the whole residual trunk.
        upscaled = self.upsample(deep + shallow)
        return self.output_conv(upscaled)


class EdsrResidualBlock(nn.Module):
    """Residual unit: ``x + res_scale * conv2(relu(conv1(x)))``.

    Args:
        num_features: Channel count of the input, kept unchanged by both convs.
        res_scale: Multiplier applied to the branch before it is added to ``x``.
    """

    def __init__(self, num_features, res_scale):
        super().__init__()

        def same_width_conv():
            # 3x3 conv with padding=1 keeps the spatial size.
            return nn.Conv2d(num_features, num_features, kernel_size=3, padding=1)

        self.conv1 = same_width_conv()
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = same_width_conv()
        self.res_scale = res_scale

    def forward(self, x):
        """Return ``x`` plus the scaled conv-ReLU-conv branch."""
        branch = self.conv2(self.relu(self.conv1(x)))
        return x + branch * self.res_scale


class EdsrUpsampleBlock(nn.Module):
    """Sub-pixel upsampler that enlarges feature maps by ``upscale_factor``.

    The factor is split into its prime factors and one
    ``conv -> PixelShuffle -> ReLU`` stage is built per prime. The stages are
    registered as ``conv{i}``, ``pixel_shuffle{i}`` and ``relu{i}`` (1-based).
    For ``upscale_factor=4`` this gives two x2 stages with the parameter names
    ``conv1`` and ``conv2``.

    Args:
        num_features: Channel count of the input; the output has the same count.
        upscale_factor: Positive integer spatial scale. ``1`` builds no stage
            and makes ``forward`` return its input unchanged.

    Raises:
        ValueError: If ``upscale_factor`` is not a positive integer.
    """

    def __init__(self, num_features, upscale_factor):
        super().__init__()
        stage_scales = self._stage_scales(upscale_factor)
        self.num_stages = len(stage_scales)
        for stage, scale in enumerate(stage_scales, start=1):
            # PixelShuffle(scale) trades scale**2 channels for a scale-times
            # larger height and width, so the conv expands the channels first.
            setattr(
                self,
                f"conv{stage}",
                nn.Conv2d(
                    num_features,
                    num_features * scale * scale,
                    kernel_size=3,
                    padding=1,
                ),
            )
            setattr(self, f"pixel_shuffle{stage}", nn.PixelShuffle(scale))
            setattr(self, f"relu{stage}", nn.ReLU(inplace=True))

    @staticmethod
    def _stage_scales(upscale_factor):
        """Return the prime factors of ``upscale_factor`` in ascending order."""
        try:
            factor = int(upscale_factor)
        except (TypeError, ValueError):
            factor = None
        if factor is None or factor != upscale_factor or factor < 1:
            raise ValueError(
                f"upscale_factor must be a positive integer, got {upscale_factor!r}"
            )

        scales = []
        divisor = 2
        while factor > 1:
            if factor % divisor == 0:
                scales.append(divisor)
                factor //= divisor
            else:
                divisor += 1
        return scales

    def forward(self, x):
        """Apply every upsampling stage in order and return the result."""
        for stage in range(1, self.num_stages + 1):
            x = getattr(self, f"conv{stage}")(x)
            x = getattr(self, f"pixel_shuffle{stage}")(x)
            x = getattr(self, f"relu{stage}")(x)
        return x

In [None]:
# dataset
import os, random

import torch
from torch.utils.data import Dataset
from torchvision import transforms
import torchvision.transforms.functional as TF
from PIL import Image

class CustomDataset(Dataset):
    """Image dataset that can run each image through a model before the transform.

    Args:
        img_path_list: Image paths, each joined onto ``img_dir``.
        label_list: Labels aligned with ``img_path_list``. ``None`` or an empty
            sequence makes ``__getitem__`` return only the image.
        img_dir: Directory prefix for every image path.
        device: Device ``transform_model`` is moved to and executed on.
        transform: Callable applied as the last step. When omitted it defaults
            to ``transforms.ToTensor()``, unless ``transform_model`` is given:
            the image is then already a tensor and is returned as is.
        transform_model: Optional module applied to the normalised image tensor
            without gradients. It is moved to ``device`` and put in eval mode
            here, which affects the caller's module object as well.
    """

    def __init__(
        self,
        img_path_list: list,
        label_list: list,
        img_dir: str,
        device: str = "cpu",
        transform=None,
        transform_model=None,
    ):
        self.img_dir = img_dir
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.device = device

        # ToTensor only accepts PIL images / ndarrays, so it must not become
        # the default when transform_model has already produced a tensor.
        if transform is None and transform_model is None:
            transform = transforms.ToTensor()
        self.transform = transform

        self.transform_model = transform_model
        if self.transform_model is not None:
            self.transform_model = transform_model.to(device)
            self.transform_model.eval()

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_path_list)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, label)`` or just ``image``."""
        img_path = os.path.join(self.img_dir, self.img_path_list[idx])
        # Force three channels (the normalisation below has three means/stds)
        # and release the file handle as soon as the pixels are loaded.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform_model is not None:
            image = TF.to_tensor(image)
            image = TF.normalize(
                image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
            with torch.no_grad():
                image = image.to(self.device)
                image = self.transform_model(image)
                # Hand the result back on the CPU so batches collate uniformly.
                image = image.to("cpu")

        if self.transform is not None:
            image = self.transform(image)

        has_labels = self.label_list is not None and len(self.label_list) > 0
        if has_labels:
            return image, self.label_list[idx]
        return image


class UpscaleDataset(Dataset):
    """Low-/high-resolution image pairs for training an upscaling model.

    Behaviour depends on ``mode``:

    * ``"train"`` (default): returns ``(lr, hr)`` after applying the same
      random horizontal flip and random rotation to both images.
    * ``"eval"``, or no ``hr_path_list``: returns only the normalised
      low-resolution tensor.
    * any other value: returns ``(lr, hr)`` without augmentation.

    The low-resolution tensor is always normalised with the fixed mean/std
    below; the high-resolution tensor is left as produced by ``to_tensor``.

    Args:
        lr_path_list: Low-resolution image paths, each joined onto ``img_dir``.
        hr_path_list: High-resolution image paths aligned with ``lr_path_list``;
            may be ``None`` when only low-resolution images are needed.
        img_dir: Directory prefix for every image path.
        mode: See above.
        **kwargs: Accepted and ignored.
    """

    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    def __init__(self, lr_path_list, hr_path_list, img_dir, mode="train", **kwargs):
        self.img_dir = img_dir
        self.mode = mode
        self.lr_path_list = lr_path_list
        self.hr_path_list = hr_path_list

    def __len__(self):
        """Number of low-resolution images."""
        return len(self.lr_path_list)

    def _load_rgb(self, relative_path):
        """Open ``img_dir/relative_path`` as a 3-channel image and close the file."""
        with Image.open(os.path.join(self.img_dir, relative_path)) as raw_image:
            return raw_image.convert("RGB")

    def _to_normalized_tensor(self, img):
        """Convert an image to a tensor and normalise it channel-wise."""
        return TF.normalize(TF.to_tensor(img), mean=self._MEAN, std=self._STD)

    def transform_img(self, lr_img, hr_img=None):
        """Turn the image(s) into tensors, augmenting the pair in train mode.

        Returns the low-resolution tensor alone when ``mode == "eval"`` or
        ``hr_img`` is ``None``; otherwise ``(lr_tensor, hr_tensor)``.
        """
        if self.mode == "eval" or hr_img is None:
            return self._to_normalized_tensor(lr_img)

        if self.mode == "train":
            # The same flip and angle are applied to both images so the pair
            # stays aligned.
            if random.random() > 0.5:
                lr_img = TF.hflip(lr_img)
                hr_img = TF.hflip(hr_img)

            random_rotate_angle = random.randint(0, 360)
            lr_img = TF.rotate(lr_img, random_rotate_angle)
            hr_img = TF.rotate(hr_img, random_rotate_angle)

        return self._to_normalized_tensor(lr_img), TF.to_tensor(hr_img)

    def __getitem__(self, idx):
        """Return the item for ``idx`` in the form described on the class."""
        lr_img = self._load_rgb(self.lr_path_list[idx])

        if self.mode == "eval" or self.hr_path_list is None:
            return self.transform_img(lr_img)

        hr_img = self._load_rgb(self.hr_path_list[idx])
        return self.transform_img(lr_img, hr_img)




In [None]:
# earlystopping

import torch


class EarlyStopping:
    """Track the validation loss and flag when training should stop.

    Call the instance once per epoch. An epoch counts as an improvement when
    its loss is at most ``best_loss - delta``; the model's ``state_dict`` is
    then saved. After ``patience`` consecutive epochs without improvement
    ``early_stop`` becomes ``True``. A NaN loss never counts as an improvement.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        verbose: Print a message on every checkpoint and every counter bump.
        delta: Minimum decrease of the loss required to count as improvement.
        name: The checkpoint is written to ``checkpoint_{name}.pt`` in the
            current working directory.
    """

    def __init__(self, patience=5, verbose=False, delta=0, name="model"):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0
        self.val_loss = float("inf")
        self.name = name

    def __call__(self, val_loss, model):
        """Record ``val_loss``; checkpoint ``model`` on improvement."""
        # Scores are negated losses, so a higher score is better.
        score = -val_loss

        if score != score:
            # NaN is the only value unequal to itself. Every comparison with it
            # is False, so it must be rejected explicitly or it would replace
            # the best score and overwrite the best checkpoint.
            improved = False
        elif self.best_score is None:
            improved = True
        else:
            improved = score >= self.best_score + self.delta

        if improved:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        """Write ``model.state_dict()`` to disk and remember ``val_loss``."""
        if self.verbose:
            print(
                f"Validation loss decreased ({self.val_loss:.6f} --> {val_loss:.6f}).  Saving model ..."
            )
        torch.save(model.state_dict(), f"checkpoint_{self.name}.pt")
        self.val_loss = val_loss


In [None]:
# upscale train
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import lpips
import pandas as pd
import numpy as np

# One shared seed for every random number generator this notebook draws from.
RANDOM_SEED = 42

# Python's stdlib RNG and NumPy's global RNG.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# PyTorch: the default generator, then every CUDA device explicitly.
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)


# split train, validate, test dataset
def split_train_val_test(data, y_label):
    """Randomly partition ``data`` into train, validation and test frames.

    1% of the rows are held out as the test set, then 25% of the remaining
    rows become the validation set. ``data[y_label]`` is passed along to
    ``train_test_split`` but its split halves are discarded.

    Returns:
        ``(train, val, test)``.
    """
    remaining, test, *_ = train_test_split(data, data[y_label], test_size=0.01)
    train, val, *_ = train_test_split(
        remaining, remaining[y_label], test_size=0.25
    )
    return train, val, test


# label encodeing
def encode_lable(pkl_path, train, val, test):
    """Integer-encode the ``"label"`` column of all three splits.

    If no file exists at ``pkl_path`` a new ``LabelEncoder`` is fitted on the
    training labels and pickled there (missing parent directories are
    created). Otherwise the pickled encoder is loaded and reused.

    The ``"label"`` column of each frame is overwritten in place.

    Returns:
        ``(train, val, test, label_count)`` where ``label_count`` is the number
        of classes known to the encoder.
    """
    fit_new_encoder = not os.path.exists(pkl_path)

    if fit_new_encoder:
        label_encoding = preprocessing.LabelEncoder()
        train["label"] = label_encoding.fit_transform(train["label"])
    else:
        # NOTE: unpickling can execute arbitrary code; only point pkl_path at
        # files written by this notebook.
        with open(pkl_path, "rb") as f:
            label_encoding = pickle.load(f)
        train["label"] = label_encoding.transform(train["label"])

    val["label"] = label_encoding.transform(val["label"])
    test["label"] = label_encoding.transform(test["label"])

    if fit_new_encoder:
        # Persist only after every split encoded successfully.
        parent_dir = os.path.dirname(pkl_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(pkl_path, "wb") as f:
            pickle.dump(label_encoding, f)

    label_count = len(label_encoding.classes_)
    return train, val, test, label_count


# dataloader


def convert_dataset(train, val, test, transform):
    """Wrap the three split frames in ``UpscaleDataset`` objects rooted at ``/data/``.

    Each frame must provide the columns ``"img_path"`` (low-resolution input)
    and ``"upscale_img_path"`` (high-resolution target). ``transform`` is
    forwarded as a keyword argument. No ``mode`` is passed, so every dataset
    uses ``UpscaleDataset``'s default mode.

    Returns:
        ``(train_dataset, val_dataset, test_dataset)``.
    """

    def build(frame):
        return UpscaleDataset(
            frame["img_path"].values,
            frame["upscale_img_path"].values,
            "/data/",
            transform=transform,
        )

    train_dataset = build(train)
    val_dataset = build(val)
    test_dataset = build(test)
    return train_dataset, val_dataset, test_dataset


def get_dataloader(train_dataset, val_dataset, test_dataset, batch_size, shuffle):
    """Create one ``DataLoader`` per dataset.

    ``batch_size`` and ``shuffle`` are applied identically to all three
    loaders, including the validation and test ones.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader)``.
    """
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    train_dataloader, val_dataloader, test_dataloader = (
        DataLoader(dataset, **loader_options)
        for dataset in (train_dataset, val_dataset, test_dataset)
    )
    return train_dataloader, val_dataloader, test_dataloader


# train

# LPIPS networks already built, keyed by device string, so the VGG weights are
# loaded once instead of once per validation pass.
_LPIPS_NET_CACHE = {}


def _get_lpips_net(device):
    """Return the LPIPS (VGG) network for ``device``, building it on first use."""
    key = str(device)
    if key not in _LPIPS_NET_CACHE:
        _LPIPS_NET_CACHE[key] = lpips.LPIPS(net="vgg").to(device)
    return _LPIPS_NET_CACHE[key]


def calculate_validation_loss_and_lpips(model, val_dataloader, criterion, device):
    """Evaluate ``model`` on ``val_dataloader``.

    Puts ``model`` in eval mode and runs without gradients. Both metrics are
    averaged per sample, so a smaller final batch is weighted correctly.

    Args:
        model: Network mapping a batch of inputs to a batch of images.
        val_dataloader: Iterable of ``(images, targets)`` batches.
        criterion: Loss returning the batch mean.
        device: Device the batches and the LPIPS network are moved to.

    Returns:
        ``(average_loss, average_lpips)``.

    Raises:
        ValueError: If ``val_dataloader`` yields no samples.
    """
    lpips_net = _get_lpips_net(device)
    model.eval()

    total_loss = 0.0
    total_lpips = 0.0
    total_count = 0
    with torch.no_grad():
        for images, labels in val_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = images.size(0)

            # criterion returns the batch mean; re-weight it by batch size.
            total_loss += criterion(outputs, labels).item() * batch_size

            # LPIPS expects inputs in [-1, 1]; normalize=True rescales from
            # [0, 1]. Assumes the targets are in [0, 1], as produced by
            # to_tensor in UpscaleDataset -- TODO confirm for other datasets.
            distances = lpips_net(outputs, labels, normalize=True)
            total_lpips += distances.reshape(batch_size, -1).mean(dim=1).sum().item()

            total_count += batch_size

    if total_count == 0:
        raise ValueError("val_dataloader yielded no samples")

    average_loss = total_loss / total_count
    average_lpips = total_lpips / total_count
    return average_loss, average_lpips


def train_model(
    model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    device,
    scheduler,
    verbose=True,
    early_stopping=None,
):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    After every epoch the validation loss and LPIPS are computed with
    ``calculate_validation_loss_and_lpips``. The validation loss is passed to
    ``early_stopping`` and to ``scheduler.step``.

    Args:
        model: Network to optimise in place.
        train_dataloader: Iterable of ``(images, targets)`` training batches.
        val_dataloader: Iterable of ``(images, targets)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimiser over ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        device: Device every batch is moved to.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        verbose: Print the batch loss every 50 steps.
        early_stopping: Callable ``(val_loss=..., model=...)`` exposing an
            ``early_stop`` flag. When omitted, a module-level variable named
            ``early_stopping`` is used if one exists; otherwise training runs
            for all ``num_epochs``.
    """
    if early_stopping is None:
        # Backward compatibility: this function used to read the global directly.
        early_stopping = globals().get("early_stopping")

    for epoch in range(num_epochs):
        print(f"-------------------Epoch {epoch + 1} start-------------------")
        model.train()
        running_loss = 0.0
        num_batches = 0
        current_lr = optimizer.param_groups[0]["lr"]
        print(f"Epoch {epoch+1}: current learning rate = {current_lr}")

        for step, (images, labels) in enumerate(train_dataloader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            if verbose and (step % 50 == 0):
                print(f"Epoch {epoch + 1}, Batch {step}, Loss: {loss.item():.4f}")

        # Mean training loss over the epoch; NaN when the loader was empty.
        train_loss = running_loss / num_batches if num_batches else float("nan")

        val_loss, lpips_score = calculate_validation_loss_and_lpips(
            model, val_dataloader=val_dataloader, criterion=criterion, device=device
        )
        if early_stopping is not None:
            early_stopping(val_loss=val_loss, model=model)

        scheduler.step(val_loss)

        if early_stopping is not None and early_stopping.early_stop:
            print("Early stopping")
            break
        print(
            f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.4f}, "
            f"Val loss: {val_loss:.4f}, lpips: {lpips_score:.4f}"
        )


# --- Configuration of the upscaling (EDSR) training run ---
DATA_PATH = "/data/train.csv"
LABEL_ENCODER_PKL_PATH = "project/results/label_encoder/label_encoder.pkl"
IS_LABEL_ENCODER = False  # labels are not needed to train the upscaler
NAME = "image_upscale_32_256_doubleUpsample"
BATCH_SIZE = 16
NUM_EPOCHS = 200
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Passed to convert_dataset as `transform`.
TRANSFORM = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
LEARNING_RATE = 0.00001

# --- Data ---
data = pd.read_csv(DATA_PATH)

train, val, test = split_train_val_test(data, "upscale_img_path")

if IS_LABEL_ENCODER:
    train, val, test, label_count = encode_lable(
        LABEL_ENCODER_PKL_PATH, train, val, test
    )

train_dataset, val_dataset, test_dataset = convert_dataset(
    train, val, test, transform=TRANSFORM
)

train_dataloader, val_dataloader, test_dataloader = get_dataloader(
    train_dataset, val_dataset, test_dataset, BATCH_SIZE, shuffle=True
)

# --- Model, loss and optimisation ---
upscale_model = EdsrModel(
    num_channels=3, num_features=256, num_res_blocks=32, upscale_factor=4
).to(DEVICE)


criterion = nn.L1Loss().to(DEVICE)
optimizer = optim.Adam(upscale_model.parameters(), lr=LEARNING_RATE)
# Divide the learning rate by 10 after 2 epochs without validation improvement.
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.1, patience=2)

# train_model reads this module-level object.
early_stopping = EarlyStopping(patience=5, verbose=True, name=NAME)

train_model(
    upscale_model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
    scheduler,
    verbose=True,
)

# When training ends the in-memory weights are those of the last epoch, not
# the best one. Reload the best-validation checkpoint written by EarlyStopping
# so later cells that reuse `upscale_model` get the best weights.
BEST_CHECKPOINT_PATH = f"checkpoint_{NAME}.pt"
if os.path.exists(BEST_CHECKPOINT_PATH):
    upscale_model.load_state_dict(
        torch.load(BEST_CHECKPOINT_PATH, map_location=DEVICE)
    )



In [None]:
# model train
import os
import sys
import pickle
import numpy as np
import random

import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.nn.functional import interpolate
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
import pandas as pd



# split train, validate, test dataset
def split_train_val_test(data, y_label):
    """Partition ``data`` into train, validation and test frames, stratified on ``y_label``.

    1% of the rows are held out as the test set, then 25% of the remaining
    rows become the validation set. Both splits are stratified on the
    ``y_label`` column.

    Returns:
        ``(train, val, test)``.
    """
    all_labels = data[y_label]
    remaining, test, *_ = train_test_split(
        data, all_labels, test_size=0.01, stratify=all_labels
    )

    remaining_labels = remaining[y_label]
    train, val, *_ = train_test_split(
        remaining, remaining_labels, test_size=0.25, stratify=remaining_labels
    )
    return train, val, test


# label encodeing
def encode_lable(pkl_path, train, val, test):
    """Integer-encode the ``"label"`` column of all three splits.

    If no file exists at ``pkl_path`` a new ``LabelEncoder`` is fitted on the
    training labels and pickled there (missing parent directories are
    created). Otherwise the pickled encoder is loaded and reused.

    The ``"label"`` column of each frame is overwritten in place.

    Returns:
        ``(train, val, test, label_count)`` where ``label_count`` is the number
        of classes known to the encoder.
    """
    fit_new_encoder = not os.path.exists(pkl_path)

    if fit_new_encoder:
        label_encoding = preprocessing.LabelEncoder()
        train["label"] = label_encoding.fit_transform(train["label"])
    else:
        # NOTE: unpickling can execute arbitrary code; only point pkl_path at
        # files written by this notebook.
        with open(pkl_path, "rb") as f:
            label_encoding = pickle.load(f)
        train["label"] = label_encoding.transform(train["label"])

    val["label"] = label_encoding.transform(val["label"])
    test["label"] = label_encoding.transform(test["label"])

    if fit_new_encoder:
        # Persist only after every split encoded successfully.
        parent_dir = os.path.dirname(pkl_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(pkl_path, "wb") as f:
            pickle.dump(label_encoding, f)

    label_count = len(label_encoding.classes_)
    return train, val, test, label_count


# dataloader


def convert_dataset(train, val, test, transform, transform_model, device):
    """Wrap the three split frames in ``CustomDataset`` objects rooted at ``/data/``.

    All three datasets, including the test one, are built with the same
    ``transform``, ``transform_model`` and ``device``, so every split goes
    through the same preprocessing pipeline.

    Args:
        train, val, test: Frames with the columns ``"img_path"`` and ``"label"``.
        transform: Callable applied to each image as the last step.
        transform_model: Optional model applied to each image before
            ``transform``; ``None`` disables it.
        device: Device ``transform_model`` runs on.

    Returns:
        ``(train_dataset, val_dataset, test_dataset)``.
    """

    def build(frame):
        return CustomDataset(
            frame["img_path"].values,
            frame["label"].values,
            "/data/",
            transform=transform,
            transform_model=transform_model,
            device=device,
        )

    train_dataset = build(train)
    val_dataset = build(val)
    test_dataset = build(test)
    return train_dataset, val_dataset, test_dataset


def get_dataloader(train_dataset, val_dataset, test_dataset, batch_size, shuffle):
    """Create one ``DataLoader`` per dataset.

    ``batch_size`` and ``shuffle`` are applied identically to all three
    loaders, including the validation and test ones.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader)``.
    """

    def make_loader(dataset):
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    return (
        make_loader(train_dataset),
        make_loader(val_dataset),
        make_loader(test_dataset),
    )


# train
def calculate_validation_loss_and_f1(model, val_dataloader, criterion, device):
    """Evaluate a classifier on ``val_dataloader``.

    Puts ``model`` in eval mode and runs without gradients. The loss is
    averaged per sample; the predicted class is the index of the largest
    output along dimension 1.

    Returns:
        ``(average_loss, macro_f1)``.
    """
    model.eval()
    loss_sum = 0
    sample_count = 0
    predicted_classes = []
    true_classes = []

    with torch.no_grad():
        for batch_images, batch_labels in val_dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_size = batch_images.size(0)

            # criterion returns the batch mean; re-weight it by batch size.
            loss_sum += criterion(logits, batch_labels).item() * batch_size
            sample_count += batch_size

            predicted_classes.extend(logits.argmax(dim=1).cpu().numpy())
            true_classes.extend(batch_labels.cpu().numpy())

    average_loss = loss_sum / sample_count
    macro_f1 = f1_score(true_classes, predicted_classes, average="macro")
    return average_loss, macro_f1


def train_model(
    model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    num_epochs,
    device,
    scheduler,
    verbose=True,
    early_stopping=None,
):
    """Train a classifier with per-epoch validation, LR scheduling and early stopping.

    After every epoch the validation loss and macro F1 are computed with
    ``calculate_validation_loss_and_f1``. The validation loss is passed to
    ``early_stopping`` and to ``scheduler.step``.

    Args:
        model: Network to optimise in place.
        train_dataloader: Iterable of ``(images, labels)`` training batches.
        val_dataloader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimiser over ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        device: Device every batch is moved to.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        verbose: Print the batch loss every 50 steps.
        early_stopping: Callable ``(val_loss=..., model=...)`` exposing an
            ``early_stop`` flag. When omitted, a module-level variable named
            ``early_stopping`` is used if one exists; otherwise training runs
            for all ``num_epochs``.
    """
    if early_stopping is None:
        # Backward compatibility: this function used to read the global directly.
        early_stopping = globals().get("early_stopping")

    for epoch in range(num_epochs):
        print(f"-------------------Epoch {epoch + 1} start-------------------")
        model.train()
        running_loss = 0.0
        num_batches = 0
        current_lr = optimizer.param_groups[0]["lr"]
        print(f"Epoch {epoch+1}: current learning rate = {current_lr}")

        for step, (images, labels) in enumerate(train_dataloader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

            if verbose and (step % 50 == 0):
                print(f"Epoch {epoch + 1}, Batch {step}, Loss: {loss.item():.4f}")

        # Mean training loss over the epoch; NaN when the loader was empty.
        train_loss = running_loss / num_batches if num_batches else float("nan")

        val_loss, macro_f1 = calculate_validation_loss_and_f1(
            model, val_dataloader=val_dataloader, criterion=criterion, device=device
        )
        if early_stopping is not None:
            early_stopping(val_loss=val_loss, model=model)

        scheduler.step(val_loss)

        if early_stopping is not None and early_stopping.early_stop:
            print("Early stopping")
            break
        print(
            f"Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.4f}, "
            f"Val loss: {val_loss:.4f}, Macro f1: {macro_f1:.4f}"
        )

In [None]:
# eva02 upscale_model


# --- Configuration: EVA02-large (224 px) trained on EDSR-upscaled inputs ---
DATA_PATH = "/data/train.csv"
LABEL_ENCODER_PKL_PATH = "project/results/label_encoder/label_encoder.pkl"
IS_LABEL_ENCODER = True
NAME = "checkpoint_evav2_224_large_patch14_transform_upscaleModel_sgd"
BATCH_SIZE = 2
NUM_EPOCHS = 1000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No ToTensor here: with a transform_model, CustomDataset converts the image to
# a tensor itself before this transform runs.
# NOTE(review): convert_dataset passes this same transform, random flip and
# rotation included, to the validation and test datasets -- confirm intended.
TRANSFORM = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.Resize((224, 224)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
LEARNING_RATE = 0.001

# --- Data ---
data = pd.read_csv(DATA_PATH)

train, val, test = split_train_val_test(data, "label")

if IS_LABEL_ENCODER:
    train, val, test, label_count = encode_lable(
        LABEL_ENCODER_PKL_PATH, train, val, test
    )


train_dataset, val_dataset, test_dataset = convert_dataset(
    train,
    val,
    test,
    transform=TRANSFORM,
    transform_model=upscale_model,
    device=DEVICE,
)

train_dataloader, val_dataloader, test_dataloader = get_dataloader(
    train_dataset, val_dataset, test_dataset, BATCH_SIZE, shuffle=True
)

# --- Model, loss and optimisation ---
first_model = EvaV2Model(y_dim=label_count).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.SGD(first_model.parameters(), lr=LEARNING_RATE)
# Divide the learning rate by 10 after 2 epochs without validation improvement.
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.1, patience=2)

# train_model reads this module-level object.
early_stopping = EarlyStopping(patience=5, verbose=True, name=NAME)

torch.cuda.empty_cache()

train_model(
    first_model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
    scheduler,
    verbose=True,
)

# When training ends the in-memory weights are those of the last epoch, not
# the best one. Reload the best-validation checkpoint written by EarlyStopping
# so the ensemble cell uses the best weights.
BEST_CHECKPOINT_PATH = f"checkpoint_{NAME}.pt"
if os.path.exists(BEST_CHECKPOINT_PATH):
    first_model.load_state_dict(
        torch.load(BEST_CHECKPOINT_PATH, map_location=DEVICE)
    )


In [None]:
# second model

# --- Configuration: EVA02-large (224 px) trained on bicubic-resized inputs ---
DATA_PATH = "/data/train.csv"
LABEL_ENCODER_PKL_PATH = "project/results/label_encoder/label_encoder.pkl"
IS_LABEL_ENCODER = True
NAME = "checkpoint_evav2_224_large_patch14_transform_bicubic_sgd"
BATCH_SIZE = 2
NUM_EPOCHS = 1000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No transform_model is used for this model, so the transform receives the PIL
# image and has to convert it to a tensor itself (ToTensor).
# NOTE(review): convert_dataset passes this same transform, random flip and
# rotation included, to the validation and test datasets -- confirm intended.
TRANSFORM = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.Resize((224, 224), interpolation=Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
LEARNING_RATE = 0.001

# --- Data ---
data = pd.read_csv(DATA_PATH)

train, val, test = split_train_val_test(data, "label")

if IS_LABEL_ENCODER:
    train, val, test, label_count = encode_lable(
        LABEL_ENCODER_PKL_PATH, train, val, test
    )

train_dataset, val_dataset, test_dataset = convert_dataset(
    train,
    val,
    test,
    transform=TRANSFORM,
    transform_model=None,
    device=DEVICE,
)

train_dataloader, val_dataloader, test_dataloader = get_dataloader(
    train_dataset, val_dataset, test_dataset, BATCH_SIZE, shuffle=True
)

# --- Model, loss and optimisation ---
second_model = EvaV2Model(y_dim=label_count).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.SGD(second_model.parameters(), lr=LEARNING_RATE)
# Divide the learning rate by 10 after 2 epochs without validation improvement.
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.1, patience=2)

# train_model reads this module-level object.
early_stopping = EarlyStopping(patience=5, verbose=True, name=NAME)

torch.cuda.empty_cache()

train_model(
    second_model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
    scheduler,
    verbose=True,
)

# When training ends the in-memory weights are those of the last epoch, not
# the best one. Reload the best-validation checkpoint written by EarlyStopping
# so the ensemble cell uses the best weights.
BEST_CHECKPOINT_PATH = f"checkpoint_{NAME}.pt"
if os.path.exists(BEST_CHECKPOINT_PATH):
    second_model.load_state_dict(
        torch.load(BEST_CHECKPOINT_PATH, map_location=DEVICE)
    )


In [None]:
# third_model
# --- Configuration: SwinV2-large (256 px) trained on EDSR-upscaled inputs ---
DATA_PATH = "/data/train.csv"
LABEL_ENCODER_PKL_PATH = "project/results/label_encoder/label_encoder.pkl"
IS_LABEL_ENCODER = True
NAME = "checkpoint_swinv2_256_large_transform_upscaleModel_sgd"
BATCH_SIZE = 2
NUM_EPOCHS = 1000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No ToTensor here: with a transform_model, CustomDataset converts the image to
# a tensor itself before this transform runs.
# NOTE(review): convert_dataset passes this same transform, random flip and
# rotation included, to the validation and test datasets -- confirm intended.
TRANSFORM = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.Resize((256, 256)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
LEARNING_RATE = 0.001

# --- Data ---
data = pd.read_csv(DATA_PATH)

train, val, test = split_train_val_test(data, "label")

if IS_LABEL_ENCODER:
    train, val, test, label_count = encode_lable(
        LABEL_ENCODER_PKL_PATH, train, val, test
    )

train_dataset, val_dataset, test_dataset = convert_dataset(
    train,
    val,
    test,
    transform=TRANSFORM,
    transform_model=upscale_model,
    device=DEVICE,
)

train_dataloader, val_dataloader, test_dataloader = get_dataloader(
    train_dataset, val_dataset, test_dataset, BATCH_SIZE, shuffle=True
)

# --- Model, loss and optimisation ---
third_model = Swinv2Model(y_dim=label_count).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.SGD(third_model.parameters(), lr=LEARNING_RATE)
# Divide the learning rate by 10 after 2 epochs without validation improvement.
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.1, patience=2)

# train_model reads this module-level object.
early_stopping = EarlyStopping(patience=5, verbose=True, name=NAME)

torch.cuda.empty_cache()

train_model(
    third_model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
    scheduler,
    verbose=True,
)

# When training ends the in-memory weights are those of the last epoch, not
# the best one. Reload the best-validation checkpoint written by EarlyStopping
# so the ensemble cell uses the best weights.
BEST_CHECKPOINT_PATH = f"checkpoint_{NAME}.pt"
if os.path.exists(BEST_CHECKPOINT_PATH):
    third_model.load_state_dict(
        torch.load(BEST_CHECKPOINT_PATH, map_location=DEVICE)
    )


In [None]:
# fourth_model
# --- Configuration: MaxViT-xlarge (384 px) trained on EDSR-upscaled inputs ---
DATA_PATH = "/data/train.csv"
LABEL_ENCODER_PKL_PATH = "project/results/label_encoder/label_encoder.pkl"
IS_LABEL_ENCODER = True
NAME = "checkpoint_maxvit_384_xlarge_transform_upscaleModel_sgd"
BATCH_SIZE = 2
NUM_EPOCHS = 1000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No ToTensor here: with a transform_model, CustomDataset converts the image to
# a tensor itself before this transform runs.
# NOTE(review): convert_dataset passes this same transform, random flip and
# rotation included, to the validation and test datasets -- confirm intended.
TRANSFORM = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.Resize((384, 384)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
LEARNING_RATE = 0.0001

# --- Data ---
data = pd.read_csv(DATA_PATH)

train, val, test = split_train_val_test(data, "label")

if IS_LABEL_ENCODER:
    train, val, test, label_count = encode_lable(
        LABEL_ENCODER_PKL_PATH, train, val, test
    )

train_dataset, val_dataset, test_dataset = convert_dataset(
    train,
    val,
    test,
    transform=TRANSFORM,
    transform_model=upscale_model,
    device=DEVICE,
)

train_dataloader, val_dataloader, test_dataloader = get_dataloader(
    train_dataset, val_dataset, test_dataset, BATCH_SIZE, shuffle=True
)

# --- Model, loss and optimisation ---
fourth_model = MaxViTModel(y_dim=label_count).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.SGD(fourth_model.parameters(), lr=LEARNING_RATE)
# Divide the learning rate by 10 after 2 epochs without validation improvement.
scheduler = ReduceLROnPlateau(optimizer, "min", factor=0.1, patience=2)

# train_model reads this module-level object.
early_stopping = EarlyStopping(patience=5, verbose=True, name=NAME)

torch.cuda.empty_cache()

train_model(
    fourth_model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    NUM_EPOCHS,
    DEVICE,
    scheduler,
    verbose=True,
)

# When training ends the in-memory weights are those of the last epoch, not
# the best one. Reload the best-validation checkpoint written by EarlyStopping
# so the ensemble cell uses the best weights.
BEST_CHECKPOINT_PATH = f"checkpoint_{NAME}.pt"
if os.path.exists(BEST_CHECKPOINT_PATH):
    fourth_model.load_state_dict(
        torch.load(BEST_CHECKPOINT_PATH, map_location=DEVICE)
    )

In [None]:
# ensemble
from collections import defaultdict
from datetime import datetime


# NOTE(review): this path is relative ("data/test.csv") while the training
# cells read "/data/train.csv" and load images from "/data/" -- confirm.
INFERENCE_DATA = pd.read_csv("data/test.csv")

# The encoder pickled during training maps predicted indices back to labels.
# NOTE: unpickling can execute arbitrary code; only load files this notebook wrote.
with open("project/results/label_encoder/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)
label_count = len(label_encoder.classes_)

# One deterministic transform per ensemble member, in model order. Each matches
# the resize/normalisation of that model's training cell, minus the random
# flip and rotation. Only the bicubic pipeline needs ToTensor because it is
# the only one not preceded by the upscale model.
TRANSFORMS = [
    # first_model: 224 px, upscaled input
    transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    ),
    # second_model: 224 px, bicubic resize of the raw image
    transforms.Compose(
        [
            transforms.Resize((224, 224), interpolation=Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    ),
    # third_model: 256 px, upscaled input
    transforms.Compose(
        [
            transforms.Resize((256, 256)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    ),
    # fourth_model: 384 px, upscaled input
    transforms.Compose(
        [
            transforms.Resize((384, 384)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    ),
]

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Label list is None, so each dataset yields images only.
test_datasets = [
    CustomDataset(
        INFERENCE_DATA["img_path"].values,
        None,
        "/data/",
        transform=TRANSFORMS[0],
        device=DEVICE,
        transform_model=upscale_model,
    ),
    CustomDataset(
        INFERENCE_DATA["img_path"].values,
        None,
        "/data/",
        transform=TRANSFORMS[1],
        device=DEVICE,
        transform_model=None,
    ),
    CustomDataset(
        INFERENCE_DATA["img_path"].values,
        None,
        "/data/",
        transform=TRANSFORMS[2],
        device=DEVICE,
        transform_model=upscale_model,
    ),
    CustomDataset(
        INFERENCE_DATA["img_path"].values,
        None,
        "/data/",
        transform=TRANSFORMS[3],
        device=DEVICE,
        transform_model=upscale_model,
    ),
]

# shuffle=False keeps predictions aligned with the rows of INFERENCE_DATA.
test_dataloaders = [
    DataLoader(dataset, batch_size=1, shuffle=False) for dataset in test_datasets
]

models = [
    [first_model],
    [second_model],
    [third_model],
    [fourth_model],
]


# Raw outputs of every model: outputs_per_model[model_idx][batch_idx].
outputs_per_model = defaultdict(list)
for idx, model in enumerate(models):
    current_model = model[0]

    current_model.eval()

    print(f"---------model {idx+1} inference start---------")
    with torch.no_grad():
        for step, images in enumerate(test_dataloaders[idx]):
            images = images.to(DEVICE)
            outputs = current_model(images)
            # Keep the small per-sample outputs on the CPU so accelerator
            # memory is not held for the whole test set.
            outputs_per_model[idx].append(outputs.cpu())

    print(f"---------model {idx+1} inference end---------")


# Average the raw model outputs across the ensemble, batch by batch, and take
# the highest-scoring class.
final_predictions = []
for batch_idx in range(len(test_dataloaders[0])):
    batch_predictions = torch.stack(
        [outputs_per_model[model_idx][batch_idx] for model_idx in range(len(models))]
    )
    mean_predictions = torch.mean(batch_predictions, dim=0)

    predicted_labels = mean_predictions.argmax(dim=1)
    final_predictions.extend(predicted_labels.tolist())

result = label_encoder.inverse_transform(final_predictions)

submission = pd.DataFrame()
submission["id"] = INFERENCE_DATA["id"].values
submission["label"] = result

# Use a timestamp without spaces or colons so the file name is portable.
submission_timestamp = datetime.today().strftime("%Y%m%d_%H%M%S")
submission.to_csv(f"submission_{submission_timestamp}.csv", index=False)
