In [10]:
!pip install peft



In [4]:
import timm
import torch
from PIL import Image
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

In [5]:

import peft
from datasets import load_dataset

In [7]:
#timm용

import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class MultiClassImageDataset(Dataset):
    def __init__(self, csv_file, img_root_dir, img_column, label_column, img_size=224):
        """
        Multi-class image classification dataset for timm models like MaxVit.

        :param csv_file: Path to the CSV file containing image paths and labels.
        :param img_root_dir: Root directory containing the images.
        :param img_column: Column name in the CSV containing the image file paths.
        :param label_column: Column name in the CSV containing the labels.
        :param img_size: Target image size (square, e.g., 224x224).
        """
        # Load the CSV file into a DataFrame
        self.data = pd.read_csv(csv_file)

        # Image paths
        self.img_root_dir = img_root_dir
        self.img_column = img_column

        # Label column
        self.label_column = label_column

        # Create a label mapping (string to integer)
        self.label_mapping = {label: idx for idx, label in enumerate(sorted(self.data[label_column].unique()))}

        # Define the image transformations (for MaxVit, use ImageNet normalization)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),  # Resize to target size
            transforms.ToTensor(),  # Convert image to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
        ])

    def __len__(self):
        """Return the total number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Fetch an image and its corresponding label by index.

        :param idx: Index of the sample.
        :return: A dictionary with 'pixel_values' (processed image tensor) and 'label' (integer label).
        """
        # Get the image file path
        img_path = os.path.join(self.img_root_dir, self.data.iloc[idx][self.img_column])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Apply the transformations
        processed_image =self.transform(image)

        # Map the label to an integer
        label = self.label_mapping[self.data.iloc[idx][self.label_column]]

        return {'pixel_values': processed_image, 'label': label}


In [8]:
import random
import numpy as np
import torch

def set_seed(seed):
    """
    Set the random seed for reproducibility.

    Args:
        seed (int): The seed value to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [9]:
# Set a specific seed value
seed_value = 42  # You can choose any integer value
set_seed(seed_value)


In [10]:
#timm용

csv_file = "./train.csv"
img_root_dir = "./"
# Dataset 인스턴스 생성
dataset = MultiClassImageDataset(
    csv_file=csv_file,
    img_root_dir=img_root_dir,
    img_column="img_path",  # CSV 파일의 이미지 경로 열 이름
    label_column="label",
    #model = model,
   # data_config = data_config,# CSV 파일의 라벨 열 이름
    img_size=224  # MaxVit 모델의 입력 크기
)




In [3]:
import os

current_path = os.getcwd()
print(current_path)

/home/idp/lab/song/dl


In [11]:
# Define K-Fold Cross Validation
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
class MaxVitClassifier(nn.Module):
    def __init__(self, base_model, num_classes):
        super(MaxVitClassifier, self).__init__()
        self.base_model = base_model
        self.num_classes = num_classes

        # Replace the head with a custom classification layer
        in_features = self.base_model.head.in_features
        self.base_model.head = nn.Identity()  # Remove original head
        self.classifier = nn.Linear(in_features, self.num_classes)

    def forward(self, x):
        # Extract features
        features = self.base_model.forward_features(x)
        
        #print(features.shape)
        # Ensure global average pooling is applied correctly
        if len(features.shape) == 3:  # [batch_size, seq_len, in_features]
            features = features.mean(dim=1)  # Pool across sequence length
        elif len(features.shape) == 4:  # [batch_size, in_features, H, W]
            features = F.adaptive_avg_pool2d(features, (1, 1)).view(features.size(0), -1)
            
#
        # Pass through classification head
        logits = self.classifier(features)
        return logits

    #def forward(self, x):
    #    # Extract features
    #    features = self.base_model.forward_features(x)
#
    #    # Permute dimensions to [batch_size, in_features, H, W]
    #    features = features.permute(0, 3, 1, 2)
#
    #    # Apply global average pooling
    #    features = F.adaptive_avg_pool2d(features, (1, 1)).view(features.size(0), -1)
#
    #    # Pass through classification head
    #    logits = self.classifier(features)
    #    return logits


# Step 1: Load the pretrained MaxVit model
base_model = timm.create_model('deit3_large_patch16_224.fb_in22k_ft_in1k', pretrained=True)
#eva_large_patch14_336.in22k_ft_in22k_in1k, eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
#convnext_xxlarge.clip_laion2b_soup_ft_in1k
# Step 2: Wrap it in the classifier
transform = create_transform(**resolve_data_config(base_model.pretrained_cfg, model=base_model))
num_classes = 25
model = MaxVitClassifier(base_model, num_classes)

In [11]:
transform = create_transform(**resolve_data_config(model.base_model.pretrained_cfg, model=model))

In [9]:
[(n, type(m)) for n, m in model.named_modules()][:30]

[('', __main__.MaxVitClassifier),
 ('base_model', timm.models.vision_transformer.VisionTransformer),
 ('base_model.patch_embed', timm.layers.patch_embed.PatchEmbed),
 ('base_model.patch_embed.proj', torch.nn.modules.conv.Conv2d),
 ('base_model.patch_embed.norm', torch.nn.modules.linear.Identity),
 ('base_model.pos_drop', torch.nn.modules.dropout.Dropout),
 ('base_model.patch_drop', torch.nn.modules.linear.Identity),
 ('base_model.norm_pre', torch.nn.modules.linear.Identity),
 ('base_model.blocks', torch.nn.modules.container.Sequential),
 ('base_model.blocks.0', timm.models.vision_transformer.Block),
 ('base_model.blocks.0.norm1', torch.nn.modules.normalization.LayerNorm),
 ('base_model.blocks.0.attn', timm.models.vision_transformer.Attention),
 ('base_model.blocks.0.attn.qkv', torch.nn.modules.linear.Linear),
 ('base_model.blocks.0.attn.q_norm', torch.nn.modules.linear.Identity),
 ('base_model.blocks.0.attn.k_norm', torch.nn.modules.linear.Identity),
 ('base_model.blocks.0.attn.attn_dr

In [10]:
[(n, type(m)) for n, m in model.named_modules()][-5:]

[('base_model.norm', torch.nn.modules.normalization.LayerNorm),
 ('base_model.fc_norm', torch.nn.modules.linear.Identity),
 ('base_model.head_drop', torch.nn.modules.dropout.Dropout),
 ('base_model.head', torch.nn.modules.linear.Identity),
 ('classifier', torch.nn.modules.linear.Linear)]

In [11]:
config = peft.LoraConfig(r=8, target_modules=r".*\.mlp\.fc\d", modules_to_save=["classifier"])

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"
peft_model = peft.get_peft_model(model, config).to(device)
optimizer = torch.optim.Adam(peft_model.parameters(), lr=2e-4)
criterion = torch.nn.CrossEntropyLoss()
peft_model.print_trainable_parameters()

trainable params: 1,991,705 || all params: 305,367,090 || trainable%: 0.6522


In [16]:
!huggingface-cli login
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


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): Traceback (most recent call last):
  File "/home/idp/anaconda3/envs/mus

In [7]:
import huggingface_hub
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model = model.to(device)

In [24]:
from sklearn.model_selection import KFold
from torch.utils.data import Subset, DataLoader
import torch
import torch.nn as nn
from transformers import AutoImageProcessor, AutoModelForImageClassification
num_epochs = 2
batch_size = 16
learning_rate = 5e-5
num_classes = 10
# Define criterion and device
criterion = torch.nn.CrossEntropyLoss()
# K-Fold Training
fold_results = {}
#data_config = timm.data.resolve_model_data_config(peft_model)
#transforms = timm.data.create_transform(**data_config, is_training=False)
for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f"Fold {fold + 1}/{kf.n_splits}")

    # Split dataset into training and validation sets
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    # DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # Initialize optimizer
    optimizer = torch.optim.AdamW(peft_model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        peft_model.train()
        total_loss = 0

        for batch in train_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            #outputs = model(pixel_values=pixel_values)
            #pixel_values = pixel_values.squeeze(0)
            outputs = peft_model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}")

    # Validation loop
    peft_model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in val_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = peft_model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            val_loss += loss.item()

            # Accuracy
            _, predicted = torch.max(logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    accuracy = correct / total
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Save results for the fold
    fold_results[fold] = {'val_loss': val_loss, 'accuracy': accuracy}

# Print overall results
print("K-Fold Cross Validation Results:")
for fold, result in fold_results.items():
    print(f"Fold {fold + 1}: Loss: {result['val_loss']:.4f}, Accuracy: {result['accuracy']:.4f}")

Fold 1/5
Epoch 1/2, Training Loss: 0.008854657171018562
Epoch 2/2, Training Loss: 0.0029064079967634146
Validation Loss: 0.0027, Accuracy: 1.0000
Fold 2/5
Epoch 1/2, Training Loss: 0.003250337196503193
Epoch 2/2, Training Loss: 0.0026265482694169703
Validation Loss: 0.0037, Accuracy: 0.9994
Fold 3/5


KeyboardInterrupt: 

In [29]:
torch.save(peft_model, f'peftmodel_fold_epoch4_1344.pth')


In [14]:
peft_model = torch.load('peftmodel_fold_epoch4_1344.pth')
peft_model.to(device)

  peft_model = torch.load('peftmodel_fold_epoch4_1344.pth')


PeftModel(
  (base_model): LoraModel(
    (model): MaxVitClassifier(
      (base_model): VisionTransformer(
        (patch_embed): PatchEmbed(
          (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
          (norm): Identity()
        )
        (pos_drop): Dropout(p=0.0, inplace=False)
        (patch_drop): Identity()
        (norm_pre): Identity()
        (blocks): Sequential(
          (0): Block(
            (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
            (attn): Attention(
              (qkv): Linear(in_features=1024, out_features=3072, bias=True)
              (q_norm): Identity()
              (k_norm): Identity()
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=1024, out_features=1024, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
            )
            (ls1): LayerScale()
            (drop_path1): Identity()
            (norm2): LayerNorm((1024,), eps=1e-06, e

In [15]:
from sklearn.model_selection import KFold
from torch.utils.data import Subset, DataLoader
import torch
import torch.nn as nn
from transformers import AutoImageProcessor, AutoModelForImageClassification
num_epochs = 1
batch_size = 32
learning_rate = 5e-4

# Define criterion and device
criterion = torch.nn.CrossEntropyLoss()
# K-Fold Training
fold_results = {}
#data_config = timm.data.resolve_model_data_config(peft_model)
#transforms = timm.data.create_transform(**data_config, is_training=False)
for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f"Fold {fold + 1}/{kf.n_splits}")

    # Split dataset into training and validation sets
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    # DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # Initialize optimizer
    optimizer = torch.optim.AdamW(peft_model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        peft_model.train()
        total_loss = 0

        for batch in train_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            #outputs = model(pixel_values=pixel_values)
            #pixel_values = pixel_values.squeeze(0)
            outputs = peft_model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}")

    # Validation loop
    peft_model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in val_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = peft_model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            val_loss += loss.item()

            # Accuracy
            _, predicted = torch.max(logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    accuracy = correct / total
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Save results for the fold
    fold_results[fold] = {'val_loss': val_loss, 'accuracy': accuracy}

# Print overall results
print("K-Fold Cross Validation Results:")
for fold, result in fold_results.items():
    print(f"Fold {fold + 1}: Loss: {result['val_loss']:.4f}, Accuracy: {result['accuracy']:.4f}")

Fold 1/5


OutOfMemoryError: CUDA out of memory. Tried to allocate 100.00 MiB. GPU 0 has a total capacity of 23.54 GiB of which 96.81 MiB is free. Process 98638 has 11.87 GiB memory in use. Including non-PyTorch memory, this process has 11.55 GiB memory in use. Of the allocated memory 10.90 GiB is allocated by PyTorch, and 204.56 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
그대로
Fold 1/5
Epoch 1/1, Training Loss: 0.004091352261509495
Validation Loss: 0.0027, Accuracy: 0.9997
Fold 2/5
Epoch 1/1, Training Loss: 0.0016328931443981697
Validation Loss: 0.0010, Accuracy: 1.0000
Fold 3/5
Epoch 1/1, Training Loss: 0.0023818855987527733
Validation Loss: 0.0024, Accuracy: 0.9997
Fold 4/5
Epoch 1/1, Training Loss: 0.0008638568190824562
Validation Loss: 0.0003, Accuracy: 1.0000
Fold 5/5
Epoch 1/1, Training Loss: 0.0030112070329319164
Validation Loss: 0.0013, Accuracy: 1.0000
K-Fold Cross Validation Results:
Fold 1: Loss: 0.0027, Accuracy: 0.9997
Fold 2: Loss: 0.0010, Accuracy: 1.0000
Fold 3: Loss: 0.0024, Accuracy: 0.9997
Fold 4: Loss: 0.0003, Accuracy: 1.0000
Fold 5: Loss: 0.0013, Accuracy: 1.0000



batch_size = 32
learning_rate = 5e-4

Fold 1/5
Epoch 1/1, Training Loss: 0.0694166306278232
Validation Loss: 0.0896, Accuracy: 0.9747
Fold 2/5
Epoch 1/1, Training Loss: 0.05943770932480211
Validation Loss: 0.0509, Accuracy: 0.9836
Fold 3/5
Epoch 1/1, Training Loss: 0.03913040896543981
Validation Loss: 0.0266, Accuracy: 0.9908
Fold 4/5
Epoch 1/1, Training Loss: 0.02210452046196536
Validation Loss: 0.0220, Accuracy: 0.9927
Fold 5/5
Epoch 1/1, Training Loss: 0.014862824712436535
Validation Loss: 0.0160, Accuracy: 0.9953
K-Fold Cross Validation Results:
Fold 1: Loss: 0.0896, Accuracy: 0.9747
Fold 2: Loss: 0.0509, Accuracy: 0.9836
Fold 3: Loss: 0.0266, Accuracy: 0.9908
Fold 4: Loss: 0.0220, Accuracy: 0.9927
Fold 5: Loss: 0.0160, Accuracy: 0.9953

In [None]:
"""mamba
Fold 1/5
Epoch 1/2, Training Loss: 0.4465740251741748
Epoch 2/2, Training Loss: 0.16250977216553322
Validation Loss: 0.3011, Accuracy: 0.9132
Fold 2/5
Epoch 1/2, Training Loss: 0.1834138295777269
Epoch 2/2, Training Loss: 0.1016049177207716
Validation Loss: 0.1836, Accuracy: 0.9438
Fold 3/5
Epoch 1/2, Training Loss: 0.11439399200452215
Epoch 2/2, Training Loss: 0.08508667345356088
Validation Loss: 0.0977, Accuracy: 0.9703
Fold 4/5
Epoch 1/2, Training Loss: 0.0833165806720945
Epoch 2/2, Training Loss: 0.05709977714797439
Validation Loss: 0.0614, Accuracy: 0.9814
Fold 5/5
Epoch 1/2, Training Loss: 0.0755088838690371
Epoch 2/2, Training Loss: 0.04305739817414408
Validation Loss: 0.1002, Accuracy: 0.9719
K-Fold Cross Validation Results:
Fold 1: Loss: 0.3011, Accuracy: 0.9132
Fold 2: Loss: 0.1836, Accuracy: 0.9438
Fold 3: Loss: 0.0977, Accuracy: 0.9703
Fold 4: Loss: 0.0614, Accuracy: 0.9814
Fold 5: Loss: 0.1002, Accuracy: 0.9719

convmini
Fold 1/5
Epoch 1/2, Training Loss: 0.487710864880361
Epoch 2/2, Training Loss: 0.11407356076256521
Validation Loss: 0.2265, Accuracy: 0.9324
Fold 2/5
Epoch 1/2, Training Loss: 0.12080685623134217
Epoch 2/2, Training Loss: 0.05445691198797065
Validation Loss: 0.0731, Accuracy: 0.9751
Fold 3/5
Epoch 1/2, Training Loss: 0.0641969392777401
Epoch 2/2, Training Loss: 0.033054238663262736
Validation Loss: 0.0308, Accuracy: 0.9896
Fold 4/5
Epoch 1/2, Training Loss: 0.0461322780592356
Epoch 2/2, Training Loss: 0.026519651799774675
Validation Loss: 0.0240, Accuracy: 0.9915
Fold 5/5
Epoch 1/2, Training Loss: 0.039437890470422864
Epoch 2/2, Training Loss: 0.02805027757881362
Validation Loss: 0.0445, Accuracy: 0.9867
K-Fold Cross Validation Results:
Fold 1: Loss: 0.2265, Accuracy: 0.9324
Fold 2: Loss: 0.0731, Accuracy: 0.9751
Fold 3: Loss: 0.0308, Accuracy: 0.9896
Fold 4: Loss: 0.0240, Accuracy: 0.9915
Fold 5: Loss: 0.0445, Accuracy: 0.9867

peft
Fold 1/5
Epoch 1/2, Training Loss: 1.253221397043554
Epoch 2/2, Training Loss: 0.1957473995133463
Validation Loss: 0.1787, Accuracy: 0.9495
Fold 2/5
Epoch 1/2, Training Loss: 0.12313004221909472
Epoch 2/2, Training Loss: 0.07577837749564846
Validation Loss: 0.0993, Accuracy: 0.9706
Fold 3/5
Epoch 1/2, Training Loss: 0.06342431691709954
Epoch 2/2, Training Loss: 0.035829081823076286
Validation Loss: 0.0386, Accuracy: 0.9905
Fold 4/5
Epoch 1/2, Training Loss: 0.029754114269117695
Epoch 2/2, Training Loss: 0.014582787669221007
Validation Loss: 0.0188, Accuracy: 0.9959
Fold 5/5
Epoch 1/2, Training Loss: 0.013216604322527322
Epoch 2/2, Training Loss: 0.005126675787975884
Validation Loss: 0.0076, Accuracy: 0.9984
K-Fold Cross Validation Results:
Fold 1: Loss: 0.1787, Accuracy: 0.9495
Fold 2: Loss: 0.0993, Accuracy: 0.9706
Fold 3: Loss: 0.0386, Accuracy: 0.9905
Fold 4: Loss: 0.0188, Accuracy: 0.9959
Fold 5: Loss: 0.0076, Accuracy: 0.9984


"""

In [25]:
from PIL import Image
import os
import pandas as pd
from torch.utils.data import Dataset
from transformers import AutoImageProcessor
from torchvision import transforms
#timm 용

class MultiClassImageDatasetTest(Dataset):
    def __init__(self, csv_file, img_root_dir, img_column,img_size=224):
        """
        Multi-class image classification dataset for inference.

        :param csv_file: Path to the CSV file containing image paths.
        :param img_root_dir: Root directory containing the images.
        :param img_column: Column name in the CSV containing the image file paths.
        :param processor: Hugging Face AutoImageProcessor for image preprocessing.
        """
        # Load the CSV file into a DataFrame
        self.data = pd.read_csv(csv_file)

        # Image paths
        self.img_root_dir = img_root_dir
        self.img_column = img_column

        # Define the image transformations (for MaxVit, use ImageNet normalization)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),  # Resize to target size
            transforms.ToTensor(),  # Convert image to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
        ])

    def __len__(self):
        """Return the total number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Fetch an image by index and preprocess it.

        :param idx: Index of the sample.
        :return: A dictionary with 'pixel_values' (processed image tensor).
        """
        # Get the image file path
        img_path = os.path.join(self.img_root_dir, self.data.iloc[idx][self.img_column])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Apply processor transforms
        processed_image =self.transform(image)
        return {'pixel_values': processed_image}

In [26]:
#timm용

test_csv_file = "./test.csv"
img_root_dir = "./"
img_column = "img_path"

test_dataset = MultiClassImageDatasetTest(
    csv_file=test_csv_file,
    img_root_dir=img_root_dir,
    img_column=img_column
)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [27]:

#timm용
def save_predictions_to_csv_huggingface(model, dataloader, dataset2, submission_file, output_file, device='cpu'):
    """
    Perform inference on a DataLoader using a Hugging Face model and save the results to a CSV file.

    :param model: Hugging Face model for image classification.
    :param dataloader: DataLoader containing the test dataset.
    :param dataset: Dataset object that contains label_mapping.
    :param submission_file: Path to the submission CSV file to update.
    :param output_file: Path to save the updated submission file.
    :param device: Device to perform inference on ('cpu' or 'cuda').
    """
    model.eval()
    model.to(device)
    predictions = []

    # Use the label mapping from the dataset
    label_mapping = {v: k for k, v in dataset.label_mapping.items()}  # Reverse mapping for inference

    with torch.no_grad():
        for batch in dataloader:
            images = batch['pixel_values'].to(device)
            outputs = model(images)
            logits = outputs
            probabilities = torch.softmax(logits, dim=1)
            predicted_class_indices = torch.argmax(probabilities, dim=1)

            # Map predicted indices to class names
            for idx in predicted_class_indices:
                predicted_class = label_mapping[idx.item()]
                predictions.append(predicted_class)

    # Load the submission file
    submission_df = pd.read_csv(submission_file)

    # Update the 'label' column with predictions
    submission_df['label'] = predictions

    # Save the updated submission file
    submission_df.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")


In [28]:
# 추론 및 저장
save_predictions_to_csv_huggingface(
    model=peft_model,
    dataloader=test_dataloader,
    dataset2=dataset,  # 데이터셋 객체 전달
    submission_file="sample_submission.csv",
    output_file="submission_with_predictions_peft_fold2.csv",
    device="cuda" if torch.cuda.is_available() else "cpu"
)

Predictions saved to submission_with_predictions_peft_fold2.csv
