In [1]:
#timm용

import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class MultiClassImageDataset(Dataset):
    def __init__(self, csv_file, img_root_dir, img_column, label_column, img_size=224):
        """
        Multi-class image classification dataset for timm models like MaxVit.

        :param csv_file: Path to the CSV file containing image paths and labels.
        :param img_root_dir: Root directory containing the images.
        :param img_column: Column name in the CSV containing the image file paths.
        :param label_column: Column name in the CSV containing the labels.
        :param img_size: Target image size (square, e.g., 224x224).
        """
        # Load the CSV file into a DataFrame
        self.data = pd.read_csv(csv_file)

        # Image paths
        self.img_root_dir = img_root_dir
        self.img_column = img_column

        # Label column
        self.label_column = label_column

        # Create a label mapping (string to integer)
        self.label_mapping = {label: idx for idx, label in enumerate(sorted(self.data[label_column].unique()))}

        # Define the image transformations (for MaxVit, use ImageNet normalization)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),  # Resize to target size
            transforms.ToTensor(),  # Convert image to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
        ])

    def __len__(self):
        """Return the total number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Fetch an image and its corresponding label by index.

        :param idx: Index of the sample.
        :return: A dictionary with 'pixel_values' (processed image tensor) and 'label' (integer label).
        """
        # Get the image file path
        img_path = os.path.join(self.img_root_dir, self.data.iloc[idx][self.img_column])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Apply the transformations
        processed_image =self.transform(image)

        # Map the label to an integer
        label = self.label_mapping[self.data.iloc[idx][self.label_column]]

        return {'pixel_values': processed_image, 'label': label}


In [2]:
import random
import numpy as np
import torch

def set_seed(seed):
    """
    Set the random seed for reproducibility.

    Args:
        seed (int): The seed value to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [3]:
# Set a specific seed value
seed_value = 42  # You can choose any integer value
set_seed(seed_value)


In [4]:
#timm용

csv_file = "./train.csv"
img_root_dir = "./"
# Dataset 인스턴스 생성
dataset = MultiClassImageDataset(
    csv_file=csv_file,
    img_root_dir=img_root_dir,
    img_column="img_path",  # CSV 파일의 이미지 경로 열 이름
    label_column="label",
    #model = model,
   # data_config = data_config,# CSV 파일의 라벨 열 이름
    img_size=224  # MaxVit 모델의 입력 크기
)




In [3]:
import os

current_path = os.getcwd()
print(current_path)

/home/idp/lab/song/dl


In [5]:
# Define K-Fold Cross Validation
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
class MaxVitClassifier(nn.Module):
    def __init__(self, base_model, num_classes):
        super(MaxVitClassifier, self).__init__()
        self.base_model = base_model
        self.num_classes = num_classes

        # Replace the head with a custom classification layer
        in_features = self.base_model.head.in_features
        self.base_model.head = nn.Identity()  # Remove original head
        self.classifier = nn.Linear(in_features, self.num_classes)

    def forward(self, x):
        # Extract features
        features = self.base_model.forward_features(x)
        
        #print(features.shape)
        # Ensure global average pooling is applied correctly
        if len(features.shape) == 3:  # [batch_size, seq_len, in_features]
            features = features.mean(dim=1)  # Pool across sequence length
        elif len(features.shape) == 4:  # [batch_size, in_features, H, W]
            features = F.adaptive_avg_pool2d(features, (1, 1)).view(features.size(0), -1)
            
#
        # Pass through classification head
        logits = self.classifier(features)
        return logits

    #def forward(self, x):
    #    # Extract features
    #    features = self.base_model.forward_features(x)
#
    #    # Permute dimensions to [batch_size, in_features, H, W]
    #    features = features.permute(0, 3, 1, 2)
#
    #    # Apply global average pooling
    #    features = F.adaptive_avg_pool2d(features, (1, 1)).view(features.size(0), -1)
#
    #    # Pass through classification head
    #    logits = self.classifier(features)
    #    return logits


# Step 1: Load the pretrained MaxVit model
base_model = timm.create_model('deit3_large_patch16_224.fb_in22k_ft_in1k', pretrained=True)
#eva_large_patch14_336.in22k_ft_in22k_in1k, eva02_large_patch14_448.mim_m38m_ft_in22k_in1k
#convnext_xxlarge.clip_laion2b_soup_ft_in1k
# Step 2: Wrap it in the classifier
num_classes = 25
model = MaxVitClassifier(base_model, num_classes)

In [16]:
!huggingface-cli login
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


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): Traceback (most recent call last):
  File "/home/idp/anaconda3/envs/mus

In [7]:
import huggingface_hub
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [8]:
from sklearn.model_selection import KFold
from torch.utils.data import Subset, DataLoader
import torch
import torch.nn as nn
from transformers import AutoImageProcessor, AutoModelForImageClassification
num_epochs = 2
batch_size = 16
learning_rate = 5e-5
num_classes = 10
# Define criterion and device
criterion = torch.nn.CrossEntropyLoss()
# K-Fold Training
fold_results = {}
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)
for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
    print(f"Fold {fold + 1}/{kf.n_splits}")

    # Split dataset into training and validation sets
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    # DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # Initialize optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            #outputs = model(pixel_values=pixel_values)
            #pixel_values = pixel_values.squeeze(0)
            outputs = model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}")

    # Validation loop
    model.eval()
    val_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in val_loader:
            pixel_values = batch['pixel_values'].to(device)
            labels = batch['label'].to(device)

            # Forward pass
            outputs = model(pixel_values)
            logits = outputs
            loss = criterion(logits, labels)

            val_loss += loss.item()

            # Accuracy
            _, predicted = torch.max(logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    accuracy = correct / total
    print(f"Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Save results for the fold
    fold_results[fold] = {'val_loss': val_loss, 'accuracy': accuracy}

# Print overall results
print("K-Fold Cross Validation Results:")
for fold, result in fold_results.items():
    print(f"Fold {fold + 1}: Loss: {result['val_loss']:.4f}, Accuracy: {result['accuracy']:.4f}")

Fold 1/5
Epoch 1/2, Training Loss: 0.3202856825563217
Epoch 2/2, Training Loss: 0.05507965279940261
Validation Loss: 0.2269, Accuracy: 0.9403
Fold 2/5
Epoch 1/2, Training Loss: 0.08218301670958589
Epoch 2/2, Training Loss: 0.03371131113188156
Validation Loss: 0.0633, Accuracy: 0.9785
Fold 3/5
Epoch 1/2, Training Loss: 0.04426032769037183
Epoch 2/2, Training Loss: 0.031119451142427406
Validation Loss: 0.0289, Accuracy: 0.9915
Fold 4/5
Epoch 1/2, Training Loss: 0.03269628165681485
Epoch 2/2, Training Loss: 0.027161028096550195
Validation Loss: 0.0143, Accuracy: 0.9953
Fold 5/5
Epoch 1/2, Training Loss: 0.029864186395190763
Epoch 2/2, Training Loss: 0.010032064230340254
Validation Loss: 0.0101, Accuracy: 0.9978
K-Fold Cross Validation Results:
Fold 1: Loss: 0.2269, Accuracy: 0.9403
Fold 2: Loss: 0.0633, Accuracy: 0.9785
Fold 3: Loss: 0.0289, Accuracy: 0.9915
Fold 4: Loss: 0.0143, Accuracy: 0.9953
Fold 5: Loss: 0.0101, Accuracy: 0.9978


In [9]:
torch.save(model, f'model_fold_epoch2_1245.pth')


In [None]:
"""mamba
Fold 1/5
Epoch 1/2, Training Loss: 0.4465740251741748
Epoch 2/2, Training Loss: 0.16250977216553322
Validation Loss: 0.3011, Accuracy: 0.9132
Fold 2/5
Epoch 1/2, Training Loss: 0.1834138295777269
Epoch 2/2, Training Loss: 0.1016049177207716
Validation Loss: 0.1836, Accuracy: 0.9438
Fold 3/5
Epoch 1/2, Training Loss: 0.11439399200452215
Epoch 2/2, Training Loss: 0.08508667345356088
Validation Loss: 0.0977, Accuracy: 0.9703
Fold 4/5
Epoch 1/2, Training Loss: 0.0833165806720945
Epoch 2/2, Training Loss: 0.05709977714797439
Validation Loss: 0.0614, Accuracy: 0.9814
Fold 5/5
Epoch 1/2, Training Loss: 0.0755088838690371
Epoch 2/2, Training Loss: 0.04305739817414408
Validation Loss: 0.1002, Accuracy: 0.9719
K-Fold Cross Validation Results:
Fold 1: Loss: 0.3011, Accuracy: 0.9132
Fold 2: Loss: 0.1836, Accuracy: 0.9438
Fold 3: Loss: 0.0977, Accuracy: 0.9703
Fold 4: Loss: 0.0614, Accuracy: 0.9814
Fold 5: Loss: 0.1002, Accuracy: 0.9719

convmini
Fold 1/5
Epoch 1/2, Training Loss: 0.487710864880361
Epoch 2/2, Training Loss: 0.11407356076256521
Validation Loss: 0.2265, Accuracy: 0.9324
Fold 2/5
Epoch 1/2, Training Loss: 0.12080685623134217
Epoch 2/2, Training Loss: 0.05445691198797065
Validation Loss: 0.0731, Accuracy: 0.9751
Fold 3/5
Epoch 1/2, Training Loss: 0.0641969392777401
Epoch 2/2, Training Loss: 0.033054238663262736
Validation Loss: 0.0308, Accuracy: 0.9896
Fold 4/5
Epoch 1/2, Training Loss: 0.0461322780592356
Epoch 2/2, Training Loss: 0.026519651799774675
Validation Loss: 0.0240, Accuracy: 0.9915
Fold 5/5
Epoch 1/2, Training Loss: 0.039437890470422864
Epoch 2/2, Training Loss: 0.02805027757881362
Validation Loss: 0.0445, Accuracy: 0.9867
K-Fold Cross Validation Results:
Fold 1: Loss: 0.2265, Accuracy: 0.9324
Fold 2: Loss: 0.0731, Accuracy: 0.9751
Fold 3: Loss: 0.0308, Accuracy: 0.9896
Fold 4: Loss: 0.0240, Accuracy: 0.9915
Fold 5: Loss: 0.0445, Accuracy: 0.9867"""

In [9]:
from PIL import Image
import os
import pandas as pd
from torch.utils.data import Dataset
from transformers import AutoImageProcessor
from torchvision import transforms
#timm 용

class MultiClassImageDatasetTest(Dataset):
    def __init__(self, csv_file, img_root_dir, img_column,img_size=384):
        """
        Multi-class image classification dataset for inference.

        :param csv_file: Path to the CSV file containing image paths.
        :param img_root_dir: Root directory containing the images.
        :param img_column: Column name in the CSV containing the image file paths.
        :param processor: Hugging Face AutoImageProcessor for image preprocessing.
        """
        # Load the CSV file into a DataFrame
        self.data = pd.read_csv(csv_file)

        # Image paths
        self.img_root_dir = img_root_dir
        self.img_column = img_column

        # Define the image transformations (for MaxVit, use ImageNet normalization)
        self.transform = transforms.Compose([
            transforms.Resize((img_size, img_size)),  # Resize to target size
            transforms.ToTensor(),  # Convert image to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
        ])

    def __len__(self):
        """Return the total number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Fetch an image by index and preprocess it.

        :param idx: Index of the sample.
        :return: A dictionary with 'pixel_values' (processed image tensor).
        """
        # Get the image file path
        img_path = os.path.join(self.img_root_dir, self.data.iloc[idx][self.img_column])

        # Load the image
        image = Image.open(img_path).convert("RGB")

        # Apply processor transforms
        processed_image =self.transform(image)
        return {'pixel_values': processed_image}

In [10]:
#timm용

test_csv_file = "./test.csv"
img_root_dir = "./"
img_column = "img_path"

test_dataset = MultiClassImageDatasetTest(
    csv_file=test_csv_file,
    img_root_dir=img_root_dir,
    img_column=img_column
)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [11]:

#timm용
def save_predictions_to_csv_huggingface(model, dataloader, dataset2, submission_file, output_file, device='cpu'):
    """
    Perform inference on a DataLoader using a Hugging Face model and save the results to a CSV file.

    :param model: Hugging Face model for image classification.
    :param dataloader: DataLoader containing the test dataset.
    :param dataset: Dataset object that contains label_mapping.
    :param submission_file: Path to the submission CSV file to update.
    :param output_file: Path to save the updated submission file.
    :param device: Device to perform inference on ('cpu' or 'cuda').
    """
    model.eval()
    model.to(device)
    predictions = []

    # Use the label mapping from the dataset
    label_mapping = {v: k for k, v in dataset.label_mapping.items()}  # Reverse mapping for inference

    with torch.no_grad():
        for batch in dataloader:
            images = batch['pixel_values'].to(device)
            outputs = model(images)
            logits = outputs
            probabilities = torch.softmax(logits, dim=1)
            predicted_class_indices = torch.argmax(probabilities, dim=1)

            # Map predicted indices to class names
            for idx in predicted_class_indices:
                predicted_class = label_mapping[idx.item()]
                predictions.append(predicted_class)

    # Load the submission file
    submission_df = pd.read_csv(submission_file)

    # Update the 'label' column with predictions
    submission_df['label'] = predictions

    # Save the updated submission file
    submission_df.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")


In [12]:
# 추론 및 저장
save_predictions_to_csv_huggingface(
    model=model,
    dataloader=test_dataloader,
    dataset2=dataset,  # 데이터셋 객체 전달
    submission_file="sample_submission.csv",
    output_file="submission_with_predictions.csv",
    device="cuda" if torch.cuda.is_available() else "cpu"
)

Predictions saved to submission_with_predictions.csv
