In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BeitFeatureExtractor, BeitForImageClassification, AdamW
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from torchvision import transforms

In [2]:
# Locations of the competition CSV files
train_csv, test_csv, submission_csv = 'train.csv', 'test.csv', 'sample_submission.csv'

# Read each CSV into its own DataFrame
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
submission_df = pd.read_csv(submission_csv)

# Label encoding: learn the class vocabulary from the training labels, then
# overwrite the 'label' column with the corresponding integer codes.
label_encoder = LabelEncoder().fit(train_df['label'])
train_df['label'] = label_encoder.transform(train_df['label'])

# Dataset definition
class CustomDataset(Dataset):
    """Image dataset driven by a DataFrame of file paths.

    Each item is loaded from disk, converted to RGB, resized to 256x256 and
    passed through ``feature_extractor``; the ``'pixel_values'`` entry of the
    extractor output is returned with its leading dimension squeezed away.

    Args:
        dataframe: Table with an ``'img_path'`` column. In ``'train'`` mode it
            must also provide ``'label'`` (and ``'upscale_img_path'`` when
            ``use_upscale`` is true); in any other mode it must provide ``'id'``.
        feature_extractor: Callable invoked as
            ``feature_extractor(images=..., return_tensors="pt")`` whose result
            is indexed with ``'pixel_values'``.
        mode: ``'train'`` yields ``(pixel_values, label_tensor)``; every other
            value is treated as inference and yields ``(pixel_values, id)``.
        use_upscale: In ``'train'`` mode, read images from
            ``'upscale_img_path'`` instead of ``'img_path'``. Ignored otherwise.
    """

    def __init__(self, dataframe, feature_extractor, mode='train', use_upscale=False):
        self.dataframe = dataframe
        self.feature_extractor = feature_extractor
        self.mode = mode
        self.use_upscale = use_upscale
        self.transform = transforms.Resize((256, 256))  # resize to 256x256

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load and preprocess the sample at positional index ``idx``.

        Returns:
            ``(pixel_values, label)`` with ``label`` a ``torch.long`` tensor in
            ``'train'`` mode, otherwise ``(pixel_values, id)`` with ``id`` taken
            unchanged from the row's ``'id'`` column.
        """
        is_train = self.mode == 'train'
        # Fetch the row once instead of re-indexing the frame for every field.
        row = self.dataframe.iloc[idx]
        path_column = 'upscale_img_path' if (is_train and self.use_upscale) else 'img_path'

        # Use the context manager so the underlying file handle is released as
        # soon as the pixels have been copied into the converted image.
        with Image.open(row[path_column]) as img:
            image = img.convert("RGB")
        image = self.transform(image)  # resize to 256x256
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        # Presumably the extractor returns a batch of one; drop that axis.
        pixel_values = inputs['pixel_values'].squeeze(0)

        if is_train:
            return pixel_values, torch.tensor(row['label'], dtype=torch.long)
        return pixel_values, row['id']

# Preprocessing pipeline shipped with the pretrained BEiT checkpoint
feature_extractor = BeitFeatureExtractor.from_pretrained('microsoft/beit-base-patch16-224-pt22k')

# Training data: labelled rows, read from the upscaled image paths, reshuffled
# on every pass over the loader.
train_dataset = CustomDataset(
    dataframe=train_df,
    feature_extractor=feature_extractor,
    mode='train',
    use_upscale=True,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

# Test data: rows are served in file order and each item carries its id.
test_dataset = CustomDataset(
    dataframe=test_df,
    feature_extractor=feature_extractor,
    mode='test',
)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)



In [3]:
# Model setup
# Seed torch's RNGs before the classifier head is created so that its random
# initialisation and the shuffled batch order are repeatable on a fresh run.
SEED = 42
torch.manual_seed(SEED)

# Fall back to the CPU when no GPU is visible instead of failing on .to('cuda').
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BeitForImageClassification.from_pretrained(
    'microsoft/beit-base-patch16-224-pt22k',
    num_labels=len(label_encoder.classes_),
)
model.to(device)

# Optimizer and loss function
# torch.optim.AdamW stands in for the deprecated transformers.AdamW. eps and
# weight_decay are passed explicitly because the two implementations ship
# different defaults; these values keep the hyperparameters the deprecated
# class used (eps=1e-6, no weight decay).
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)
criterion = CrossEntropyLoss()

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch in tqdm(train_loader):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(pixel_values=inputs)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-base-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [05:38<00:00,  2.93it/s]


Epoch 1, Loss: 0.8505689562100804


100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [05:46<00:00,  2.86it/s]


Epoch 2, Loss: 0.17630067506823877


100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [05:52<00:00,  2.81it/s]


Epoch 3, Loss: 0.10750261418276814


100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [05:52<00:00,  2.81it/s]


Epoch 4, Loss: 0.07315420675405414


100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [05:51<00:00,  2.82it/s]

Epoch 5, Loss: 0.07409805909191339





In [4]:
# Evaluation and prediction
model.eval()
predictions = []
ids = []

# Send batches to whichever device the model's weights are on, rather than
# assuming a GPU is present.
device = next(model.parameters()).device

with torch.no_grad():
    for batch in tqdm(test_loader):
        inputs, id_batch = batch
        inputs = inputs.to(device)

        outputs = model(pixel_values=inputs)
        # Index of the highest logit per sample is the predicted class code.
        preds = outputs.logits.argmax(dim=1)

        predictions.extend(preds.cpu().numpy())
        ids.extend(id_batch)

100%|████████████████████████████████████████████████████████████████████████████████| 425/425 [00:56<00:00,  7.53it/s]


In [5]:
# Save results
# Turn the integer class codes back into the original label values.
decoded_labels = label_encoder.inverse_transform(predictions)

# The labels are written positionally, so they only line up if the submission
# rows are in the same order as the ids gathered during inference. Fail loudly
# rather than write a silently misaligned file.
# NOTE(review): assumes sample_submission.csv carries an 'id' column matching
# test.csv; the check is skipped when that column is absent.
if 'id' in submission_df.columns and list(ids) != submission_df['id'].tolist():
    raise ValueError(
        "Prediction ids do not match the row order of the submission file."
    )

submission_df['label'] = decoded_labels
submission_df.to_csv('submission2.csv', index=False)