## Import

In [1]:
import torch
import torch.nn as nn
import pandas as pd
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import os
import numpy as np
import random
import warnings
import dataset as d
warnings.filterwarnings(action='ignore') 

## Hyperparameter Settings

In [2]:
# Central configuration for the notebook; later cells look settings up by key.
CFG = dict(
    IMG_SIZE=224,    # image size handed to the image transform
    EPOCHS=10,       # your epochs
    LR=1e-5,         # your learning rate
    BATCH_SIZE=128,  # your batch size
    SEED=41,         # seed passed to seed_everything
)

## Fix Random Seed

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices) and switches cuDNN to deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only inherited by processes started later; the hash seed of the already
    # running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one; silently ignored
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms and may select different
    # ones between runs, so it must be disabled for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed using the configured value

## Custom Dataset

In [4]:
# Use the CustomDataset class defined in dataset.py
import dataset as d
# NOTE(review): wildcard import -- the names it brings into scope are not visible
# from this notebook; confirm which ones are actually needed.
from util.preprocessing import  *


In [5]:
# Normalisation statistics handed to d.ImageTransForm: three values per tuple
# (presumably one per colour channel -- confirm against dataset.py).
train_mean, train_Std = (
    (0.42008194, 0.3838274, 0.34902292),
    (0.23926373, 0.22593886, 0.22363442),
)

# NOTE(review): the test split carries its own statistics rather than reusing
# the training ones -- confirm this is intended.
test_mean, test_Std = (
    (0.4216005, 0.38125762, 0.34539804),
    (0.23252015, 0.21890979, 0.21627444),
)

In [6]:
# Metadata tables for the two splits.
train_data = pd.read_csv(
    './data/open/train.csv',
)
test_data = pd.read_csv(
    './data/open/test.csv',
)

# One transform per split, built from the configured image size and that
# split's mean / std values.
train_transform = d.ImageTransForm(
    CFG['IMG_SIZE'],
    train_mean,
    train_Std,
)
test_transform = d.ImageTransForm(
    CFG['IMG_SIZE'],
    test_mean,
    test_Std,
)


In [7]:
# Wrap each metadata table in the project's CustomDataset; the second argument
# names the split and the transform is passed by keyword.
train_dataset = d.CustomDataset(
    train_data,
    'train',
    transform=train_transform,
)
test_dataset = d.CustomDataset(
    test_data,
    'test',
    transform=test_transform,
)

In [8]:
# Training batches are drawn in shuffled order.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
# The test split keeps its original order so that predictions stay aligned
# with the rows of test_data.
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)


In [9]:
# Pull a single batch from the training loader for inspection.
# NOTE(review): despite its name, `dataset` holds one batch, not a Dataset object.
dataset = next(iter(train_loader))

In [10]:
# Display the first element of the sampled batch.
dataset[0]

tensor([[[[-1.5263, -1.4115, -1.5099,  ..., -1.2804, -1.3132, -1.4443],
          [-1.5263, -1.4443, -1.4935,  ..., -1.3624, -1.2968, -1.5263],
          [-1.5263, -1.4771, -1.5099,  ..., -1.4607, -1.0182, -1.1821],
          ...,
          [-0.5756, -0.4773, -0.3953,  ..., -1.0018, -1.0673, -1.2640],
          [-1.1165, -0.9198, -0.6412,  ..., -1.2149, -1.1985, -1.0018],
          [-0.6740, -0.5920, -0.4773,  ..., -1.2149, -1.2804, -0.9854]],

         [[-1.4732, -1.3517, -1.4558,  ..., -1.1955, -1.2302, -1.3343],
          [-1.4732, -1.3864, -1.4385,  ..., -1.2822, -1.2128, -1.4211],
          [-1.4732, -1.4211, -1.4558,  ..., -1.3864, -0.9004, -1.0913],
          ...,
          [-0.8483, -0.7789, -0.7615,  ..., -1.0913, -1.1608, -1.3517],
          [-1.2822, -1.0393, -0.8657,  ..., -1.3343, -1.2822, -1.1434],
          [-1.0219, -0.8657, -0.8483,  ..., -1.2822, -1.3690, -1.1434]],

         [[-1.4029, -1.2977, -1.4029,  ..., -1.2100, -1.2626, -1.3853],
          [-1.4029, -1.3327, -

In [11]:
# Display the whole sampled batch.
dataset

[tensor([[[[-1.5263, -1.4115, -1.5099,  ..., -1.2804, -1.3132, -1.4443],
           [-1.5263, -1.4443, -1.4935,  ..., -1.3624, -1.2968, -1.5263],
           [-1.5263, -1.4771, -1.5099,  ..., -1.4607, -1.0182, -1.1821],
           ...,
           [-0.5756, -0.4773, -0.3953,  ..., -1.0018, -1.0673, -1.2640],
           [-1.1165, -0.9198, -0.6412,  ..., -1.2149, -1.1985, -1.0018],
           [-0.6740, -0.5920, -0.4773,  ..., -1.2149, -1.2804, -0.9854]],
 
          [[-1.4732, -1.3517, -1.4558,  ..., -1.1955, -1.2302, -1.3343],
           [-1.4732, -1.3864, -1.4385,  ..., -1.2822, -1.2128, -1.4211],
           [-1.4732, -1.4211, -1.4558,  ..., -1.3864, -0.9004, -1.0913],
           ...,
           [-0.8483, -0.7789, -0.7615,  ..., -1.0913, -1.1608, -1.3517],
           [-1.2822, -1.0393, -0.8657,  ..., -1.3343, -1.2822, -1.1434],
           [-1.0219, -0.8657, -0.8483,  ..., -1.2822, -1.3690, -1.1434]],
 
          [[-1.4029, -1.2977, -1.4029,  ..., -1.2100, -1.2626, -1.3853],
           [-

## Define Model

In [12]:
# Smoke-test the image encoder: run it on the first element of the sample batch
# and print the shapes of its two outputs.
from train.models.encoder_resnet import EncoderResnet

encoder = EncoderResnet(512)  # 512 presumably sets the feature width -- confirm in encoder_resnet
out, mos = encoder(dataset[0])
print(mos.shape, out.shape, sep='\n')

torch.Size([128, 1])
torch.Size([128, 512])


In [13]:
from train.models.seq2seq import Seq2seq

# Build the vocabulary from every whitespace-separated token in the training comments.
all_comments = ' '.join(train_data['comments']).split()

# Special tokens come first so that '<PAD>' is always index 0.
vocab = ['<PAD>', '<SOS>', '<EOS>']
# Sort the unique tokens: the iteration order of a set of strings changes between
# interpreter runs, which would give every word a different index after a kernel
# restart. Special tokens are excluded so they cannot be listed twice.
vocab += sorted(set(all_comments) - set(vocab))

# Lookup tables in both directions.
word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for word, idx in word2idx.items()}




ModuleNotFoundError: No module named 'common.config'

## Train

In [15]:
# Loss function; target positions holding the '<PAD>' index are ignored.
criterion = nn.CrossEntropyLoss(ignore_index=word2idx['<PAD>'])


# Training loop.
# NOTE(review): work in progress -- only the batch preprocessing below is active;
# the actual optimisation step is still commented out.
for epoch in range(CFG['EPOCHS']):
    total_loss = 0
    loop = tqdm(train_loader, leave=True)
    for imgs, comments in loop:
        # imgs = imgs.float()

        # Batch preprocessing: allocate a zero-filled LongTensor with one row per
        # comment. The width is the longest comment measured in tokens plus two
        # slots for '<SOS>' / '<EOS>', which is what the tokenisation drafted
        # below writes into each row (a character count would be the wrong unit).
        max_tokens = max(len(comment.split()) for comment in comments) + 2
        comments_tensor = torch.zeros((len(comments), max_tokens), dtype=torch.long)
        # `shape` is an attribute; calling it raises TypeError.
        print(comments_tensor.shape)
        break  # debugging aid: stop after the first batch of every epoch
    #     for i, comment in enumerate(comments):
    #         tokenized = ['<SOS>'] + comment.split() + ['<EOS>']
    #         comments_tensor[i, :len(tokenized)] = torch.tensor([word2idx[word] for word in tokenized])
    #         # print(comments_tensor.size())

    #     vocab_size = len(vocab)
    #     wordvec_size = len(comments_tensor[-1])
    #     hidden_size = 512
    #     model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    #     optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LR'])

    #     loss = model.forward(dataset, comments)
    #     model.backward()
    #     optimizer.update(model.params, model.grads)
    #     total_loss += loss
    #     # Forward & Loss
    #     start_id = comments_tensor[0]
    #     correct = comments_tensor[1:]
    #     predicted_comments = model.generate(dataset, start_id, len(correct))
    #     loss = criterion(predicted_comments.view(-1, len(vocab)), comments_tensor.view(-1))

    #     # Backpropagation
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()

    #     total_loss += loss.item()
    #     loop.set_description(f"Epoch {epoch + 1}")
    #     loop.set_postfix(loss=loss.item())

    # print(f"Epoch {epoch + 1} finished with average loss: {total_loss / len(train_loader):.4f}")

NameError: name 'word2idx' is not defined

## Inference & Submit

In [None]:
test_data = pd.read_csv('./data/open/test.csv')
# Same call shape as the earlier dataset cell: split name second, transform by
# keyword. The previous call passed an undefined `transform` positionally.
test_dataset = d.CustomDataset(test_data, 'test', transform=test_transform)
# Fixed order so predictions line up with the rows of test_data.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# NOTE(review): `model` is not created in any active cell of this notebook (the
# only construction is commented out in the training cell) -- define or load it
# before running this cell.
model.eval()
predicted_mos_list = []
predicted_comments_list = []

def greedy_decode(model, image, max_length=50):
    """Predict a score and greedily decode a caption for a single image.

    Args:
        model: Callable network that also exposes ``cnn``, ``embedding``,
            ``lstm`` and ``fc`` attributes. Its call result is unpacked as a
            pair whose first item is read with ``.item()``.
        image: One image tensor without a batch dimension (one is added here).
        max_length: Upper bound on the number of decoding steps.

    Returns:
        Tuple ``(mos, caption)``: the scalar taken from the model's first
        output, and the generated words joined with single spaces (an empty
        string when no word was produced).
    """
    image = image.unsqueeze(0)
    mos, _ = model(image)
    output_sentence = []

    # Start token, created on the same device as the image so the embedding
    # lookup also works when the model lives on a GPU.
    current_token = torch.tensor([word2idx['<SOS>']], device=image.device)
    hidden = None
    features = model.cnn(image).view(image.size(0), -1)

    for _ in range(max_length):
        embeddings = model.embedding(current_token).unsqueeze(0)
        combined = torch.cat([features.unsqueeze(1), embeddings], dim=2)
        out, hidden = model.lstm(combined, hidden)

        output = model.fc(out.squeeze(0))
        # Greedy choice: keep the highest-scoring token.
        _, current_token = torch.max(output, dim=1)

        # Stop once the <EOS> token is produced.
        if current_token.item() == word2idx['<EOS>']:
            break

        # <SOS> and <PAD> tokens are never added to the caption.
        if current_token.item() not in [word2idx['<SOS>'], word2idx['<PAD>']]:
            output_sentence.append(idx2word[current_token.item()])

    return mos.item(), ' '.join(output_sentence)

# Inference: decode every test image one at a time, without tracking gradients.
with torch.no_grad():
    for imgs, _, _ in tqdm(test_loader):
        for img in imgs:
            img = img.float()
            mos, caption = greedy_decode(model, img)
            predicted_mos_list.append(mos)
            predicted_comments_list.append(caption)

# Collect the results.
result_df = pd.DataFrame({
    'img_name': test_data['img_name'],
    'mos': predicted_mos_list,
    'comments': predicted_comments_list  # captions generated above
})

# A missing comment makes the submission fail, so fall back to the placeholder
# used in sample_submission.csv. greedy_decode returns '' (not NaN) when no word
# is generated, and an empty CSV field is read back as NaN, so empty or
# whitespace-only captions are replaced as well.
result_df['comments'] = (
    result_df['comments'].fillna('').replace(r'^\s*$', 'Nice Image.', regex=True)
)
result_df.to_csv('submit.csv', index=False)

print("Inference completed and results saved to submit.csv.")