In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# This import only resolves inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/Colab Notebooks/Github/AIPlus99

/content/drive/MyDrive/Colab Notebooks/Github/AIPlus99


In [3]:
# List the contents of trained_image_autoencoder/ (relative to the current working directory).
!ls trained_image_autoencoder/

encoder.pt  projector.pt


In [None]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from Homework2_1 import TextToImageClassifier

# mBART-50 tokenizer configured for the English -> French direction.
# The language codes are supplied at load time rather than assigned afterwards.
tokenizer = AutoTokenizer.from_pretrained(
    "facebook/mbart-large-50",
    src_lang="en_XX",
    tgt_lang="fr_XX",
)


# Read the parallel corpus, name its two columns, drop incomplete rows and keep
# a reproducible 1% subsample before splitting 80/20 into train and test.
# NOTE(review): header=None treats the first CSV row as data; if the file has a
# header row, it stays in the frame as a sentence pair -- confirm against the CSV.
csv_path = "eng_-french/eng_-french.csv"
df = (
    pd.read_csv(csv_path, header=None)
    .set_axis(["English", "French"], axis=1)
    .dropna()
    .sample(frac=0.01, random_state=42)
    .reset_index(drop=True)
)
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

def tokenize_target(texts, max_length=64):
    """Tokenize target-language sentences into padded id tensors.

    The sentences are passed through ``text_target`` so the tokenizer encodes
    them in target mode, i.e. with its ``tgt_lang`` settings. Passing them as
    the positional ``text`` argument would encode them in source mode using
    ``src_lang`` instead, and the resulting first token would not match the
    target-language start token used when decoding.

    Args:
        texts: List of target-language strings.
        max_length: Every sequence is padded or truncated to this many tokens.

    Returns:
        The ``input_ids`` tensor produced by the module-level ``tokenizer``
        (one row per input sentence, ``max_length`` columns).
    """
    tokenized = tokenizer(
        text_target=texts,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    return tokenized["input_ids"]

# Pre-tokenize the French column of each split once, train first and then test.
train_targets, test_targets = (
    tokenize_target(split["French"].tolist()) for split in (train_data, test_data)
)

class TranslationDataset(Dataset):
    """Index-aligned pairing of source items with target items.

    ``sources`` and ``targets`` are stored as given; item ``i`` of the dataset
    is the tuple ``(sources[i], targets[i])``.
    """

    def __init__(self, sources, targets):
        self.sources, self.targets = sources, targets

    def __len__(self):
        # The dataset length follows the source side only.
        return len(self.sources)

    def __getitem__(self, idx):
        source_item = self.sources[idx]
        target_item = self.targets[idx]
        return source_item, target_item

# Wrap each split as (English sentence, French token ids) pairs and batch them.
# Only the training loader shuffles.
train_dataset = TranslationDataset(
    sources=train_data["English"].tolist(),
    targets=train_targets,
)
test_dataset = TranslationDataset(
    sources=test_data["English"].tolist(),
    targets=test_targets,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16)

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch-first tensor.

    Args:
        d_model: Size of the last (feature) dimension of the inputs.
        max_len: Number of positions for which encodings are precomputed.

    The table is stored as the non-trainable buffer ``pe`` with shape
    ``(1, max_len, d_model)``.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len).unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(pos * div)
        # For odd d_model there is one fewer odd column than even columns, so
        # the frequency vector is trimmed to fit; for even d_model the slice
        # is a no-op.
        pe[:, 1::2] = torch.cos(pos * div[: d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        ``x`` is indexed as ``(batch, sequence, feature)``: the table is sliced
        along dimension 1 and broadcast over dimension 0.
        """
        return x + self.pe[:, :x.size(1)]


class TextToTranslationModel(nn.Module):
    """Transformer decoder that translates from features of a text/image encoder.

    Each source sentence is turned into one feature vector by
    ``text_image_encoder.extract_features``. That vector is projected to
    ``d_model`` and used as a length-1 memory sequence for the decoder.

    Args:
        text_image_encoder: Object exposing ``extract_features(pairs)``.
            # NOTE(review): assumed to return a (batch, 1536) tensor, matching
            # the input width of ``project_fused`` -- confirm in Homework2_1.
        tgt_vocab_size: Size of the target embedding table and output layer.
        d_model: Decoder hidden size.
        num_layers: Number of decoder layers.
        nhead: Number of attention heads per layer.
        dim_feedforward: Width of each layer's feed-forward sublayer.
    """

    def __init__(self, text_image_encoder, tgt_vocab_size, d_model=512, num_layers=4, nhead=8, dim_feedforward=2048):
        super().__init__()

        self.text_image_encoder = text_image_encoder
        self.project_fused = nn.Linear(1536, d_model)

        self.embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.pos_encoding = PositionalEncoding(d_model)

        # The decoder layers use the default sequence-first layout, which is
        # why forward() transposes its tensors before and after decoding.
        decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward)
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers)
        self.fc_out = nn.Linear(d_model, tgt_vocab_size)

    def forward(self, sentence_list, tgt_input, tgt_mask=None):
        """Compute next-token logits for every target position.

        Args:
            sentence_list: Iterable of source sentences (strings).
            tgt_input: Long tensor of target token ids, ``(batch, seq_len)``.
            tgt_mask: Optional ``(seq_len, seq_len)`` attention mask. When
                omitted, a causal mask is built so that position ``i`` cannot
                attend to positions after ``i``. Without it, teacher-forced
                training would let the decoder read the token it is asked to
                predict.

        Returns:
            Logits of shape ``(batch, seq_len, tgt_vocab_size)``.
        """
        # The encoder is fed (sentence, sentence) pairs, i.e. each sentence is
        # paired with itself.
        sentence_pairs = [(s, s) for s in sentence_list]

        memory = self.text_image_encoder.extract_features(sentence_pairs)
        # (batch, d_model) -> (1, batch, d_model): a one-step memory sequence.
        memory = self.project_fused(memory).unsqueeze(1).transpose(0, 1)

        if tgt_mask is None:
            seq_len = tgt_input.size(1)
            # -inf strictly above the diagonal, 0 elsewhere.
            tgt_mask = torch.triu(
                torch.full((seq_len, seq_len), float("-inf"), device=tgt_input.device),
                diagonal=1,
            )

        tgt_emb = self.embedding(tgt_input)
        tgt_emb = self.pos_encoding(tgt_emb).transpose(0, 1)

        out = self.transformer_decoder(tgt_emb, memory, tgt_mask=tgt_mask)
        return self.fc_out(out.transpose(0, 1))

    def generate(self, sentence, max_len=50, sos_token=tokenizer.lang_code_to_id["fr_XX"], eos_token=2):
        """Greedily decode a translation for one source sentence.

        Args:
            sentence: Source sentence (string).
            max_len: Maximum number of tokens to generate after ``sos_token``.
            sos_token: Id that starts the decoded sequence. The default is
                read from the module-level ``tokenizer`` once, when the class
                is defined. It should match the first token of the sequences
                the model was trained on.
            eos_token: Id that stops decoding once it is produced.

        Returns:
            The decoded string with special tokens removed.

        Side effects:
            Switches the module to eval mode and leaves it there.
        """
        self.eval()
        device = next(self.parameters()).device  # same for every step
        generated = [sos_token]
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for _ in range(max_len):
                tgt_input = torch.tensor([generated], dtype=torch.long).to(device)
                logits = self.forward([sentence], tgt_input)
                next_token = logits[0, -1].argmax(-1).item()
                generated.append(next_token)
                if next_token == eos_token:
                    break
        return tokenizer.decode(generated, skip_special_tokens=True)


# Use the GPU when one is visible; otherwise fall back to CPU instead of
# failing on a hard-coded .to("cuda").
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

text_image_encoder = TextToImageClassifier().to(device)
# len(tokenizer) counts added/special tokens as well, whereas vocab_size may
# cover only the base vocabulary. Sizing the embedding and output layer with
# the full length guarantees every id the tokenizer can emit is in range.
model = TextToTranslationModel(text_image_encoder, tgt_vocab_size=len(tokenizer)).to(device)
# NOTE(review): model.parameters() presumably includes text_image_encoder's
# weights as well -- confirm whether the encoder is meant to be updated here.
optimizer = Adam(model.parameters(), lr=5e-5)
# Padding positions in the targets do not contribute to the loss.
loss_fn = CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

# Run on whatever device the model already lives on.
train_device = next(model.parameters()).device

# Draw up to 10 random samples once. With a fixed seed the draw is identical
# every epoch, so it does not need to be repeated inside the loop. min() keeps
# this from raising when the test split has fewer than 10 rows.
samples = test_data.sample(min(10, len(test_data)), random_state=42).reset_index(drop=True)

for epoch in range(50):
    model.train()
    total_loss = 0
    for src_texts, tgt_ids in tqdm(train_loader):
        tgt_ids = tgt_ids.to(train_device)
        # Teacher forcing: the decoder sees tokens [0, n-1) and is scored
        # against tokens [1, n).
        decoder_input = tgt_ids[:, :-1]
        decoder_target = tgt_ids[:, 1:]
        outputs = model(src_texts, decoder_input)
        loss = loss_fn(outputs.reshape(-1, outputs.size(-1)), decoder_target.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # The printed value is the sum of the per-batch losses for the epoch.
    print(f"Epoch {epoch+1} | Loss: {total_loss:.4f}")

    print("\n=== Random Translation Samples ===")
    model.eval()

    # Qualitative check only: no gradients are needed while generating.
    with torch.no_grad():
        for i, (en, fr) in enumerate(zip(samples["English"], samples["French"])):
            pred = model.generate(en)

            print(f"[{i + 1}]")
            print(f"EN (input): {en}")
            print(f"FR (true) : {fr}")
            print(f"FR (pred) : {pred}")
            print("-" * 80)

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--segmind--tiny-sd/snapshots/cad0bd7495fa6c4bcca01b19a723dc91627fe84f/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--segmind--tiny-sd/snapshots/cad0bd7495fa6c4bcca01b19a723dc91627fe84f/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--segmind--tiny-sd/snapshots/cad0bd7495fa6c4bcca01b19a723dc91627fe84f/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--segmind--tiny-sd/snapshots/cad0bd7495fa6c4bcca01b19a723dc91627fe84f/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


가중치를 받아옵니다 'trained_image_autoencoder'
가중치 로드 완료: text_classifier.pt


  0%|          | 0/88 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  1%|          | 1/88 [00:11<15:59, 11.03s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  2%|▏         | 2/88 [00:22<16:25, 11.46s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  3%|▎         | 3/88 [00:33<15:48, 11.16s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  5%|▍         | 4/88 [00:44<15:12, 10.87s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  6%|▌         | 5/88 [00:52<13:39,  9.88s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  7%|▋         | 6/88 [01:01<13:28,  9.86s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  8%|▊         | 7/88 [01:11<13:06,  9.72s/it]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  9%|▉         | 8/88 [01:22<13:27, 10.10s/it]

  0%|          | 0/10 [00:00<?, ?it/s]