In [4]:
import numpy as np
import pandas as pd 
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/semeval/SemEval2025/test.tsv
/kaggle/input/semeval/SemEval2025/clip1.py
/kaggle/input/semeval/SemEval2025/train.tsv
/kaggle/input/semeval/SemEval2025/CS779-Project5-AdMIRe-MidTerm-Presentation-5.pdf
/kaggle/input/semeval/SemEval2025/2022.csv
/kaggle/input/semeval/SemEval2025/Subtask B/subtask_b_sample.tsv
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/15529605996.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/17543802828.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/s2_18056618867.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/14839290985.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/19106979831.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/s1_18703651931.png
/kaggle/input/semeval/SemEval2025/Subtask B/new blood/13567540079.png
/kaggle/input/semeval/SemEval2025/Subtask B/panda car/12588591958.png
/kaggle/input/semeval/SemEval2025/Subtask B/panda car/s2_16640394825.png
/kaggle/input/semeval/SemEval2025/Subtas

In [8]:
import pandas as pd
import torch
from torch import nn, optim
from transformers import DistilBertModel, DistilBertTokenizer
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm


class ContrastiveDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        sent1, sent2, sim = row['sentence_1'], row['sentence_2'], row['sim']
        sim = 1 if sim == 1 else 0

        encoded1 = self.tokenizer(
            sent1,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )
        encoded2 = self.tokenizer(
            sent2,
            max_length=self.max_len,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )
        
        return (
            encoded1['input_ids'].squeeze(0),
            encoded1['attention_mask'].squeeze(0),
            encoded2['input_ids'].squeeze(0),
            encoded2['attention_mask'].squeeze(0),
            torch.tensor(sim, dtype=torch.float32)
        )


class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin
        self.cosine_similarity = nn.CosineSimilarity(dim=1)

    def forward(self, output1, output2, label):
        similarity = self.cosine_similarity(output1, output2)
        pos_loss = label * torch.exp(-similarity)
        neg_loss = (1 - label) * torch.exp(similarity - self.margin)
        return torch.mean(pos_loss + neg_loss)


sep_token = "[SEP]"

def insert_sep_inside_sentence(sentence, compound):
    if compound in sentence:
        return sentence.replace(compound, f"{sep_token} {compound} {sep_token}")
    return sentence


def train_model(model, dataloader, optimizer, loss_fn, device, epochs=5, save_path="distilbert_model.bin"):
    model.train()
    model.to(device)

    for epoch in range(epochs):
        total_loss = 0
        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            input_ids1, attention_mask1, input_ids2, attention_mask2, labels = [b.to(device) for b in batch]
            outputs1 = model(input_ids1, attention_mask=attention_mask1).last_hidden_state[:, 0, :]
            outputs2 = model(input_ids2, attention_mask=attention_mask2).last_hidden_state[:, 0, :]
            loss = loss_fn(outputs1, outputs2, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")
        torch.save(model.state_dict(), f"{save_path}_epoch_{epoch+1}.bin")
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")


def main():
    df = pd.read_csv('/kaggle/input/semeval/SemEval2025/2022.csv')
    df['sentence_1'] = df.apply(lambda row: insert_sep_inside_sentence(row['sentence_1'], row['MWE1']), axis=1)
    model = DistilBertModel.from_pretrained("distilbert-base-uncased")
    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
    dataset = ContrastiveDataset(df, tokenizer)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
    loss_fn = ContrastiveLoss(margin=1.0)
    optimizer = optim.AdamW(model.parameters(), lr=5e-5)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    save_path = "distilbert_model_contrastive.bin"
    train_model(model, dataloader, optimizer, loss_fn, device, epochs=10, save_path=save_path)

if __name__ == "__main__":
    main()


Epoch 1/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 1/10, Loss: 0.5155


Epoch 2/10: 100%|██████████| 411/411 [01:28<00:00,  4.66it/s]


Epoch 2/10, Loss: 0.3909


Epoch 3/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 3/10, Loss: 0.3518


Epoch 4/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 4/10, Loss: 0.3321


Epoch 5/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 5/10, Loss: 0.3246


Epoch 6/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 6/10, Loss: 0.3158


Epoch 7/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 7/10, Loss: 0.3127


Epoch 8/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 8/10, Loss: 0.3142


Epoch 9/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 9/10, Loss: 0.3053


Epoch 10/10: 100%|██████████| 411/411 [01:28<00:00,  4.67it/s]


Epoch 10/10, Loss: 0.2998
Model saved to distilbert_model_contrastive.bin


In [9]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel, DistilBertTokenizer, DistilBertModel
from PIL import Image
import pandas as pd
import os
from tqdm import tqdm

image_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
text_model = DistilBertModel.from_pretrained("distilbert-base-uncased") 
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

save_path = "distilbert_model_contrastive.bin"  
text_model.load_state_dict(torch.load(save_path, map_location="cpu")) 
print(f"Loaded trained text model from {save_path}")


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image_model.to(device)
text_model.to(device)


image_embedding_reduction = nn.Linear(768, 512).to(device)  

text_embedding_reduction = nn.Linear(768, 512).to(device)

class ImageTextDataset(Dataset):
    def __init__(self, sentences, image_paths, processor, tokenizer, max_len=77):
        self.sentences = sentences
        self.image_paths = image_paths
        self.processor = processor
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        text = self.sentences[idx]
        image_paths = self.image_paths[5 * idx: 5 * idx + 5]  
        images = [Image.open(path).convert("RGB") for path in image_paths]
        
        encoded_text = self.tokenizer(text, max_length=self.max_len, truncation=True, padding="max_length", return_tensors="pt")
     
        image_inputs = self.processor(images=images, return_tensors="pt", padding=True)
       e
        return encoded_text, image_inputs

class PlackettLuceLoss(nn.Module):
    def __init__(self):
        super(PlackettLuceLoss, self).__init__()

    def forward(self, scores, true_order):
        """
        Computes the Plackett-Luce loss.

        Args:
        - scores: Tensor of shape (batch_size, num_items), predicted scores for each item.
        - true_order: Tensor of shape (batch_size, num_items), true rankings or order.

        Returns:
        - loss: Scalar loss value.
        """
        loss = 0
        for batch_idx in range(scores.size(0)):
            current_scores = scores[batch_idx]
            current_order = true_order[batch_idx]

            # Sort scores and corresponding ground truth
            sorted_scores = current_scores[current_order]
            total_prob = 0

            for rank, score in enumerate(sorted_scores):
                numerator = torch.exp(score)
                denominator = torch.sum(torch.exp(sorted_scores[rank:]))
                total_prob += torch.log(numerator / denominator)

            loss -= total_prob

        return loss / scores.size(0)

df = pd.read_csv('/kaggle/input/semeval/SemEval2025/train.tsv', sep="\t")
ipath = []
sentences = []

for compound in df['compound']:
    ch = "".join(char for char in compound if char != " ")
    path = f'/kaggle/input/semeval/SemEval2025/train/{ch}'
    ipath.extend([os.path.join(path, img) for img in os.listdir(path) if img.endswith(".png")])

sentences.extend(df['sentence'])

dataset = ImageTextDataset(sentences, ipath, processor, tokenizer)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

pl_loss = PlackettLuceLoss()
optimizer = torch.optim.AdamW(list(image_model.parameters()) + list(text_model.parameters()), lr=2e-6)

epochs = 20
for epoch in range(epochs):
    image_model.train()
    text_model.train()
    total_loss = 0

    for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
       
        text_inputs, image_inputs = batch
        
      
        input_ids = text_inputs["input_ids"].squeeze(0).to(device) 
        attention_mask = text_inputs["attention_mask"].squeeze(0).to(device)
        
       
        pixel_values = image_inputs["pixel_values"].squeeze(0).to(device) 

        text_model.to(device)
        image_model.to(device)

        text_outputs = text_model(input_ids=input_ids, attention_mask=attention_mask)

        text_embeddings = text_outputs.last_hidden_state[:, 0, :]  
        text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
        text_embeddings = text_embedding_reduction(text_embeddings) 
        text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
        image_outputs = image_model.vision_model(pixel_values=pixel_values)
        image_embeddings = image_outputs[0]  
      
        image_embeddings = image_embeddings[:, 0, :]  
        image_embeddings = image_embeddings.to(device) 
     
        image_embeddings = image_embedding_reduction(image_embeddings)  
        image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)

       
        print(f"Text Embeddings Shape: {text_embeddings.shape}")
        print(f"Image Embeddings Shape: {image_embeddings.shape}")

       
        similarities = torch.matmul(text_embeddings, image_embeddings.T)  
        similarities = similarities.squeeze(0)  

        
        true_order = torch.argsort(similarities, descending=True)

   
        loss = pl_loss(similarities.unsqueeze(0), true_order.unsqueeze(0))
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

   
    torch.save(image_model.state_dict(), f"clip_image_model_epoch_{epoch+1}.bin")
    torch.save(text_model.state_dict(), f"distilbert_text_model_epoch_{epoch+1}.bin")

config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

  text_model.load_state_dict(torch.load(save_path, map_location="cpu"))  # Load state dict


Loaded trained text model from distilbert_model_contrastive.bin


Epoch 1/20:   0%|          | 0/55 [00:00<?, ?it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:   4%|▎         | 2/55 [00:01<00:38,  1.37it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:   5%|▌         | 3/55 [00:02<00:31,  1.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:   7%|▋         | 4/55 [00:02<00:25,  1.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:   9%|▉         | 5/55 [00:02<00:22,  2.25it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  11%|█         | 6/55 [00:03<00:20,  2.45it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  13%|█▎        | 7/55 [00:03<00:19,  2.48it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  15%|█▍        | 8/55 [00:03<00:18,  2.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  16%|█▋        | 9/55 [00:04<00:17,  2.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  18%|█▊        | 10/55 [00:04<00:17,  2.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  20%|██        | 11/55 [00:05<00:17,  2.49it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  22%|██▏       | 12/55 [00:05<00:16,  2.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  24%|██▎       | 13/55 [00:05<00:16,  2.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  25%|██▌       | 14/55 [00:06<00:15,  2.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  27%|██▋       | 15/55 [00:06<00:14,  2.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  29%|██▉       | 16/55 [00:06<00:13,  2.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  31%|███       | 17/55 [00:07<00:13,  2.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  33%|███▎      | 18/55 [00:07<00:13,  2.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  35%|███▍      | 19/55 [00:07<00:13,  2.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  36%|███▋      | 20/55 [00:08<00:12,  2.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  38%|███▊      | 21/55 [00:08<00:12,  2.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  40%|████      | 22/55 [00:08<00:11,  2.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  42%|████▏     | 23/55 [00:09<00:10,  2.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  44%|████▎     | 24/55 [00:09<00:11,  2.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  45%|████▌     | 25/55 [00:10<00:10,  2.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  47%|████▋     | 26/55 [00:10<00:10,  2.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  49%|████▉     | 27/55 [00:10<00:09,  2.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  51%|█████     | 28/55 [00:10<00:09,  2.98it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  53%|█████▎    | 29/55 [00:11<00:08,  3.03it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  55%|█████▍    | 30/55 [00:11<00:08,  2.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  56%|█████▋    | 31/55 [00:12<00:08,  2.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  58%|█████▊    | 32/55 [00:12<00:08,  2.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  60%|██████    | 33/55 [00:12<00:07,  2.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  62%|██████▏   | 34/55 [00:13<00:07,  2.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  64%|██████▎   | 35/55 [00:13<00:06,  2.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  65%|██████▌   | 36/55 [00:13<00:06,  2.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  67%|██████▋   | 37/55 [00:14<00:06,  2.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  69%|██████▉   | 38/55 [00:14<00:05,  2.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  71%|███████   | 39/55 [00:14<00:05,  2.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  73%|███████▎  | 40/55 [00:15<00:05,  2.99it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  75%|███████▍  | 41/55 [00:15<00:04,  2.98it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  76%|███████▋  | 42/55 [00:15<00:04,  2.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  78%|███████▊  | 43/55 [00:16<00:03,  3.07it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  80%|████████  | 44/55 [00:16<00:03,  3.11it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  82%|████████▏ | 45/55 [00:16<00:03,  2.97it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  84%|████████▎ | 46/55 [00:17<00:03,  2.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  85%|████████▌ | 47/55 [00:17<00:02,  2.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  87%|████████▋ | 48/55 [00:17<00:02,  2.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  89%|████████▉ | 49/55 [00:18<00:02,  2.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  91%|█████████ | 50/55 [00:18<00:01,  2.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  93%|█████████▎| 51/55 [00:19<00:01,  2.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  95%|█████████▍| 52/55 [00:19<00:01,  2.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  96%|█████████▋| 53/55 [00:19<00:00,  2.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20:  98%|█████████▊| 54/55 [00:20<00:00,  2.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 1/20: 100%|██████████| 55/55 [00:20<00:00,  2.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 1, Average Loss: 4.6668



Epoch 2/20:   2%|▏         | 1/55 [00:00<00:14,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:   4%|▎         | 2/55 [00:00<00:14,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:   5%|▌         | 3/55 [00:00<00:14,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:   7%|▋         | 4/55 [00:01<00:13,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:   9%|▉         | 5/55 [00:01<00:13,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  11%|█         | 6/55 [00:01<00:13,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  13%|█▎        | 7/55 [00:01<00:12,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  15%|█▍        | 8/55 [00:02<00:12,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  16%|█▋        | 9/55 [00:02<00:12,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  18%|█▊        | 10/55 [00:02<00:12,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  20%|██        | 11/55 [00:03<00:12,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  22%|██▏       | 12/55 [00:03<00:12,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  24%|██▎       | 13/55 [00:03<00:11,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  25%|██▌       | 14/55 [00:03<00:11,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  27%|██▋       | 15/55 [00:04<00:11,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  29%|██▉       | 16/55 [00:04<00:11,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  31%|███       | 17/55 [00:04<00:10,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  33%|███▎      | 18/55 [00:04<00:10,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  35%|███▍      | 19/55 [00:05<00:09,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  36%|███▋      | 20/55 [00:05<00:09,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  38%|███▊      | 21/55 [00:05<00:08,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  40%|████      | 22/55 [00:06<00:08,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  42%|████▏     | 23/55 [00:06<00:08,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  44%|████▎     | 24/55 [00:06<00:08,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  45%|████▌     | 25/55 [00:06<00:08,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  47%|████▋     | 26/55 [00:07<00:07,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  49%|████▉     | 27/55 [00:07<00:07,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  51%|█████     | 28/55 [00:07<00:07,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  53%|█████▎    | 29/55 [00:07<00:07,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  60%|██████    | 33/55 [00:09<00:06,  3.44it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  62%|██████▏   | 34/55 [00:09<00:06,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.40it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  65%|██████▌   | 36/55 [00:10<00:05,  3.33it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.31it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.17it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  71%|███████   | 39/55 [00:11<00:05,  3.14it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.10it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  75%|███████▍  | 41/55 [00:11<00:04,  3.15it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  76%|███████▋  | 42/55 [00:11<00:04,  3.23it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  78%|███████▊  | 43/55 [00:12<00:03,  3.32it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  80%|████████  | 44/55 [00:12<00:03,  3.31it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  84%|████████▎ | 46/55 [00:13<00:02,  3.49it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  85%|████████▌ | 47/55 [00:13<00:02,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  91%|█████████ | 50/55 [00:14<00:01,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  93%|█████████▎| 51/55 [00:14<00:01,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20:  98%|█████████▊| 54/55 [00:15<00:00,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 2/20: 100%|██████████| 55/55 [00:15<00:00,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 2, Average Loss: 4.3377



Epoch 3/20:   2%|▏         | 1/55 [00:00<00:15,  3.39it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:   4%|▎         | 2/55 [00:00<00:15,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:   5%|▌         | 3/55 [00:00<00:14,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:   7%|▋         | 4/55 [00:01<00:13,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:   9%|▉         | 5/55 [00:01<00:13,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  11%|█         | 6/55 [00:01<00:13,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  13%|█▎        | 7/55 [00:01<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  15%|█▍        | 8/55 [00:02<00:12,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  16%|█▋        | 9/55 [00:02<00:12,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  18%|█▊        | 10/55 [00:02<00:11,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  20%|██        | 11/55 [00:02<00:11,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  22%|██▏       | 12/55 [00:03<00:10,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  24%|██▎       | 13/55 [00:03<00:10,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  25%|██▌       | 14/55 [00:03<00:11,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  27%|██▋       | 15/55 [00:04<00:11,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  29%|██▉       | 16/55 [00:04<00:10,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  31%|███       | 17/55 [00:04<00:10,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  33%|███▎      | 18/55 [00:04<00:09,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  35%|███▍      | 19/55 [00:05<00:09,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  36%|███▋      | 20/55 [00:05<00:09,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  38%|███▊      | 21/55 [00:05<00:09,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  40%|████      | 22/55 [00:05<00:08,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  42%|████▏     | 23/55 [00:06<00:08,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  44%|████▎     | 24/55 [00:06<00:08,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  45%|████▌     | 25/55 [00:06<00:07,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  47%|████▋     | 26/55 [00:06<00:07,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  49%|████▉     | 27/55 [00:07<00:07,  3.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  51%|█████     | 28/55 [00:07<00:06,  4.02it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  60%|██████    | 33/55 [00:08<00:05,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  62%|██████▏   | 34/55 [00:08<00:05,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  67%|██████▋   | 37/55 [00:09<00:05,  3.43it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  71%|███████   | 39/55 [00:10<00:04,  3.39it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  73%|███████▎  | 40/55 [00:10<00:04,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  80%|████████  | 44/55 [00:11<00:03,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  91%|█████████ | 50/55 [00:13<00:01,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 3/20: 100%|██████████| 55/55 [00:14<00:00,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 3, Average Loss: 3.8649



Epoch 4/20:   2%|▏         | 1/55 [00:00<00:15,  3.43it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:   4%|▎         | 2/55 [00:00<00:14,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:   5%|▌         | 3/55 [00:00<00:14,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:   7%|▋         | 4/55 [00:01<00:14,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:   9%|▉         | 5/55 [00:01<00:13,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  11%|█         | 6/55 [00:01<00:13,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  13%|█▎        | 7/55 [00:01<00:12,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  15%|█▍        | 8/55 [00:02<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  16%|█▋        | 9/55 [00:02<00:12,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  18%|█▊        | 10/55 [00:02<00:12,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  20%|██        | 11/55 [00:02<00:12,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  22%|██▏       | 12/55 [00:03<00:11,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  24%|██▎       | 13/55 [00:03<00:11,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  25%|██▌       | 14/55 [00:03<00:10,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  27%|██▋       | 15/55 [00:04<00:10,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  29%|██▉       | 16/55 [00:04<00:10,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  31%|███       | 17/55 [00:04<00:10,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  33%|███▎      | 18/55 [00:04<00:10,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  35%|███▍      | 19/55 [00:05<00:09,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  36%|███▋      | 20/55 [00:05<00:09,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  38%|███▊      | 21/55 [00:05<00:09,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  40%|████      | 22/55 [00:05<00:08,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  42%|████▏     | 23/55 [00:06<00:08,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  44%|████▎     | 24/55 [00:06<00:08,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  45%|████▌     | 25/55 [00:06<00:08,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  47%|████▋     | 26/55 [00:07<00:07,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  49%|████▉     | 27/55 [00:07<00:07,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  51%|█████     | 28/55 [00:07<00:07,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  53%|█████▎    | 29/55 [00:07<00:07,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  60%|██████    | 33/55 [00:08<00:06,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.35it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.28it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  71%|███████   | 39/55 [00:10<00:04,  3.36it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  80%|████████  | 44/55 [00:12<00:03,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  91%|█████████ | 50/55 [00:13<00:01,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 4/20: 100%|██████████| 55/55 [00:14<00:00,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 4, Average Loss: 3.4630



Epoch 5/20:   2%|▏         | 1/55 [00:00<00:15,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:   4%|▎         | 2/55 [00:00<00:14,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:   5%|▌         | 3/55 [00:00<00:14,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:   7%|▋         | 4/55 [00:01<00:13,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:   9%|▉         | 5/55 [00:01<00:13,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  11%|█         | 6/55 [00:01<00:13,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  13%|█▎        | 7/55 [00:01<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  15%|█▍        | 8/55 [00:02<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  16%|█▋        | 9/55 [00:02<00:12,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  18%|█▊        | 10/55 [00:02<00:12,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  20%|██        | 11/55 [00:02<00:11,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  22%|██▏       | 12/55 [00:03<00:11,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  24%|██▎       | 13/55 [00:03<00:11,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  25%|██▌       | 14/55 [00:03<00:10,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  27%|██▋       | 15/55 [00:04<00:10,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  29%|██▉       | 16/55 [00:04<00:10,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  31%|███       | 17/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  33%|███▎      | 18/55 [00:04<00:10,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  35%|███▍      | 19/55 [00:05<00:09,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  36%|███▋      | 20/55 [00:05<00:09,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  38%|███▊      | 21/55 [00:05<00:08,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  40%|████      | 22/55 [00:05<00:08,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  42%|████▏     | 23/55 [00:06<00:08,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  44%|████▎     | 24/55 [00:06<00:08,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  45%|████▌     | 25/55 [00:06<00:08,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  47%|████▋     | 26/55 [00:07<00:07,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  49%|████▉     | 27/55 [00:07<00:07,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  51%|█████     | 28/55 [00:07<00:07,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  58%|█████▊    | 32/55 [00:08<00:05,  3.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  60%|██████    | 33/55 [00:08<00:05,  3.97it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  71%|███████   | 39/55 [00:10<00:04,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  80%|████████  | 44/55 [00:11<00:02,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  82%|████████▏ | 45/55 [00:11<00:02,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  91%|█████████ | 50/55 [00:13<00:01,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 5/20: 100%|██████████| 55/55 [00:14<00:00,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 5, Average Loss: 3.1713



Epoch 6/20:   2%|▏         | 1/55 [00:00<00:14,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:   4%|▎         | 2/55 [00:00<00:14,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:   5%|▌         | 3/55 [00:00<00:14,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:   7%|▋         | 4/55 [00:01<00:13,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:   9%|▉         | 5/55 [00:01<00:12,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  11%|█         | 6/55 [00:01<00:12,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  13%|█▎        | 7/55 [00:01<00:12,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  15%|█▍        | 8/55 [00:02<00:12,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  16%|█▋        | 9/55 [00:02<00:12,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  18%|█▊        | 10/55 [00:02<00:12,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  20%|██        | 11/55 [00:02<00:11,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  22%|██▏       | 12/55 [00:03<00:11,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  24%|██▎       | 13/55 [00:03<00:10,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  25%|██▌       | 14/55 [00:03<00:11,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  27%|██▋       | 15/55 [00:03<00:10,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  29%|██▉       | 16/55 [00:04<00:10,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  31%|███       | 17/55 [00:04<00:09,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  33%|███▎      | 18/55 [00:04<00:09,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  35%|███▍      | 19/55 [00:05<00:09,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  36%|███▋      | 20/55 [00:05<00:09,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  38%|███▊      | 21/55 [00:05<00:09,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  40%|████      | 22/55 [00:05<00:09,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  42%|████▏     | 23/55 [00:06<00:08,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  44%|████▎     | 24/55 [00:06<00:08,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  45%|████▌     | 25/55 [00:06<00:07,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  47%|████▋     | 26/55 [00:06<00:07,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  49%|████▉     | 27/55 [00:07<00:07,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  51%|█████     | 28/55 [00:07<00:06,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  60%|██████    | 33/55 [00:08<00:06,  3.33it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  62%|██████▏   | 34/55 [00:09<00:06,  3.20it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  64%|██████▎   | 35/55 [00:09<00:06,  3.16it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  65%|██████▌   | 36/55 [00:09<00:06,  3.15it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.03it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.14it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  71%|███████   | 39/55 [00:10<00:05,  3.19it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.35it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  75%|███████▍  | 41/55 [00:11<00:04,  3.48it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  80%|████████  | 44/55 [00:12<00:02,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  91%|█████████ | 50/55 [00:13<00:01,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 6/20: 100%|██████████| 55/55 [00:14<00:00,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 6, Average Loss: 3.0185



Epoch 7/20:   2%|▏         | 1/55 [00:00<00:15,  3.45it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:   4%|▎         | 2/55 [00:00<00:14,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:   5%|▌         | 3/55 [00:00<00:13,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:   7%|▋         | 4/55 [00:01<00:13,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:   9%|▉         | 5/55 [00:01<00:13,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  11%|█         | 6/55 [00:01<00:13,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  13%|█▎        | 7/55 [00:01<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  15%|█▍        | 8/55 [00:02<00:12,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  16%|█▋        | 9/55 [00:02<00:12,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  18%|█▊        | 10/55 [00:02<00:12,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  20%|██        | 11/55 [00:02<00:11,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  22%|██▏       | 12/55 [00:03<00:11,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  24%|██▎       | 13/55 [00:03<00:10,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  25%|██▌       | 14/55 [00:03<00:10,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  27%|██▋       | 15/55 [00:03<00:10,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  29%|██▉       | 16/55 [00:04<00:10,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  31%|███       | 17/55 [00:04<00:10,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  33%|███▎      | 18/55 [00:04<00:09,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  35%|███▍      | 19/55 [00:05<00:09,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  36%|███▋      | 20/55 [00:05<00:09,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  38%|███▊      | 21/55 [00:05<00:09,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  40%|████      | 22/55 [00:05<00:08,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  42%|████▏     | 23/55 [00:06<00:08,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  44%|████▎     | 24/55 [00:06<00:08,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  45%|████▌     | 25/55 [00:06<00:07,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  47%|████▋     | 26/55 [00:06<00:07,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  49%|████▉     | 27/55 [00:07<00:07,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  51%|█████     | 28/55 [00:07<00:07,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  60%|██████    | 33/55 [00:08<00:05,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  71%|███████   | 39/55 [00:10<00:04,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  73%|███████▎  | 40/55 [00:10<00:04,  3.46it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  80%|████████  | 44/55 [00:11<00:02,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  91%|█████████ | 50/55 [00:13<00:01,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 7/20: 100%|██████████| 55/55 [00:14<00:00,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 7, Average Loss: 2.9357



Epoch 8/20:   2%|▏         | 1/55 [00:00<00:14,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:   4%|▎         | 2/55 [00:00<00:14,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:   5%|▌         | 3/55 [00:00<00:13,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:   7%|▋         | 4/55 [00:01<00:13,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:   9%|▉         | 5/55 [00:01<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  11%|█         | 6/55 [00:01<00:12,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  13%|█▎        | 7/55 [00:01<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  15%|█▍        | 8/55 [00:02<00:12,  3.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  16%|█▋        | 9/55 [00:02<00:11,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  18%|█▊        | 10/55 [00:02<00:12,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  20%|██        | 11/55 [00:02<00:11,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  22%|██▏       | 12/55 [00:03<00:11,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  24%|██▎       | 13/55 [00:03<00:11,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  25%|██▌       | 14/55 [00:03<00:10,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  27%|██▋       | 15/55 [00:04<00:10,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  29%|██▉       | 16/55 [00:04<00:10,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  31%|███       | 17/55 [00:04<00:10,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  33%|███▎      | 18/55 [00:04<00:10,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  35%|███▍      | 19/55 [00:05<00:09,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  36%|███▋      | 20/55 [00:05<00:09,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  38%|███▊      | 21/55 [00:05<00:08,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  40%|████      | 22/55 [00:05<00:08,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  42%|████▏     | 23/55 [00:06<00:08,  3.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  44%|████▎     | 24/55 [00:06<00:07,  3.99it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  45%|████▌     | 25/55 [00:06<00:07,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  47%|████▋     | 26/55 [00:06<00:07,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  49%|████▉     | 27/55 [00:07<00:07,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  51%|█████     | 28/55 [00:07<00:07,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  53%|█████▎    | 29/55 [00:07<00:07,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  60%|██████    | 33/55 [00:08<00:05,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  67%|██████▋   | 37/55 [00:09<00:05,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.45it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  71%|███████   | 39/55 [00:10<00:04,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  73%|███████▎  | 40/55 [00:10<00:04,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  80%|████████  | 44/55 [00:11<00:03,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  87%|████████▋ | 48/55 [00:12<00:02,  3.43it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.44it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  91%|█████████ | 50/55 [00:13<00:01,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 8/20: 100%|██████████| 55/55 [00:14<00:00,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 8, Average Loss: 2.8713



Epoch 9/20:   2%|▏         | 1/55 [00:00<00:14,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:   4%|▎         | 2/55 [00:00<00:13,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:   5%|▌         | 3/55 [00:00<00:13,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:   7%|▋         | 4/55 [00:01<00:13,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:   9%|▉         | 5/55 [00:01<00:13,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  11%|█         | 6/55 [00:01<00:13,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  13%|█▎        | 7/55 [00:01<00:12,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  15%|█▍        | 8/55 [00:02<00:12,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  16%|█▋        | 9/55 [00:02<00:12,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  18%|█▊        | 10/55 [00:02<00:12,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  20%|██        | 11/55 [00:02<00:11,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  22%|██▏       | 12/55 [00:03<00:11,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  24%|██▎       | 13/55 [00:03<00:11,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  25%|██▌       | 14/55 [00:03<00:10,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  27%|██▋       | 15/55 [00:04<00:10,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  29%|██▉       | 16/55 [00:04<00:10,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  31%|███       | 17/55 [00:04<00:10,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  33%|███▎      | 18/55 [00:04<00:09,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  35%|███▍      | 19/55 [00:05<00:09,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  36%|███▋      | 20/55 [00:05<00:09,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  38%|███▊      | 21/55 [00:05<00:09,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  40%|████      | 22/55 [00:05<00:09,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  42%|████▏     | 23/55 [00:06<00:08,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  44%|████▎     | 24/55 [00:06<00:08,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  45%|████▌     | 25/55 [00:06<00:07,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  47%|████▋     | 26/55 [00:06<00:07,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  49%|████▉     | 27/55 [00:07<00:07,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  51%|█████     | 28/55 [00:07<00:06,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  60%|██████    | 33/55 [00:08<00:05,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  71%|███████   | 39/55 [00:10<00:04,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.95it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  80%|████████  | 44/55 [00:11<00:02,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  82%|████████▏ | 45/55 [00:11<00:02,  4.00it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.95it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  89%|████████▉ | 49/55 [00:12<00:01,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  91%|█████████ | 50/55 [00:13<00:01,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  96%|█████████▋| 53/55 [00:13<00:00,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 9/20: 100%|██████████| 55/55 [00:14<00:00,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 9, Average Loss: 2.8162



Epoch 10/20:   2%|▏         | 1/55 [00:00<00:13,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:   4%|▎         | 2/55 [00:00<00:13,  4.03it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:   5%|▌         | 3/55 [00:00<00:13,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:   7%|▋         | 4/55 [00:01<00:13,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:   9%|▉         | 5/55 [00:01<00:13,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  11%|█         | 6/55 [00:01<00:12,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  13%|█▎        | 7/55 [00:01<00:12,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  15%|█▍        | 8/55 [00:02<00:12,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  16%|█▋        | 9/55 [00:02<00:11,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  18%|█▊        | 10/55 [00:02<00:11,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  20%|██        | 11/55 [00:02<00:11,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  22%|██▏       | 12/55 [00:03<00:11,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  24%|██▎       | 13/55 [00:03<00:11,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  25%|██▌       | 14/55 [00:03<00:11,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  27%|██▋       | 15/55 [00:04<00:11,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  29%|██▉       | 16/55 [00:04<00:10,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  31%|███       | 17/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  33%|███▎      | 18/55 [00:04<00:09,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  35%|███▍      | 19/55 [00:05<00:09,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  36%|███▋      | 20/55 [00:05<00:09,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  38%|███▊      | 21/55 [00:05<00:09,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  40%|████      | 22/55 [00:05<00:08,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  42%|████▏     | 23/55 [00:06<00:08,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  44%|████▎     | 24/55 [00:06<00:08,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  45%|████▌     | 25/55 [00:06<00:08,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  47%|████▋     | 26/55 [00:06<00:08,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  49%|████▉     | 27/55 [00:07<00:07,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  51%|█████     | 28/55 [00:07<00:07,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  53%|█████▎    | 29/55 [00:07<00:07,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  55%|█████▍    | 30/55 [00:08<00:07,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  60%|██████    | 33/55 [00:08<00:05,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  64%|██████▎   | 35/55 [00:09<00:06,  3.30it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  65%|██████▌   | 36/55 [00:09<00:06,  3.15it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.16it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.13it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  71%|███████   | 39/55 [00:10<00:05,  2.97it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.15it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  75%|███████▍  | 41/55 [00:11<00:04,  3.35it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.48it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  80%|████████  | 44/55 [00:12<00:03,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  91%|█████████ | 50/55 [00:13<00:01,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  93%|█████████▎| 51/55 [00:14<00:01,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 10/20: 100%|██████████| 55/55 [00:15<00:00,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 10, Average Loss: 2.7837



Epoch 11/20:   2%|▏         | 1/55 [00:00<00:15,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:   4%|▎         | 2/55 [00:00<00:14,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:   5%|▌         | 3/55 [00:00<00:13,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:   7%|▋         | 4/55 [00:01<00:13,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:   9%|▉         | 5/55 [00:01<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  11%|█         | 6/55 [00:01<00:13,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  13%|█▎        | 7/55 [00:01<00:12,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  15%|█▍        | 8/55 [00:02<00:12,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  16%|█▋        | 9/55 [00:02<00:12,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  18%|█▊        | 10/55 [00:02<00:12,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  20%|██        | 11/55 [00:02<00:11,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  22%|██▏       | 12/55 [00:03<00:11,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  24%|██▎       | 13/55 [00:03<00:11,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  25%|██▌       | 14/55 [00:03<00:11,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  27%|██▋       | 15/55 [00:04<00:11,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  29%|██▉       | 16/55 [00:04<00:10,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  31%|███       | 17/55 [00:04<00:10,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  33%|███▎      | 18/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  35%|███▍      | 19/55 [00:05<00:09,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  36%|███▋      | 20/55 [00:05<00:09,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  38%|███▊      | 21/55 [00:05<00:09,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  40%|████      | 22/55 [00:05<00:09,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  42%|████▏     | 23/55 [00:06<00:08,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  44%|████▎     | 24/55 [00:06<00:08,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  45%|████▌     | 25/55 [00:06<00:08,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  47%|████▋     | 26/55 [00:07<00:07,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  49%|████▉     | 27/55 [00:07<00:07,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  51%|█████     | 28/55 [00:07<00:07,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  60%|██████    | 33/55 [00:08<00:05,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.43it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.44it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  71%|███████   | 39/55 [00:10<00:04,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  73%|███████▎  | 40/55 [00:10<00:04,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  80%|████████  | 44/55 [00:11<00:02,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  91%|█████████ | 50/55 [00:13<00:01,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 11/20: 100%|██████████| 55/55 [00:14<00:00,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 11, Average Loss: 2.7669



Epoch 12/20:   2%|▏         | 1/55 [00:00<00:15,  3.46it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:   4%|▎         | 2/55 [00:00<00:14,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:   5%|▌         | 3/55 [00:00<00:14,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:   7%|▋         | 4/55 [00:01<00:14,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:   9%|▉         | 5/55 [00:01<00:13,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  11%|█         | 6/55 [00:01<00:13,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  13%|█▎        | 7/55 [00:01<00:13,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  15%|█▍        | 8/55 [00:02<00:12,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  16%|█▋        | 9/55 [00:02<00:11,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  18%|█▊        | 10/55 [00:02<00:11,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  20%|██        | 11/55 [00:02<00:11,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  22%|██▏       | 12/55 [00:03<00:11,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  24%|██▎       | 13/55 [00:03<00:10,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  25%|██▌       | 14/55 [00:03<00:10,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  27%|██▋       | 15/55 [00:04<00:10,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  29%|██▉       | 16/55 [00:04<00:10,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  31%|███       | 17/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  33%|███▎      | 18/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  35%|███▍      | 19/55 [00:05<00:10,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  36%|███▋      | 20/55 [00:05<00:09,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  38%|███▊      | 21/55 [00:05<00:09,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  40%|████      | 22/55 [00:06<00:09,  3.44it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  42%|████▏     | 23/55 [00:06<00:09,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  44%|████▎     | 24/55 [00:06<00:09,  3.33it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  45%|████▌     | 25/55 [00:06<00:09,  3.32it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  47%|████▋     | 26/55 [00:07<00:08,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  49%|████▉     | 27/55 [00:07<00:07,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  51%|█████     | 28/55 [00:07<00:07,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  53%|█████▎    | 29/55 [00:08<00:07,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  55%|█████▍    | 30/55 [00:08<00:07,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  60%|██████    | 33/55 [00:09<00:06,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  67%|██████▋   | 37/55 [00:10<00:05,  3.46it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  69%|██████▉   | 38/55 [00:10<00:05,  3.39it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  71%|███████   | 39/55 [00:10<00:04,  3.48it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.49it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  80%|████████  | 44/55 [00:12<00:02,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  91%|█████████ | 50/55 [00:13<00:01,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  93%|█████████▎| 51/55 [00:14<00:01,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 12/20: 100%|██████████| 55/55 [00:15<00:00,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 12, Average Loss: 2.7497



Epoch 13/20:   2%|▏         | 1/55 [00:00<00:15,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:   4%|▎         | 2/55 [00:00<00:14,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:   5%|▌         | 3/55 [00:00<00:14,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:   7%|▋         | 4/55 [00:01<00:13,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:   9%|▉         | 5/55 [00:01<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  11%|█         | 6/55 [00:01<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  13%|█▎        | 7/55 [00:01<00:13,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  15%|█▍        | 8/55 [00:02<00:13,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  16%|█▋        | 9/55 [00:02<00:12,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  18%|█▊        | 10/55 [00:02<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  20%|██        | 11/55 [00:02<00:11,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  22%|██▏       | 12/55 [00:03<00:11,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  24%|██▎       | 13/55 [00:03<00:11,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  25%|██▌       | 14/55 [00:03<00:11,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  27%|██▋       | 15/55 [00:04<00:10,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  29%|██▉       | 16/55 [00:04<00:10,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  31%|███       | 17/55 [00:04<00:09,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  33%|███▎      | 18/55 [00:04<00:09,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  35%|███▍      | 19/55 [00:05<00:09,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  36%|███▋      | 20/55 [00:05<00:09,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  38%|███▊      | 21/55 [00:05<00:09,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  40%|████      | 22/55 [00:05<00:08,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  42%|████▏     | 23/55 [00:06<00:08,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  44%|████▎     | 24/55 [00:06<00:08,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  45%|████▌     | 25/55 [00:06<00:07,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  47%|████▋     | 26/55 [00:06<00:07,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  49%|████▉     | 27/55 [00:07<00:07,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  51%|█████     | 28/55 [00:07<00:06,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  60%|██████    | 33/55 [00:08<00:05,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  71%|███████   | 39/55 [00:10<00:04,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  80%|████████  | 44/55 [00:11<00:02,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  82%|████████▏ | 45/55 [00:11<00:02,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  91%|█████████ | 50/55 [00:13<00:01,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 13/20: 100%|██████████| 55/55 [00:14<00:00,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 13, Average Loss: 2.7396



Epoch 14/20:   2%|▏         | 1/55 [00:00<00:14,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:   4%|▎         | 2/55 [00:00<00:14,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:   5%|▌         | 3/55 [00:00<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:   7%|▋         | 4/55 [00:01<00:13,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:   9%|▉         | 5/55 [00:01<00:13,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  11%|█         | 6/55 [00:01<00:12,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  13%|█▎        | 7/55 [00:01<00:12,  3.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  15%|█▍        | 8/55 [00:02<00:12,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  16%|█▋        | 9/55 [00:02<00:12,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  18%|█▊        | 10/55 [00:02<00:12,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  20%|██        | 11/55 [00:02<00:11,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  22%|██▏       | 12/55 [00:03<00:11,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  24%|██▎       | 13/55 [00:03<00:11,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  25%|██▌       | 14/55 [00:03<00:10,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  27%|██▋       | 15/55 [00:04<00:10,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  29%|██▉       | 16/55 [00:04<00:10,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  31%|███       | 17/55 [00:04<00:11,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  33%|███▎      | 18/55 [00:04<00:10,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  35%|███▍      | 19/55 [00:05<00:10,  3.46it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  36%|███▋      | 20/55 [00:05<00:09,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  38%|███▊      | 21/55 [00:05<00:10,  3.36it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  40%|████      | 22/55 [00:06<00:09,  3.39it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  42%|████▏     | 23/55 [00:06<00:09,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  44%|████▎     | 24/55 [00:06<00:08,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  45%|████▌     | 25/55 [00:06<00:08,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  47%|████▋     | 26/55 [00:07<00:07,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  49%|████▉     | 27/55 [00:07<00:07,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  51%|█████     | 28/55 [00:07<00:07,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  58%|█████▊    | 32/55 [00:08<00:05,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  60%|██████    | 33/55 [00:08<00:05,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  71%|███████   | 39/55 [00:10<00:04,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  80%|████████  | 44/55 [00:11<00:02,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  91%|█████████ | 50/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 14/20: 100%|██████████| 55/55 [00:14<00:00,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 14, Average Loss: 2.7289



Epoch 15/20:   2%|▏         | 1/55 [00:00<00:14,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:   4%|▎         | 2/55 [00:00<00:14,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:   5%|▌         | 3/55 [00:00<00:13,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:   7%|▋         | 4/55 [00:01<00:13,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:   9%|▉         | 5/55 [00:01<00:13,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  11%|█         | 6/55 [00:01<00:13,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  13%|█▎        | 7/55 [00:01<00:12,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  15%|█▍        | 8/55 [00:02<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  16%|█▋        | 9/55 [00:02<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  18%|█▊        | 10/55 [00:02<00:11,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  20%|██        | 11/55 [00:02<00:11,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  22%|██▏       | 12/55 [00:03<00:11,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  24%|██▎       | 13/55 [00:03<00:10,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  25%|██▌       | 14/55 [00:03<00:11,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  27%|██▋       | 15/55 [00:03<00:10,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  29%|██▉       | 16/55 [00:04<00:10,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  31%|███       | 17/55 [00:04<00:10,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  33%|███▎      | 18/55 [00:04<00:09,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  35%|███▍      | 19/55 [00:05<00:09,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  36%|███▋      | 20/55 [00:05<00:09,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  38%|███▊      | 21/55 [00:05<00:09,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  40%|████      | 22/55 [00:05<00:09,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  42%|████▏     | 23/55 [00:06<00:08,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  44%|████▎     | 24/55 [00:06<00:08,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  45%|████▌     | 25/55 [00:06<00:08,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  47%|████▋     | 26/55 [00:06<00:07,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  49%|████▉     | 27/55 [00:07<00:07,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  51%|█████     | 28/55 [00:07<00:07,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  60%|██████    | 33/55 [00:08<00:05,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  67%|██████▋   | 37/55 [00:09<00:05,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  71%|███████   | 39/55 [00:10<00:04,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  80%|████████  | 44/55 [00:11<00:02,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  82%|████████▏ | 45/55 [00:11<00:02,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.98it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.95it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  89%|████████▉ | 49/55 [00:12<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  91%|█████████ | 50/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 15/20: 100%|██████████| 55/55 [00:14<00:00,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 15, Average Loss: 2.7200



Epoch 16/20:   2%|▏         | 1/55 [00:00<00:14,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:   4%|▎         | 2/55 [00:00<00:14,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:   5%|▌         | 3/55 [00:00<00:13,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:   7%|▋         | 4/55 [00:01<00:13,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:   9%|▉         | 5/55 [00:01<00:13,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  11%|█         | 6/55 [00:01<00:12,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  13%|█▎        | 7/55 [00:01<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  15%|█▍        | 8/55 [00:02<00:12,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  16%|█▋        | 9/55 [00:02<00:12,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  18%|█▊        | 10/55 [00:02<00:11,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  20%|██        | 11/55 [00:02<00:11,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  22%|██▏       | 12/55 [00:03<00:10,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  24%|██▎       | 13/55 [00:03<00:11,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  25%|██▌       | 14/55 [00:03<00:11,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  27%|██▋       | 15/55 [00:03<00:11,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  29%|██▉       | 16/55 [00:04<00:11,  3.45it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  31%|███       | 17/55 [00:04<00:11,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  33%|███▎      | 18/55 [00:04<00:11,  3.32it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  35%|███▍      | 19/55 [00:05<00:11,  3.27it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  36%|███▋      | 20/55 [00:05<00:10,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  38%|███▊      | 21/55 [00:05<00:09,  3.49it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  40%|████      | 22/55 [00:06<00:09,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  42%|████▏     | 23/55 [00:06<00:08,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  44%|████▎     | 24/55 [00:06<00:08,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  45%|████▌     | 25/55 [00:06<00:08,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  47%|████▋     | 26/55 [00:07<00:07,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  49%|████▉     | 27/55 [00:07<00:07,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  51%|█████     | 28/55 [00:07<00:07,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  53%|█████▎    | 29/55 [00:07<00:07,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  60%|██████    | 33/55 [00:08<00:05,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.92it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  67%|██████▋   | 37/55 [00:10<00:04,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  71%|███████   | 39/55 [00:10<00:04,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  80%|████████  | 44/55 [00:11<00:02,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  91%|█████████ | 50/55 [00:13<00:01,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 16/20: 100%|██████████| 55/55 [00:14<00:00,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 16, Average Loss: 2.7160



Epoch 17/20:   2%|▏         | 1/55 [00:00<00:16,  3.24it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:   4%|▎         | 2/55 [00:00<00:15,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:   5%|▌         | 3/55 [00:00<00:14,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:   7%|▋         | 4/55 [00:01<00:14,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:   9%|▉         | 5/55 [00:01<00:13,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  11%|█         | 6/55 [00:01<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  13%|█▎        | 7/55 [00:01<00:12,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  15%|█▍        | 8/55 [00:02<00:12,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  16%|█▋        | 9/55 [00:02<00:12,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  18%|█▊        | 10/55 [00:02<00:11,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  20%|██        | 11/55 [00:02<00:11,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  22%|██▏       | 12/55 [00:03<00:10,  3.95it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  24%|██▎       | 13/55 [00:03<00:10,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  25%|██▌       | 14/55 [00:03<00:10,  3.94it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  27%|██▋       | 15/55 [00:03<00:10,  3.93it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  29%|██▉       | 16/55 [00:04<00:10,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  31%|███       | 17/55 [00:04<00:09,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  33%|███▎      | 18/55 [00:04<00:09,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  35%|███▍      | 19/55 [00:04<00:09,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  36%|███▋      | 20/55 [00:05<00:09,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  38%|███▊      | 21/55 [00:05<00:09,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  40%|████      | 22/55 [00:05<00:08,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  42%|████▏     | 23/55 [00:06<00:08,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  44%|████▎     | 24/55 [00:06<00:08,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  45%|████▌     | 25/55 [00:06<00:07,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  47%|████▋     | 26/55 [00:06<00:07,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  49%|████▉     | 27/55 [00:07<00:07,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  51%|█████     | 28/55 [00:07<00:07,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  55%|█████▍    | 30/55 [00:07<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  58%|█████▊    | 32/55 [00:08<00:05,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  60%|██████    | 33/55 [00:08<00:05,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  62%|██████▏   | 34/55 [00:08<00:05,  3.88it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.98it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  69%|██████▉   | 38/55 [00:09<00:04,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  71%|███████   | 39/55 [00:10<00:04,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  73%|███████▎  | 40/55 [00:10<00:04,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  80%|████████  | 44/55 [00:11<00:02,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  82%|████████▏ | 45/55 [00:11<00:02,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  87%|████████▋ | 48/55 [00:12<00:01,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  89%|████████▉ | 49/55 [00:12<00:01,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  91%|█████████ | 50/55 [00:13<00:01,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  95%|█████████▍| 52/55 [00:13<00:00,  3.85it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  96%|█████████▋| 53/55 [00:13<00:00,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 17/20: 100%|██████████| 55/55 [00:14<00:00,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 17, Average Loss: 2.7121



Epoch 18/20:   2%|▏         | 1/55 [00:00<00:15,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:   4%|▎         | 2/55 [00:00<00:15,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:   5%|▌         | 3/55 [00:00<00:16,  3.24it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:   7%|▋         | 4/55 [00:01<00:15,  3.27it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:   9%|▉         | 5/55 [00:01<00:14,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  11%|█         | 6/55 [00:01<00:13,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  13%|█▎        | 7/55 [00:01<00:13,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  15%|█▍        | 8/55 [00:02<00:12,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  16%|█▋        | 9/55 [00:02<00:12,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  18%|█▊        | 10/55 [00:02<00:11,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  20%|██        | 11/55 [00:03<00:11,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  22%|██▏       | 12/55 [00:03<00:11,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  24%|██▎       | 13/55 [00:03<00:11,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  25%|██▌       | 14/55 [00:03<00:11,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  27%|██▋       | 15/55 [00:04<00:11,  3.43it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  29%|██▉       | 16/55 [00:04<00:11,  3.40it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  31%|███       | 17/55 [00:04<00:10,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  33%|███▎      | 18/55 [00:05<00:10,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  35%|███▍      | 19/55 [00:05<00:10,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  36%|███▋      | 20/55 [00:05<00:09,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  38%|███▊      | 21/55 [00:05<00:09,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  40%|████      | 22/55 [00:06<00:08,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  42%|████▏     | 23/55 [00:06<00:08,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  44%|████▎     | 24/55 [00:06<00:08,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  45%|████▌     | 25/55 [00:06<00:07,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  47%|████▋     | 26/55 [00:07<00:07,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  49%|████▉     | 27/55 [00:07<00:07,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  51%|█████     | 28/55 [00:07<00:07,  3.72it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  60%|██████    | 33/55 [00:09<00:05,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  65%|██████▌   | 36/55 [00:09<00:05,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  67%|██████▋   | 37/55 [00:10<00:04,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  71%|███████   | 39/55 [00:10<00:04,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.55it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  80%|████████  | 44/55 [00:11<00:02,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  91%|█████████ | 50/55 [00:13<00:01,  3.65it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 18/20: 100%|██████████| 55/55 [00:14<00:00,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 18, Average Loss: 2.7068



Epoch 19/20:   2%|▏         | 1/55 [00:00<00:14,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:   4%|▎         | 2/55 [00:00<00:13,  3.96it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:   5%|▌         | 3/55 [00:00<00:14,  3.52it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:   7%|▋         | 4/55 [00:01<00:15,  3.20it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:   9%|▉         | 5/55 [00:01<00:15,  3.28it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  11%|█         | 6/55 [00:01<00:14,  3.40it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  13%|█▎        | 7/55 [00:02<00:13,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  15%|█▍        | 8/55 [00:02<00:13,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  16%|█▋        | 9/55 [00:02<00:12,  3.64it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  18%|█▊        | 10/55 [00:02<00:12,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  20%|██        | 11/55 [00:03<00:11,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  22%|██▏       | 12/55 [00:03<00:11,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  24%|██▎       | 13/55 [00:03<00:10,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  25%|██▌       | 14/55 [00:03<00:10,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  27%|██▋       | 15/55 [00:04<00:10,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  29%|██▉       | 16/55 [00:04<00:10,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  31%|███       | 17/55 [00:04<00:10,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  33%|███▎      | 18/55 [00:04<00:09,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  35%|███▍      | 19/55 [00:05<00:09,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  36%|███▋      | 20/55 [00:05<00:09,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  38%|███▊      | 21/55 [00:05<00:09,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  40%|████      | 22/55 [00:05<00:08,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  42%|████▏     | 23/55 [00:06<00:08,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  44%|████▎     | 24/55 [00:06<00:08,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  45%|████▌     | 25/55 [00:06<00:07,  3.78it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  47%|████▋     | 26/55 [00:07<00:07,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  49%|████▉     | 27/55 [00:07<00:07,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  51%|█████     | 28/55 [00:07<00:07,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  53%|█████▎    | 29/55 [00:07<00:06,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  60%|██████    | 33/55 [00:08<00:05,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.84it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.97it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  65%|██████▌   | 36/55 [00:09<00:04,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  67%|██████▋   | 37/55 [00:09<00:04,  3.95it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.89it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  71%|███████   | 39/55 [00:10<00:04,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  73%|███████▎  | 40/55 [00:10<00:03,  3.91it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  75%|███████▍  | 41/55 [00:10<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.62it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  80%|████████  | 44/55 [00:11<00:03,  3.48it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.33it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.41it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.46it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  87%|████████▋ | 48/55 [00:13<00:02,  3.47it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  91%|█████████ | 50/55 [00:13<00:01,  3.58it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.66it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.54it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 19/20: 100%|██████████| 55/55 [00:14<00:00,  3.69it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 19, Average Loss: 2.7042



Epoch 20/20:   2%|▏         | 1/55 [00:00<00:16,  3.37it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:   4%|▎         | 2/55 [00:00<00:15,  3.42it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:   5%|▌         | 3/55 [00:00<00:14,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:   7%|▋         | 4/55 [00:01<00:13,  3.76it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:   9%|▉         | 5/55 [00:01<00:13,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  11%|█         | 6/55 [00:01<00:13,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  13%|█▎        | 7/55 [00:01<00:13,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  15%|█▍        | 8/55 [00:02<00:13,  3.50it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  16%|█▋        | 9/55 [00:02<00:13,  3.33it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  18%|█▊        | 10/55 [00:02<00:13,  3.40it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  20%|██        | 11/55 [00:03<00:12,  3.53it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  22%|██▏       | 12/55 [00:03<00:12,  3.51it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  24%|██▎       | 13/55 [00:03<00:11,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  25%|██▌       | 14/55 [00:03<00:11,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  27%|██▋       | 15/55 [00:04<00:11,  3.38it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  29%|██▉       | 16/55 [00:04<00:12,  3.14it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  31%|███       | 17/55 [00:04<00:11,  3.29it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  33%|███▎      | 18/55 [00:05<00:10,  3.39it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  35%|███▍      | 19/55 [00:05<00:10,  3.44it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  36%|███▋      | 20/55 [00:05<00:09,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  38%|███▊      | 21/55 [00:06<00:09,  3.57it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  40%|████      | 22/55 [00:06<00:08,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  42%|████▏     | 23/55 [00:06<00:08,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  44%|████▎     | 24/55 [00:06<00:08,  3.74it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  45%|████▌     | 25/55 [00:07<00:07,  3.87it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  47%|████▋     | 26/55 [00:07<00:07,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  49%|████▉     | 27/55 [00:07<00:07,  3.83it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  51%|█████     | 28/55 [00:07<00:07,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  53%|█████▎    | 29/55 [00:08<00:06,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  55%|█████▍    | 30/55 [00:08<00:06,  3.77it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  56%|█████▋    | 31/55 [00:08<00:06,  3.73it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  58%|█████▊    | 32/55 [00:08<00:06,  3.56it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  60%|██████    | 33/55 [00:09<00:06,  3.59it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  62%|██████▏   | 34/55 [00:09<00:05,  3.60it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  64%|██████▎   | 35/55 [00:09<00:05,  3.61it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  65%|██████▌   | 36/55 [00:10<00:05,  3.71it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  67%|██████▋   | 37/55 [00:10<00:04,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  69%|██████▉   | 38/55 [00:10<00:04,  3.68it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  71%|███████   | 39/55 [00:10<00:04,  3.70it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  73%|███████▎  | 40/55 [00:11<00:04,  3.63it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  75%|███████▍  | 41/55 [00:11<00:03,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  76%|███████▋  | 42/55 [00:11<00:03,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  78%|███████▊  | 43/55 [00:11<00:03,  3.80it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  80%|████████  | 44/55 [00:12<00:02,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  82%|████████▏ | 45/55 [00:12<00:02,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  84%|████████▎ | 46/55 [00:12<00:02,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  85%|████████▌ | 47/55 [00:12<00:02,  3.98it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  87%|████████▋ | 48/55 [00:13<00:01,  3.86it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  89%|████████▉ | 49/55 [00:13<00:01,  3.79it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  91%|█████████ | 50/55 [00:13<00:01,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  93%|█████████▎| 51/55 [00:13<00:01,  3.75it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  95%|█████████▍| 52/55 [00:14<00:00,  3.82it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  96%|█████████▋| 53/55 [00:14<00:00,  3.81it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20:  98%|█████████▊| 54/55 [00:14<00:00,  3.90it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])


Epoch 20/20: 100%|██████████| 55/55 [00:14<00:00,  3.67it/s]

Text Embeddings Shape: torch.Size([1, 512])
Image Embeddings Shape: torch.Size([5, 512])
Epoch 20, Average Loss: 2.7013





In [10]:
import torch
import os
from PIL import Image
from tqdm import tqdm
import pandas as pd
from transformers import DistilBertTokenizer
from transformers import CLIPProcessor, CLIPModel

image_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
text_model = DistilBertModel.from_pretrained("distilbert-base-uncased").to(device)

image_model.load_state_dict(torch.load("clip_image_model_epoch_20.bin"))
text_model.load_state_dict(torch.load("distilbert_text_model_epoch_20.bin"))

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

df2 = pd.read_csv('/kaggle/input/semeval/SemEval2025/test.tsv', sep="\t")

ipath2 = []
sentences2 = []

for i, row in df2.iterrows():
    compound = row['compound']
    expected_order = row['expected_order']
    ch = "".join(char for char in compound if char != " ")
    base_path = f'/kaggle/input/semeval/SemEval2025/train/{ch}'

    if os.path.exists(base_path):
        expected_order = expected_order.strip("[]").replace("'", "").split(",")
        for img_filename in expected_order:
            img_filename = img_filename.strip()
            full_path = os.path.join(base_path, img_filename)
            if os.path.isfile(full_path):
                ipath2.append(full_path)
    sentences2.append(row['sentence'])

def calculate_top1_and_top2_accuracy():
    image_model.eval()
    text_model.eval()

    correct_top1 = 0
    correct_top2 = 0
    total = 0
    text_projection_layer = torch.nn.Linear(768, 512).to(device)

    with torch.no_grad():
        for i, sentence in enumerate(tqdm(sentences2, desc="Evaluating on Test Data")):
            text_inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
            text_outputs = text_model(**text_inputs)
            text_embeddings = text_outputs.last_hidden_state[:, 0, :]
            text_embeddings = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
            text_embeddings = text_projection_layer(text_embeddings)

            image_start_idx = 5 * i
            image_end_idx = image_start_idx + 4
            selected_image_paths = ipath2[image_start_idx:image_end_idx + 1]

            image_embeddings_list = []
            for path in selected_image_paths:
                image = Image.open(path).convert("RGB")
                pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
                image_embeddings = image_model.get_image_features(pixel_values=pixel_values)
                image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)
                image_embeddings_list.append(image_embeddings)

            image_embeddings = torch.cat(image_embeddings_list, dim=0)
            similarities = torch.matmul(text_embeddings, image_embeddings.T)
            similarities = similarities.squeeze(0)

            ranked_indices = torch.argsort(similarities, descending=True)

            if ranked_indices[0] == 0:
                correct_top1 += 1
            if 0 in ranked_indices[:2]:
                correct_top2 += 1

            total += 1

    top1_accuracy = correct_top1 / total * 100
    top2_accuracy = correct_top2 / total * 100
    return top1_accuracy, top2_accuracy

top1_accuracy, top2_accuracy = calculate_top1_and_top2_accuracy()
print(f"Top-1 Accuracy on Test Data: {top1_accuracy:.2f}%")
print(f"Top-2 Accuracy on Test Data: {top2_accuracy:.2f}%")


  image_model.load_state_dict(torch.load("clip_image_model_epoch_20.bin"))
  text_model.load_state_dict(torch.load("distilbert_text_model_epoch_20.bin"))
Evaluating on Test Data: 100%|██████████| 15/15 [00:04<00:00,  3.20it/s]

Top-1 Accuracy on Test Data: 53.33%
Top-2 Accuracy on Test Data: 73.33%



