In [None]:
#!pip install transformers

In [None]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import torch.nn.functional as F
from tqdm import tqdm
import random
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from datetime import datetime
from torchvision import models
#from transformers import CLIPProcessor, CLIPModel
import os
import csv
from peft import LoraConfig, get_peft_model
import json
import clip

In [None]:
# Base folder on the mounted Google Drive. Every dataset, image, metadata, figure and
# checkpoint path in this notebook is built by string concatenation onto this value,
# so the trailing slash is required.
root = "/content/drive/MyDrive/MASTER_THESIS/"

In [None]:
# Mount Google Drive at /content/drive so that the files under `root` are reachable
# (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# Note that `device` is a torch.device object, not a string.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Shell command: print the version of the CUDA compiler installed in this runtime.
!nvcc --version

In [None]:
# Guard flag for the baseline ("untrained model") benchmark further down.
# It is assigned False a second time in the cell right before that benchmark.
once_runned = False

In [None]:
# Backbone notes: XLM-R Large ViT-L/14 / OpenAI ViT-L/14.
# Open question: "ViT-L/14" or the "ViT-L/14@336px" variant?
# The Hugging Face route (CLIPProcessor / CLIPModel with "openai/clip-vit-large-patch14")
# is not used; the checkpoint is loaded through the `clip` package instead.
model_name = "ViT-L/14"
# `preprocess` is the image transform returned together with the model;
# jit=False asks for the regular (non-JIT) model.
model, preprocess = clip.load(model_name, device=device, jit=False)

In [None]:
# Table of the downloaded artworks; it is used to resolve a recordID to its image file.
FULL_DATASET = pd.read_csv(root + "fabritius_data_filtered_downloaded.csv")
# Remove rows with corrupted images
FULL_DATASET = FULL_DATASET[~FULL_DATASET["recordID"].isin([11546, 5262])]
# Shuffle with a fixed seed so the row order is the same on every run. The order matters
# because the path lookup takes the first matching row for a recordID.
FULL_DATASET = FULL_DATASET.sample(frac=1.0, random_state=42).reset_index(drop=True)
FULL_DATASET

In [None]:
def fixPath(path):
    """Return `path` with every ".././" sequence collapsed into "../"."""
    pieces = path.split(".././")
    return "../".join(pieces)

def get_image_path_from_recordID(dataset, recordID):
    """
    Resolve a recordID to the local path of its image.

    The lookup is done in the global FULL_DATASET table; the `dataset` argument
    is accepted but not used. Returns None when no row has this recordID,
    otherwise the path built from the first matching row.
    """
    is_match = FULL_DATASET["recordID"] == recordID
    filenames = FULL_DATASET.loc[is_match, "low_res_filename"].values
    if len(filenames) == 0:
        return None

    # The first character of the stored filename is dropped before it is appended
    # to the images folder.
    relative_path = filenames[0][1:]
    return fixPath(root + "images/" + relative_path)

In [None]:
# Captions used for fine-tuning.
TRAINING_CAPTIONS = pd.read_csv(root + "merged_data_training_set.csv")
# Columns: recordID, category, focus, caption
TRAINING_CAPTIONS

In [None]:
# Captions held out for validation.
VALIDATION_CAPTIONS = pd.read_csv(root + "merged_data_validation_set.csv")
# Columns: recordID, category, focus, caption
VALIDATION_CAPTIONS

In [None]:
# Description of this run: when it started, which model is fine-tuned, statistics of the
# training captions and the hyperparameters read by the dataloaders and the optimizer.
model_metadatas = {
    # Creation time of this dict; it also serves as the run identifier (see getIdentifier)
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "model_name": model_name,
    "pretrained": model_name,
    "dataset": {
        "name": "training_set_captions",
        "size": len(TRAINING_CAPTIONS),  # number of caption rows
        "size_recordID": len(TRAINING_CAPTIONS["recordID"].unique()),  # number of distinct records
        "size_per_category": TRAINING_CAPTIONS.groupby("category").size().to_dict(),
        "size_per_focus": TRAINING_CAPTIONS.groupby("focus").size().to_dict(),
    },
    "hyperparameters": {
        "batch_size": 128, # 256 was also tried; original note "32 = 8 <==> 128 = 23" (meaning unclear, TODO confirm)
        "num_epochs": 30,
        "learning_rate": 5e-5,
        "betas": (0.9, 0.98),
        "weight_decay": 0.2,
    }
}
def getIdentifier():
    """Return the identifier of this run, i.e. the timestamp stored in `model_metadatas`."""
    return model_metadatas["timestamp"]

# Save model_metadatas as JSON under `root`; the file name carries the run identifier.
with open(root + f"model_metadatas_{getIdentifier()}.json", "w") as f:
    json.dump(model_metadatas, f)
# Display the metadata as the cell output
model_metadatas

In [None]:
# Training sets
class FinetuningDataset(Dataset):
    """
    Dataset of (preprocessed image, caption, recordID) triples.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Caption table; the "recordID" and "caption" columns are read.
    getImageFromRecordID : callable
        Called as ``getImageFromRecordID(dataframe, recordID)``; must return the
        image path, or None when the record is unknown.
    preprocess : callable
        Transform applied to the opened PIL image (defaults to the transform
        returned by ``clip.load``).
    """

    # Substring substitutions applied, in this order, to the resolved image path.
    # Presumably they align the stored paths with the folder names on disk.
    _PATH_FIXES = (
        ("internet", "Internet"),
        ("Mod", "mod"),
        ("Old", "old"),
        ("Stefaan", "stefaan"),
        ("Art-Foto", "art-foto"),
    )

    def __init__(self, dataframe, getImageFromRecordID, preprocess=preprocess):
        self.dataframe = dataframe
        self.getImageFromRecordID = getImageFromRecordID
        self.preprocess = preprocess

    def __len__(self):
        """Number of caption rows."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """
        Return ``(image, caption, recordID)`` for row ``idx``.

        Raises
        ------
        FileNotFoundError
            If no image path is known for the row's recordID.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.dataframe.iloc[idx]

        recordID = row['recordID']
        caption = row['caption']
        path = self.getImageFromRecordID(self.dataframe, recordID)
        if path is None:
            # Fail with an explicit message instead of an AttributeError on `None.replace`
            raise FileNotFoundError(f"No image path found for recordID {recordID}")

        for wrong, right in self._PATH_FIXES:
            path = path.replace(wrong, right)

        # The context manager closes the underlying file once the image has been
        # turned into the preprocessed output.
        with Image.open(path) as raw_image:
            image = self.preprocess(raw_image)

        return image, caption, recordID

def customBatchBuilder(samples, tokenizer=None):
    """
    Collate function turning dataset samples into a batch the model can consume.

    Parameters
    ----------
    samples : sequence of (image_tensor, caption, recordID)
        Items as returned by ``FinetuningDataset.__getitem__``.
    tokenizer : callable, optional
        Maps a list of caption strings to a tensor of token ids. Defaults to
        ``clip.tokenize`` with ``truncate=True`` so that captions longer than the
        model's context length are cut instead of raising an error.

    Returns
    -------
    (images, tokens, recordIDs)
        ``images`` is the image tensors stacked along a new first dimension,
        ``tokens`` is the tokenizer output, ``recordIDs`` is a tuple.
        Both tensors support ``.to(device)``, which the training and benchmark
        loops call on them.
    """
    if tokenizer is None:
        tokenizer = lambda texts: clip.tokenize(texts, truncate=True)

    images, captions, recordIDs = zip(*samples)
    return torch.stack(images), tokenizer(list(captions)), recordIDs

# Training dataset with only content focus
DATASET__TRAINING_ONLY_CONTENT_FOCUS = FinetuningDataset(TRAINING_CAPTIONS[TRAINING_CAPTIONS["focus"] == "content"], get_image_path_from_recordID)
# Training dataset with the captions of every focus
DATASET__TRAINING_ALL_FOCUS          = FinetuningDataset(TRAINING_CAPTIONS, get_image_path_from_recordID)

# Make dataloaders. Both loaders use the same collate function so that every training
# batch has the same format; previously the all-focus loader fell back to the default
# collate function and therefore produced differently shaped batches.
DATALOADER__TRAINING_ONLY_CONTENT_FOCUS = DataLoader(
    DATASET__TRAINING_ONLY_CONTENT_FOCUS,
    batch_size=model_metadatas["hyperparameters"]["batch_size"],
    shuffle=True,
    num_workers=0,
    collate_fn=customBatchBuilder
)
DATALOADER__TRAINING_ALL_FOCUS = DataLoader(
    DATASET__TRAINING_ALL_FOCUS,
    batch_size=model_metadatas["hyperparameters"]["batch_size"],
    shuffle=True,
    num_workers=0,
    collate_fn=customBatchBuilder
)

# Print the lengths of the datasets (samples) and dataloaders (batches)
print("TRAINING_ONLY_CONTENT_FOCUS:", len(DATASET__TRAINING_ONLY_CONTENT_FOCUS), " | ", len(DATALOADER__TRAINING_ONLY_CONTENT_FOCUS))
print("TRAINING_ALL_FOCUS:", len(DATASET__TRAINING_ALL_FOCUS), " | ", len(DATALOADER__TRAINING_ALL_FOCUS))

In [None]:
# Training_metrics & Validation sets
DATASET__VALIDATION_ALL_FOCUS = FinetuningDataset(VALIDATION_CAPTIONS, get_image_path_from_recordID)
# Uses the same collate function as the per-focus loaders below so that all loaders
# yield batches in one format.
DATALOADER__VALIDATION_ALL_FOCUS = DataLoader(
    DATASET__VALIDATION_ALL_FOCUS,
    batch_size=model_metadatas["hyperparameters"]["batch_size"],
    shuffle=True,
    num_workers=0,
    collate_fn=customBatchBuilder
)

# Since we have multiple captions per image, we want to make a Dataset that allows us to measure the performance of the model on each focus
DATASET__TRAINING_PER_FOCUS     = {}
DATASET__VALIDATION_PER_FOCUS   = {}

for focus in ["content", "emotion", "colors", "luminosity"]:
    DATASET__TRAINING_PER_FOCUS[focus]      = FinetuningDataset(TRAINING_CAPTIONS[TRAINING_CAPTIONS["focus"] == focus], get_image_path_from_recordID)
    DATASET__VALIDATION_PER_FOCUS[focus]    = FinetuningDataset(VALIDATION_CAPTIONS[VALIDATION_CAPTIONS["focus"] == focus], get_image_path_from_recordID)

# Make dataloaders (one per focus, for both splits)
DATALOADER__TRAINING_PER_FOCUS = {}
DATALOADER__VALIDATION_PER_FOCUS = {}
for focus in ["content", "emotion", "colors", "luminosity"]:
    DATALOADER__TRAINING_PER_FOCUS[focus] = DataLoader(DATASET__TRAINING_PER_FOCUS[focus], batch_size=model_metadatas["hyperparameters"]["batch_size"], shuffle=True, num_workers=0, collate_fn=customBatchBuilder)
    DATALOADER__VALIDATION_PER_FOCUS[focus] = DataLoader(DATASET__VALIDATION_PER_FOCUS[focus], batch_size=model_metadatas["hyperparameters"]["batch_size"], shuffle=True, num_workers=0, collate_fn=customBatchBuilder)

# Summarize the lengths of the datasets (samples) and dataloaders (batches) per focus
sizes = pd.DataFrame(columns=["focus", "Dataset (training)", "Dataset (validation)", "Dataloader (training)", "Dataloader (validation)"])
for focus in ["content", "emotion", "colors", "luminosity"]:
    sizes.loc[len(sizes)] = [focus, len(DATASET__TRAINING_PER_FOCUS[focus]), len(DATASET__VALIDATION_PER_FOCUS[focus]), len(DATALOADER__TRAINING_PER_FOCUS[focus]), len(DATALOADER__VALIDATION_PER_FOCUS[focus])]

sizes

In [None]:
def convert_models_to_fp32(model):
    """
    Cast every parameter of `model`, and its gradient when one exists, to float32 in place.

    Parameters without a gradient (frozen, or not yet back-propagated) are cast
    as well; only their missing gradient is skipped.
    """
    for p in model.parameters():
        p.data = p.data.float()
        if p.grad is not None:
            p.grad.data = p.grad.data.float()

In [None]:
# `device` is a torch.device object, so test its `.type` rather than comparing the
# object itself to a string.
if torch.device(device).type == "cpu":
    # Keep the model in float32 when running on the CPU
    model.float()

In [None]:
# AdamW over all model parameters, configured from the hyperparameters stored in
# `model_metadatas`. The training function reads this `optimizer` as a global.
optimizer = optim.AdamW(
    model.parameters(), 
    lr=model_metadatas["hyperparameters"]["learning_rate"], 
    weight_decay=model_metadatas["hyperparameters"]["weight_decay"],
    betas=model_metadatas["hyperparameters"]["betas"]
)
# One cross-entropy criterion per direction (image->caption logits and caption->image
# logits); the training and benchmark code average the two.
loss_image      = nn.CrossEntropyLoss()
loss_caption    = nn.CrossEntropyLoss()

In [None]:
def get_average_position(cosine_similarities):
    """
    Average Position of the correct answer.

    Row i of `cosine_similarities` holds the scores of query i against every
    candidate, and candidate i is the correct answer for query i. The position of a
    query is the 1-based rank of its correct candidate when the row is sorted by
    decreasing score. The result is the mean position over all rows: a value
    between 1 and n, where 1 means the correct answer is always ranked first.
    """
    positions = [
        np.flatnonzero(np.argsort(scores)[::-1] == query_idx)[0] + 1
        for query_idx, scores in enumerate(cosine_similarities)
    ]
    return sum(positions) / len(cosine_similarities)

def get_MRR(cosine_similarities):
    """
    Mean Reciprocal Rank (MRR).

    Row i of `cosine_similarities` holds the scores of query i against every
    candidate, and candidate i is the correct answer for query i. The reciprocal
    rank of a query is 1 / (1-based rank of its correct candidate in the row sorted
    by decreasing score). The MRR is the mean of these values: a number in (0, 1],
    where 1 means the correct answer is always ranked first.
    """
    n_queries = len(cosine_similarities)
    reciprocal_sum = 0
    for query_idx, scores in enumerate(cosine_similarities):
        ranking = np.argsort(scores)[::-1]
        position = np.flatnonzero(ranking == query_idx)[0] + 1
        reciprocal_sum = reciprocal_sum + 1 / position
    return reciprocal_sum / n_queries

def get_recall_at_k(cosine_similarities, k):
    """
    Recall@k.

    Row i of `cosine_similarities` holds the scores of query i against every
    candidate, and candidate i is the correct answer for query i. Recall@k is the
    fraction of queries whose correct candidate appears among the k highest-scoring
    candidates: a number between 0 and 1, where 1 means the correct answer is
    always in the top k.
    """
    hits = 0
    for query_idx, scores in enumerate(cosine_similarities):
        top_k = np.argsort(scores)[::-1][:k]
        hits += int(query_idx in top_k)
    return hits / len(cosine_similarities)

def get_nDCG_at_k(cosine_similarities, k):
    """
    nDCG@k (normalized discounted cumulative gain) with one relevant item per query.

    Row i of `cosine_similarities` holds the scores of query i against every
    candidate, and candidate i is the correct answer for query i. A query
    contributes 1 / log2(rank + 1) when the 1-based rank of its correct candidate
    is at most k, and 0 otherwise. The result is the mean contribution: a number
    between 0 and 1, where 1 means the correct answer is always ranked first.
    """
    gain_sum = 0
    for query_idx, scores in enumerate(cosine_similarities):
        ranking = np.argsort(scores)[::-1]
        position = np.flatnonzero(ranking == query_idx)[0] + 1
        if position <= k:
            gain_sum += 1 / np.log2(position + 1)
        else:
            gain_sum += 0
    return gain_sum / len(cosine_similarities)

In [None]:
def benchmark_on_dataloader(model, dataset, dataloader, device):
    """
    Measure the model on every batch of `dataloader`.

    Parameters
    ----------
    model : CLIP-style model exposing ``encode_image``, ``encode_text`` and a
        forward call returning ``(logits_per_image, logits_per_text)``.
    dataset : kept for interface compatibility; the number of evaluated pairs is
        counted from the batches themselves.
    dataloader : iterable of ``(images, captions, recordIDs)`` batches whose first
        two items support ``.to(device)``.
    device : device the batches are moved to.

    Returns
    -------
    list
        ``[loss, average_position, mrr, recall@1, recall@5, recall@10,
        ndcg@1, ndcg@5, ndcg@10]``. The loss is the average over all pairs; the
        ranking metrics are computed on the similarity matrix between all image
        embeddings and all text embeddings of the dataloader.

    Raises
    ------
    ValueError
        If the dataloader yields no sample.
    """
    model.eval()
    weighted_loss_sum = 0.0
    n_pairs = 0
    image_chunks = []
    text_chunks = []

    with torch.no_grad():
        for images, captions, recordIDs in tqdm(dataloader):
            images = images.to(device)
            captions = captions.to(device)

            # Compute the embeddings and scale them to unit length
            image_embeddings = model.encode_image(images)
            text_embeddings = model.encode_text(captions)
            image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
            text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True)

            # Recover logits
            logits_per_image, logits_per_text = model(images, captions)

            # Ground truth: pair i of the batch matches caption i
            batch_len = len(images)
            ground_truth = torch.arange(batch_len, dtype=torch.long, device=device)

            # The criteria return the mean over the batch, so weight each batch by its
            # size to obtain a true per-pair average at the end.
            loss = (loss_image(logits_per_image, ground_truth) + loss_caption(logits_per_text, ground_truth))/2
            weighted_loss_sum += loss.item() * batch_len
            n_pairs += batch_len

            # Keep the embeddings in float32; they are concatenated once after the loop
            image_chunks.append(image_embeddings.float())
            text_chunks.append(text_embeddings.float())

    if n_pairs == 0:
        raise ValueError("benchmark_on_dataloader received a dataloader without any sample")

    # Average loss per pair
    loss_per_pair = weighted_loss_sum / n_pairs

    # Similarity between every image (rows) and every caption (columns)
    images_embeddings = torch.cat(image_chunks, 0)
    texts_embeddings = torch.cat(text_chunks, 0)
    similarities = (images_embeddings @ texts_embeddings.T).cpu().numpy()

    # Ranking metrics
    average_position = get_average_position(similarities)
    mrr = get_MRR(similarities)
    recalls = [get_recall_at_k(similarities, k) for k in (1, 5, 10)]
    ndcgs = [get_nDCG_at_k(similarities, k) for k in (1, 5, 10)]

    return [loss_per_pair, average_position, mrr] + recalls + ndcgs

In [None]:
def run_benchmark(model, epoch, training_df, validation_df, device):
    """
    Benchmark the model on every focus of both splits and record the results.

    For the training split and then the validation split, the per-focus dataloaders
    ("content", "emotion", "colors", "luminosity") are evaluated in that order and
    one row per focus is appended to the matching table, followed by an "all" row
    holding the mean of the four focus rows. Each row is
    ``[epoch, focus] + measures``. Both tables are modified in place.
    """
    focus_names = ["content", "emotion", "colors", "luminosity"]

    def benchmark_split(results_df, captions, dataloaders):
        # Append the rows of one split (four focus rows and their mean) to `results_df`.
        totals = [0.0] * 9
        for focus in focus_names:
            focus_captions = captions[captions["focus"] == focus]
            measures = benchmark_on_dataloader(model, focus_captions, dataloaders[focus], device)
            results_df.loc[len(results_df)] = [epoch, focus] + measures
            totals = [total + measure for total, measure in zip(totals, measures)]
        results_df.loc[len(results_df)] = [epoch, "all"] + [total / 4 for total in totals]

    # Training set
    benchmark_split(training_df, TRAINING_CAPTIONS, DATALOADER__TRAINING_PER_FOCUS)
    # Validation set
    benchmark_split(validation_df, VALIDATION_CAPTIONS, DATALOADER__VALIDATION_PER_FOCUS)

In [None]:
def create_pd_tables():
    """
    Create the two empty result tables filled by the benchmarks.

    Returns a pair ``(training_df, validation_df)`` of independent, empty
    DataFrames. Both have the columns "epoch" and "focus" followed by one column
    per metric: loss, average_position, mrr, recall@1/5/10 and ndcg@1/5/10.
    """
    result_columns = ["epoch", "focus", "loss", "average_position", "mrr"]
    result_columns += [f"recall@{k}" for k in (1, 5, 10)]
    result_columns += [f"ndcg@{k}" for k in (1, 5, 10)]

    training_df, validation_df = (pd.DataFrame(columns=list(result_columns)) for _ in range(2))
    return training_df, validation_df

In [None]:
# Plot the MRR of one result table for each focus
def plot_mrr(df, title, save_name):
    """
    Plot the MRR stored in `df` for each focus, save the figure as PDF and show it.

    Parameters
    ----------
    df : result table with "epoch", "focus" and "mrr" columns and one row per
        (epoch, focus) pair, the "all" focus included.
    title : figure title.
    save_name : file name stem, relative to `root`.

    With a single epoch a bar chart is drawn (bars sorted by decreasing MRR) and
    saved to ``root + save_name + ".pdf"``. With several epochs one curve per focus
    is drawn and the run identifier is appended to the file name.
    """
    focuses = ["all", "content", "emotion", "colors", "luminosity"]
    epochs = sorted(df["epoch"].unique())
    mrrs = {
        focus: [
            df[(df["epoch"] == epoch) & (df["focus"] == focus)]["mrr"].values[0]
            for epoch in epochs
        ]
        for focus in focuses
    }

    plt.figure(figsize=(10, 5))
    if len(epochs) == 1:
        # Bar plot, sorted by MRR (high to low)
        for focus, values in sorted(mrrs.items(), key=lambda item: item[1][-1], reverse=True):
            plt.bar(focus, values[-1], label=focus)
        plt.xlabel("Focus")
        output_path = root + save_name + ".pdf"
    else:
        # One curve per focus (different colors) over the epochs
        for focus in focuses:
            plt.plot(epochs, mrrs[focus], label=focus)
        plt.xlabel("Epoch")
        output_path = root + save_name + "_" + getIdentifier() + ".pdf"

    plt.title(title)
    plt.ylabel("MRR")
    plt.ylim(0, 1)
    plt.grid()
    # The legend must exist before saving, otherwise it is missing from the PDF
    plt.legend()
    plt.savefig(output_path)
    plt.show()

In [None]:
def plot_losses(training_df, validation_df, title, save_name):
    """
    Plot the loss per epoch for each focus, for both the training and validation tables.

    Parameters
    ----------
    training_df, validation_df : result tables with "epoch", "focus" and "loss"
        columns and one row per (epoch, focus) pair, the "all" focus included.
        The epochs are taken from `training_df`; `validation_df` must contain
        the same ones.
    title : figure title.
    save_name : file name stem; the figure is saved to
        ``root + save_name + "_" + getIdentifier() + ".pdf"`` and then shown.

    Training curves are solid; the validation curve of a focus is dashed and
    drawn in the same color.
    """
    focuses = ["all", "content", "emotion", "colors", "luminosity"]
    epochs = sorted(training_df["epoch"].unique())

    def losses_of(df, focus):
        # Loss of `focus` at each epoch, in epoch order
        return [
            df[(df["epoch"] == epoch) & (df["focus"] == focus)]["loss"].values[0]
            for epoch in epochs
        ]

    plt.figure(figsize=(10, 5))

    for focus in focuses:
        (training_line,) = plt.plot(epochs, losses_of(training_df, focus), label=focus + " (training)")
        plt.plot(
            epochs,
            losses_of(validation_df, focus),
            linestyle="--",
            color=training_line.get_color(),
            label=focus + " (validation)",
        )

    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid()
    plt.savefig(root + save_name + "_" + getIdentifier() + ".pdf")
    plt.show()

In [None]:
# Reset the guard so that the next cell computes the baseline benchmark.
# The next cell sets it to True once it has run.
once_runned = False

In [None]:
# Compute the metrics of the model before any fine-tuning; they are stored as epoch 0.
if not once_runned:
    # Compute the metrics on the untrained model (SHOULD NOT BE RUN AGAIN)
    untrained_training_df, untrained_validation_df = create_pd_tables()
    run_benchmark(model, 0, untrained_training_df, untrained_validation_df, device)
    # With a single epoch in the tables, plot_mrr draws one bar per focus
    plot_mrr(untrained_training_df, "MRR on the training set (untrained model)", "untrained_training_mrr")
    plot_mrr(untrained_validation_df, "MRR on the validation set (untrained model)", "untrained_validation_mrr")
    # Mark the baseline as done so that re-running this cell alone skips it
    once_runned = True

In [None]:
# Display the baseline (epoch 0) metrics measured on the training captions.
untrained_training_df

In [None]:
# Display the baseline (epoch 0) metrics measured on the validation captions.
untrained_validation_df

In [None]:
def printMetrics(epoch, training_df, validation_df):
    """
    Print, for one epoch, the training and validation values of the main metrics.

    The metrics loss, average_position, mrr, recall@1 and ndcg@1 are looked up for
    every focus ("all" included) in both tables. For each metric a small table with
    the columns "Metric name", "Training" and "Validation" is printed, sorted by
    increasing training value. Returns None.
    """
    reported_metrics = ["loss", "average_position", "mrr", "recall@1", "ndcg@1"]
    focus_names = ["all", "content", "emotion", "colors", "luminosity"]

    def lookup(df, focus_name, metric_name):
        # Value of `metric_name` in the row of `df` for this epoch and focus
        selected = df[(df["epoch"] == epoch) & (df["focus"] == focus_name)]
        return selected[metric_name].values[0]

    combined_df = pd.DataFrame(columns=["Metric name", "Training", "Validation"])
    for metric_name in reported_metrics:
        for focus_name in focus_names:
            combined_df.loc[len(combined_df)] = [
                f"{metric_name} ({focus_name})",
                lookup(training_df, focus_name, metric_name),
                lookup(validation_df, focus_name, metric_name),
            ]

    for metric_name in reported_metrics:
        # Rows of this metric, selected through their label
        matching_rows = combined_df["Metric name"].str.contains(metric_name)
        print(f"Metric: {metric_name}")
        print(combined_df[matching_rows].sort_values(by="Training", ascending=True))
        print()

# Test: print the metric tables for the baseline results (stored as epoch 0)
printMetrics(0, untrained_training_df, untrained_validation_df)

In [None]:
def train_for_one_epoch(model, epoch, training_df, validation_df, dataloader, device):
    """
    Train `model` for one pass over `dataloader`, then benchmark it and print the metrics.

    Parameters
    ----------
    model : CLIP-style model whose forward call returns
        ``(logits_per_image, logits_per_text)``.
    epoch : epoch number, used for logging and stored in the result rows.
    training_df, validation_df : result tables that receive this epoch's rows.
    dataloader : iterable of ``(images, captions, recordIDs)`` batches whose first
        two items support ``.to(device)``.
    device : device the batches are moved to.

    Uses the global `optimizer`, `loss_image` and `loss_caption`.
    """
    model.train()
    # `device` may be a torch.device object, so test its `.type` rather than comparing
    # the object itself to a string
    on_cpu = torch.device(device).type == "cpu"

    for images, captions, recordIDs in tqdm(dataloader):
        # Start every step from clean gradients; without this they accumulate across batches
        optimizer.zero_grad()

        images = images.to(device)
        captions = captions.to(device)

        # Recover logits
        logits_per_image, logits_per_text = model(images, captions)

        # Ground truth: pair i of the batch matches caption i
        ground_truth = torch.arange(len(images), dtype=torch.long, device=device)

        # Symmetric loss: mean of the image->caption and caption->image cross-entropies
        loss = (loss_image(logits_per_image, ground_truth) + loss_caption(logits_per_text, ground_truth))/2

        # Backward pass
        loss.backward()
        if on_cpu:
            optimizer.step()
        else:
            # Run the optimizer step in float32, then convert the weights back
            convert_models_to_fp32(model)
            optimizer.step()
            clip.model.convert_weights(model)

        print(f"Epoch {epoch} | Loss: {loss.item()}")

    # Compute the metrics for this epoch
    print("Running benchmark...")
    run_benchmark(model, epoch, training_df, validation_df, device)

    # Print the metrics
    printMetrics(epoch, training_df, validation_df)

In [None]:
def save_model_weights(model, type, epoch):
    """
    Save the model weights under ``root + "models/"``.

    The target name is ``<run timestamp>_<type>_<epoch>.pt``. Models exposing
    ``save_pretrained`` are saved through it; any other ``torch.nn.Module``
    (such as the model returned by ``clip.load``) has its ``state_dict``
    written with ``torch.save``.
    """
    fullIdentifier = model_metadatas["timestamp"] + "_" + type + "_" + str(epoch)
    models_folder = root + "models/"
    target = models_folder + fullIdentifier + ".pt"

    # Make sure the destination folder exists before writing
    os.makedirs(models_folder, exist_ok=True)
    if hasattr(model, "save_pretrained"):
        model.save_pretrained(target)
    else:
        torch.save(model.state_dict(), target)

# Training loop (only content focus)

In [None]:
# Load the initial model weights
# NOTE(review): the reload below is commented out, so nothing is reloaded here despite the
# printed message; `model` is still the instance loaded earlier in the notebook.
#model, preprocess = clip.load("ViT-L/14", device=device, jit=False)
print("Model reloaded")

In [None]:
# Start the result tables of this run from the baseline (untrained model) rows.
# Copying the frames replaces the row-by-row append and keeps the baseline tables untouched.
training_df_onlyFocus = untrained_training_df.copy()
validation_df_onlyFocus = untrained_validation_df.copy()

# Sanity check: the copies have as many rows as the baseline tables
print(len(untrained_training_df), len(training_df_onlyFocus))
print(len(untrained_validation_df), len(validation_df_onlyFocus))

In [None]:
# Training loop (only content focus)
# Each iteration trains for one epoch on the content-focus captions, then appends that
# epoch's benchmark rows to the two result tables.
for epoch in range(1, model_metadatas["hyperparameters"]["num_epochs"]+1):
    print("Epoch", epoch)
    train_for_one_epoch(model, epoch, training_df_onlyFocus, validation_df_onlyFocus, DATALOADER__TRAINING_ONLY_CONTENT_FOCUS, device)
# Runs once, after the loop: `epoch` still holds the number of the last epoch here.
save_model_weights(model, "onlyFocus", epoch)

In [None]:
# Plot the per-focus losses recorded during the content-only run (epoch 0 is the baseline)
plot_losses(training_df_onlyFocus, validation_df_onlyFocus, "Losses on the training and validation sets (only content focus)", "losses_only_content_focus")

In [None]:
# Plot the per-focus MRR of the content-only run, one figure per split
plot_mrr(training_df_onlyFocus, "MRR on the training set (only content focus)", "training_mrr_onlyFocus")
plot_mrr(validation_df_onlyFocus, "MRR on the validation set (only content focus)", "validation_mrr_onlyFocus")

# Training loop (all focus)

In [None]:
# Load the initial model weights so that the all-focus run starts from the pretrained
# checkpoint instead of the weights left behind by the previous training run.
# Release the previous model and optimizer first so their memory can be reclaimed.
model = None
optimizer = None
if torch.cuda.is_available():
    torch.cuda.empty_cache()

model, preprocess = clip.load(model_name, device=device, jit=False)

# The optimizer has to be rebuilt as well: the previous one was bound to the parameters
# of the previous model instance.
optimizer = optim.AdamW(
    model.parameters(),
    lr=model_metadatas["hyperparameters"]["learning_rate"],
    weight_decay=model_metadatas["hyperparameters"]["weight_decay"],
    betas=model_metadatas["hyperparameters"]["betas"]
)
print("Model reloaded")

In [None]:
# Start the result tables of this run from the baseline (untrained model) rows.
# Copying the frames replaces the row-by-row append and keeps the baseline tables untouched.
training_df_allFocus = untrained_training_df.copy()
validation_df_allFocus = untrained_validation_df.copy()

# Sanity check: the copies have as many rows as the baseline tables
print(len(untrained_training_df), len(training_df_allFocus))
print(len(untrained_validation_df), len(validation_df_allFocus))

In [None]:
# Training loop (all focus)
# Each iteration trains for one epoch on the captions of every focus, then appends that
# epoch's benchmark rows to the two result tables.
# NOTE(review): training continues from whatever `model` and `optimizer` the preceding
# cells left in place.
for epoch in range(1, model_metadatas["hyperparameters"]["num_epochs"]+1):
    print("Epoch", epoch)
    train_for_one_epoch(model, epoch, training_df_allFocus, validation_df_allFocus, DATALOADER__TRAINING_ALL_FOCUS, device)
# Runs once, after the loop: `epoch` still holds the number of the last epoch here.
save_model_weights(model, "allFocus", epoch)

In [None]:
# Plot the per-focus losses recorded during the all-focus run (epoch 0 is the baseline)
plot_losses(training_df_allFocus, validation_df_allFocus, "Losses on the training and validation sets (all focus)", "losses_all_focus")

In [None]:
# Plot the per-focus MRR of the all-focus run, one figure per split
plot_mrr(training_df_allFocus, "MRR on the training set (all focus)", "training_mrr_allFocus")
plot_mrr(validation_df_allFocus, "MRR on the validation set (all focus)", "validation_mrr_allFocus")