# MULTIMODAL MODEL

##### Ce notebook permet d'entraîner le modèle Clip d'Open AI, fait pour associer texte et image et effectuer des tâches comme la classification.

### Tools

##### Import des librairies

In [1]:
import os
import sys
import random
from datetime import datetime
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchinfo import summary
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

##### Debugging

In [2]:
def log(text):
    with open("logs/debug_log.txt", "a") as f:
        sys.stdout = f
        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"[{current_time}] : {text}")
        sys.stdout = sys.__stdout__

### Data

##### Définition des variables globales

In [3]:
DATA_DIR = "../data/processed/img_classified_by_prdtypecode/img_classified_by_prdtypecode"
CSV_FILE = "../data/csv_files/img-text-clean-data.csv"
SAVE_DIR = "save"
ACC_LOSS_HIST_DIR = "acc_loss_history"
NUM_CLASSES = 27
BATCH_SIZE = 32
EPOCHS = 30
LEARNING_RATE = 0.001
EARLY_STOPPING_PATIENCE = 5
LR_REDUCER_PATIENCE = 3

##### Chargement des données

In [4]:
df = pd.read_csv(CSV_FILE)

# récupération du noim des classes
class_names = sorted(os.listdir(DATA_DIR))
print("class_names :\n", class_names)

# mapping en index
label_to_index = {label: idx for idx, label in enumerate(class_names)}
print("label_to_index :\n", label_to_index)

def map_label(code):
    return label_to_index.get(str(code), None)

df['label'] = df['prdtypecode'].apply(map_label).astype('Int64')
display(df.head())
print(df.info())

# identification des valeurs manquantes texte
missing_values = df.isnull().sum()
print("Valeurs manquantes par colonne :\n", missing_values)

missing_desc = df[df['desi_desc_cleaned'].isnull()]
print("Lignes avec des valeurs nulles dans 'desi_desc_cleaned' :\n")
display(missing_desc.head())

# suppression des lignes avec valeurs manquantes texte
df_cleaned = df.dropna(subset = ['desi_desc_cleaned'])

# constitution d'un dataframe avec 700 produits par catégorie
num_products_per_category = 750
df_sampled = df_cleaned.groupby('prdtypecode').apply(lambda x: x.sample(n=min(len(x), num_products_per_category), random_state=1))
df_sampled = df_sampled.reset_index(drop=True)
print(f"Nouveau DataFrame avec {num_products_per_category} lignes par catégorie :\n")
display(df_sampled.head())

# Vérification du nombre de lignes par catégorie
print("Nombre de lignes par 'prdtypecode' :\n", df_sampled['prdtypecode'].value_counts())
print("Nombre de lignes par 'label' :\n", df_sampled['label'].value_counts())

class_names :
 ['10', '1140', '1160', '1180', '1280', '1281', '1300', '1301', '1302', '1320', '1560', '1920', '1940', '2060', '2220', '2280', '2403', '2462', '2522', '2582', '2583', '2585', '2705', '2905', '40', '50', '60']
label_to_index :
 {'10': 0, '1140': 1, '1160': 2, '1180': 3, '1280': 4, '1281': 5, '1300': 6, '1301': 7, '1302': 8, '1320': 9, '1560': 10, '1920': 11, '1940': 12, '2060': 13, '2220': 14, '2280': 15, '2403': 16, '2462': 17, '2522': 18, '2582': 19, '2583': 20, '2585': 21, '2705': 22, '2905': 23, '40': 24, '50': 25, '60': 26}


Unnamed: 0,designation,description,productid,imageid,image name,image size in bits,prdtypecode,désignation textuelle,catégorie niv 1,desi_desc_cleaned,label
0,Olivia: Personalisiertes Notizbuch / 150 Seite...,,3804725264,1263597046,image_1263597046_product_3804725264.jpg,14010,10,Livres anciens / occasion,Littérature,olivia personalisiertes notizbuch seiten punkt...,0
1,Journal Des Arts (Le) N° 133 Du 28/09/2001 - L...,,436067568,1008141237,image_1008141237_product_436067568.jpg,14854,2280,"journaux, revues, magazines anciens",Littérature,journal arts art marche salon art asiatique pa...,15
2,Grand Stylet Ergonomique Bleu Gamepad Nintendo...,PILOT STYLE Touch Pen de marque Speedlink est ...,201115110,938777978,image_938777978_product_201115110.jpg,6898,50,Accessoires & produits dérivés gaming,Jeux Vidéos,stylet ergonomique bleu gamepad nintendo wii s...,25
3,Peluche Donald - Europe - Disneyland 2000 (Mar...,,50418756,457047496,image_457047496_product_50418756.jpg,14404,1280,Jeux & jouets pour enfants,Jeux de société & Jouets,peluche donald europe disneyland marionnette d...,4
4,La Guerre Des Tuques,Luc a des id&eacute;es de grandeur. Il veut or...,278535884,1077757786,image_1077757786_product_278535884.jpg,20435,2705,Livres neufs,Littérature,guerre tuques luc ideacute grandeur veut organ...,22


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84916 entries, 0 to 84915
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   designation            84916 non-null  object
 1   description            55116 non-null  object
 2   productid              84916 non-null  int64 
 3   imageid                84916 non-null  int64 
 4   image name             84916 non-null  object
 5   image size in bits     84916 non-null  int64 
 6   prdtypecode            84916 non-null  int64 
 7   désignation textuelle  84916 non-null  object
 8   catégorie niv 1        84916 non-null  object
 9   desi_desc_cleaned      84521 non-null  object
 10  label                  84916 non-null  Int64 
dtypes: Int64(1), int64(4), object(6)
memory usage: 7.2+ MB
None
Valeurs manquantes par colonne :
 designation                  0
description              29800
productid                    0
imageid                      0
image

Unnamed: 0,designation,description,productid,imageid,image name,image size in bits,prdtypecode,désignation textuelle,catégorie niv 1,desi_desc_cleaned,label
101,Innercity Burnout [Import Allemand] [Jeu Pc],<br>Attention !!! Ce produit est un import si...,190095382,933239989,image_933239989_product_190095382.jpg,43805,40,"Jeux vidéos anciens, équipement",Jeux Vidéos,,24
107,Zumba Fitness : Rush (Jeu Kinect) [Import Alle...,<br>Attention !!! Ce produit est un import si...,190091299,933233675,image_933233675_product_190091299.jpg,46184,40,"Jeux vidéos anciens, équipement",Jeux Vidéos,,24
339,C/SEP ARRIERE GHE SUPERCHIEF B737117 AD.IHF,,1657064547,1096608932,image_1096608932_product_1657064547.jpg,17070,2585,"Outillage intérieur / extérieur, tâches ménagères",Maison & ameublement,,21
346,Assassin's Creed : Brotherhood + Assassin's Cr...,<br>Attention !!! Ce produit est un import si...,220267350,955658863,image_955658863_product_220267350.jpg,56560,40,"Jeux vidéos anciens, équipement",Jeux Vidéos,,24
421,Planet 51 [Import Allemand] [Jeu Wii],<br>Attention !!! Ce produit est un import si...,190093955,933235320,image_933235320_product_190093955.jpg,44208,40,"Jeux vidéos anciens, équipement",Jeux Vidéos,,24


Nouveau DataFrame avec 750 lignes par catégorie :



  df_sampled = df_cleaned.groupby('prdtypecode').apply(lambda x: x.sample(n=min(len(x), num_products_per_category), random_state=1))


Unnamed: 0,designation,description,productid,imageid,image name,image size in bits,prdtypecode,désignation textuelle,catégorie niv 1,desi_desc_cleaned,label
0,Resolving Stress In Your Marriage : How To Ide...,,53291144,539067330,image_539067330_product_53291144.jpg,27972,10,Livres anciens / occasion,Littérature,resolving stress your marriage how identify an...,0
1,From Times Square To Timbuktu: The Post-Christ...,,196547987,946518577,image_946518577_product_196547987.jpg,10813,10,Livres anciens / occasion,Littérature,from times square timbuktu the post christian ...,0
2,Transnationalism In Iranian Political Thought,During the Iranian Revolution of 1978/9 the in...,1798131351,1151810891,image_1151810891_product_1798131351.jpg,31915,10,Livres anciens / occasion,Littérature,transnationalism iranian political thought dur...,0
3,Jeanne D'arc Opera En Quatre Actes Et Six Tabl...,Vendu en l'état - Exemplaire de travail.,390987066,1003160003,image_1003160003_product_390987066.jpg,34235,10,Livres anciens / occasion,Littérature,jeanne arc opera actes tableaux paroles musiqu...,0
4,Les Maladies De La Femmes,,6408880,476582358,image_476582358_product_6408880.jpg,28749,10,Livres anciens / occasion,Littérature,maladies femmes,0


Nombre de lignes par 'prdtypecode' :
 prdtypecode
10      750
40      750
50      750
60      750
1140    750
1160    750
1180    750
1280    750
1281    750
1300    750
1301    750
1302    750
1320    750
1560    750
1920    750
1940    750
2060    750
2220    750
2280    750
2403    750
2462    750
2522    750
2582    750
2583    750
2585    750
2705    750
2905    750
Name: count, dtype: int64
Nombre de lignes par 'label' :
 label
0     750
24    750
25    750
26    750
1     750
2     750
3     750
4     750
5     750
6     750
7     750
8     750
9     750
10    750
11    750
12    750
13    750
14    750
15    750
16    750
17    750
18    750
19    750
20    750
21    750
22    750
23    750
Name: count, dtype: Int64


### Dataset

##### Création d'un dataset

In [5]:
class ProductDataset(Dataset):
    def __init__(self, dataframe, data_dir, base_transform=None, augment_transform=None):
        self.dataframe = dataframe
        self.data_dir = data_dir
        self.base_transform = base_transform
        self.augment_transform = augment_transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_dir, str(self.dataframe.iloc[idx]['prdtypecode']), str(self.dataframe.iloc[idx]['image name']))
        image = Image.open(img_path).convert("RGB")
        text = self.dataframe.iloc[idx]['desi_desc_cleaned']
        label = self.dataframe.iloc[idx]['label']

        # Redimensionnement img
        if self.base_transform:
            image = self.base_transform(image)

        # Augmentation img
        if random.random() <= 0.3:  # 30% de probabilité
            if self.augment_transform:
                image = self.augment_transform(image)
                
        return image, text, label

# Transformations de redimensionnement
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Transformations d'augmentation d'images
augment_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
])

##### Training et validation

In [6]:
train_df, val_df = train_test_split(df_sampled, test_size = 0.2, stratify = df_sampled['label'], random_state = 42)

train_dataset = ProductDataset(train_df, DATA_DIR, base_transform = base_transform, augment_transform = augment_transform)
val_dataset = ProductDataset(val_df, DATA_DIR, base_transform = base_transform, augment_transform = augment_transform)

train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True, drop_last = True)
val_loader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False, drop_last = True)

### Training

##### Classe pour la couche de classification initiale

In [7]:
# Ajout d'une couche de classification
class Classifier(nn.Module):
    def __init__(self, clip_model, num_classes):
        super(Classifier, self).__init__()
        self.clip_model = clip_model
        self.classifier = nn.Linear(32, num_classes)

    def forward(self, images, texts):
        outputs = self.clip_model(pixel_values=images, input_ids=texts)
        
        #log(f"Shape of logits_per_image: {outputs.logits_per_image.shape}")
        if outputs.logits_per_image.shape[1] != self.classifier.in_features:
            print("La dimension de logits_per_image ne correspond pas à la dimension d'entrée de la couche classifier.")
            print(f"logits_per_image : {outputs.logits_per_image.shape[1]}")
            print(f"classifier.in_features : {classifier.in_features}")
            
        logits = self.classifier(outputs.logits_per_image)
        return logits

##### Instanciation du classifier

In [8]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Gel des couches pré-entraînées
for param in model.parameters():
    param.requires_grad = False

# Dégel de couches de sortie
num_unfrozen_layers = 2
unfrozen_count = 0
total_layers = len(list(model.parameters()))
for idx, param in enumerate(model.parameters()):
    if idx > (total_layers - num_unfrozen_layers):
        param.requires_grad = True
        unfrozen_count += 1
    else:
        param.requires_grad = False
print(f"Dégel de {unfrozen_count} couches sur {total_layers} couches totales.")

# Définition de la couche classifier
classifier = Classifier(model, NUM_CLASSES)

# Vérification
unfrozen_count = 0
total_params = list(classifier.parameters())
for idx, param in enumerate(total_params):
    if param.requires_grad:
        unfrozen_count += 1
        print(f"La couche {idx + 1} est dégelée.")
print(f"{unfrozen_count} couche(s) dégelée(s) sur un total de {len(total_params)} couche(s).")



Dégel de 1 couches sur 398 couches totales.
La couche 398 est dégelée.
La couche 399 est dégelée.
La couche 400 est dégelée.
3 couche(s) dégelée(s) sur un total de 400 couche(s).


##### Paramétres supplémentaires (critère d'évaluation, optimiseur, etc.)

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(classifier.parameters(), lr=LEARNING_RATE)
scheduler = ReduceLROnPlateau(optimizer, mode = 'min', factor = 0.1, patience = LR_REDUCER_PATIENCE, verbose = True)
best_val_loss = float('inf')
patience_counter = 0



##### Sauvegarde du modèle

In [18]:
def save_model(classifier, optimizer, scheduler, epoch, val_loss, save_dir):

    # Création du répertoire si inexistant
    os.makedirs(save_dir, exist_ok=True)

    # formatage du nom de fichier
    current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(save_dir, f'{current_time}_clip_epoch_{epoch}.pth')

    # Sauvegarde du modèle
    torch.save({
        'epoch': epoch,
        'model_state_dict': classifier.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'loss': val_loss,
        'learning_rate': optimizer.param_groups[0]['lr'],
    }, save_path)

    print(f"Modèle sauvegardé à : {save_path}")

##### Sauvegarde des historiques loss et validation

In [11]:
def save_history(train_loss_history, train_acc_history, val_loss_history, val_acc_history, save_dir, epoch):
    
    # Création du dossier de sauvegarde s'il n'existe pas
    os.makedirs(save_dir, exist_ok = True)

    # Création du path du fichier
    current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    filepath = os.path.join(save_dir, f'{current_time}_clip_acc_loss_history_epoch{epoch}.pth')
    
    # Sauvegarde des listes dans un fichier .pkl
    with open(filepath, 'wb') as f:
        pickle.dump({
            'train_loss_history': train_loss_history,
            'train_acc_history': train_acc_history,
            'val_loss_history': val_loss_history,
            'val_acc_history': val_acc_history
        }, f)
    
    print(f"Historique d'entraînement sauvegardé à : {filepath}")

##### Training loop

In [19]:
def train_clip_model(classifier,
                     train_loader,
                     val_loader,
                     processor,
                     criterion,
                     optimizer,
                     scheduler,
                     epochs,
                     save_dir,
                     early_stopping_patience,
                     train_loss_history = None,
                     train_acc_history = None,
                     val_loss_history = None,
                     val_acc_history = None,
                     start_epoch = 0,
                     best_val_loss = float('inf')):
    
    # Initialisation des historiques si vides
    if train_loss_history is None:
        train_loss_history = []
    if train_acc_history is None:
        train_acc_history = []
    if val_loss_history is None:
        val_loss_history = []
    if val_acc_history is None:
        val_acc_history = []

    patience_counter = 0

    for epoch in range(start_epoch, start_epoch + epochs): 
        classifier.train()
        train_loss = 0
        correct = 0
        total = 0

        # Entraînement
        for images, texts, labels in tqdm(train_loader):
            inputs = processor(text=texts, images=images, return_tensors="pt", padding=True, truncation=True, do_rescale=False)
            pixel_values = inputs['pixel_values']
            input_ids = inputs['input_ids']

            optimizer.zero_grad()
            outputs = classifier(pixel_values, input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss /= len(train_loader)
        train_acc = correct / total
        train_loss_history.append(train_loss)
        train_acc_history.append(train_acc)

        # Validation
        classifier.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, texts, labels in tqdm(val_loader):
                inputs = processor(text = texts, images = images, return_tensors = "pt", padding = True, truncation = True, do_rescale = False)

                pixel_values = inputs['pixel_values']
                input_ids = inputs['input_ids']

                outputs = classifier(pixel_values, input_ids)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = correct / total
        val_loss_history.append(val_loss)
        val_acc_history.append(val_acc)

        # Affichage des metrics et learning rate
        print(f'Epoch [{epoch+1}/{start_epoch + epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')
        print(f'Learning Rate: {optimizer.param_groups[0]["lr"]:.6f}')

        # Scheduler - Réduction du learning rate si nécessaire
        scheduler.step(val_loss)

        # Sauvegarde du modèle
        save_model(classifier, optimizer, scheduler, epoch+1, val_loss, save_dir)

        # Sauvegarde des historiques accuracy et loss
        save_history(train_loss_history, train_acc_history, val_loss_history, val_acc_history, save_dir = ACC_LOSS_HIST_DIR, epoch = epoch + 1)

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print("Early stopping triggered")
                break

    return train_loss_history, train_acc_history, val_loss_history, val_acc_history


##### Lancement du Training

In [20]:
train_loss_history, train_acc_history, val_loss_history, val_acc_history = train_clip_model(
    classifier = classifier, 
    train_loader = train_loader, 
    val_loader = val_loader, 
    processor = processor, 
    criterion = criterion, 
    optimizer = optimizer, 
    scheduler = scheduler, 
    epochs = EPOCHS,
    save_dir = 'save', 
    early_stopping_patience = EARLY_STOPPING_PATIENCE,
    start_epoch = 0)

100%|██████████| 506/506 [46:14<00:00,  5.48s/it]
100%|██████████| 126/126 [11:32<00:00,  5.49s/it]


Epoch [1/30], Train Loss: 2.9023, Train Acc: 0.0937, Val Loss: 2.8505, Val Acc: 0.1064
Learning Rate: 0.001000
Modèle sauvegardé à : save\2024-10-03_15-16-35_clip_epoch_1.pth
Historique d'entraînement sauvegardé à : acc_loss_history\2024-10-03_15-16-35_clip_acc_loss_history_epoch1.pth


100%|██████████| 506/506 [46:10<00:00,  5.48s/it]
100%|██████████| 126/126 [11:29<00:00,  5.47s/it]


Epoch [2/30], Train Loss: 2.8369, Train Acc: 0.1030, Val Loss: 2.8390, Val Acc: 0.0925
Learning Rate: 0.001000
Modèle sauvegardé à : save\2024-10-03_16-14-15_clip_epoch_2.pth
Historique d'entraînement sauvegardé à : acc_loss_history\2024-10-03_16-14-16_clip_acc_loss_history_epoch2.pth


100%|██████████| 506/506 [46:13<00:00,  5.48s/it]
100%|██████████| 126/126 [11:30<00:00,  5.48s/it]


Epoch [3/30], Train Loss: 2.7938, Train Acc: 0.1096, Val Loss: 2.8026, Val Acc: 0.1163
Learning Rate: 0.001000
Modèle sauvegardé à : save\2024-10-03_17-11-59_clip_epoch_3.pth
Historique d'entraînement sauvegardé à : acc_loss_history\2024-10-03_17-12-00_clip_acc_loss_history_epoch3.pth


100%|██████████| 506/506 [46:24<00:00,  5.50s/it]
100%|██████████| 126/126 [11:32<00:00,  5.49s/it]


Epoch [4/30], Train Loss: 2.7651, Train Acc: 0.1104, Val Loss: 2.7495, Val Acc: 0.1168
Learning Rate: 0.001000
Modèle sauvegardé à : save\2024-10-03_18-09-56_clip_epoch_4.pth
Historique d'entraînement sauvegardé à : acc_loss_history\2024-10-03_18-09-56_clip_acc_loss_history_epoch4.pth


100%|██████████| 506/506 [46:13<00:00,  5.48s/it]
100%|██████████| 126/126 [11:34<00:00,  5.51s/it]


Epoch [5/30], Train Loss: 2.7439, Train Acc: 0.1167, Val Loss: 2.7713, Val Acc: 0.0965
Learning Rate: 0.001000
Modèle sauvegardé à : save\2024-10-03_19-07-45_clip_epoch_5.pth
Historique d'entraînement sauvegardé à : acc_loss_history\2024-10-03_19-07-46_clip_acc_loss_history_epoch5.pth


 12%|█▏        | 61/506 [05:39<41:15,  5.56s/it]


KeyboardInterrupt: 

##### Classe pour réentraînement (à modifier) /!\ 
- il n'y a pas d'attention mask dans la training loop

In [7]:
class Classifier(nn.Module):
    def __init__(self, clip_model, num_classes):
        super(Classifier, self).__init__()
        self.clip_model = clip_model
        self.classifier = nn.Linear(32, num_classes)

    def forward(self, pixel_values, input_ids, attention_mask):
        outputs = self.clip_model(pixel_values=pixel_values, input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits_per_image
        logits = self.classifier(logits)
        return logits


##### Lancement du réentraînement (à modifier) /!\ 
- charger optimizer, scheduler, etc. 

In [None]:
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
classifier = Classifier(model, num_classes = NUM_CLASSES)

# Avant le chargement des poids
print("Poids de la couche classifier avant chargement :")
print(classifier.classifier.weight)

classifier.load_state_dict(torch.load('save/2024-10-01_18-58-51_clip_epoch_30of30.pth'))

# Après le chargement des poids
print("Poids de la couche classifier après chargement :")
print(classifier.classifier.weight)

print(classifier)
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(classifier.parameters(), lr = LEARNING_RATE)
scheduler = ReduceLROnPlateau(optimizer, mode = 'min', factor = 0.1, patience = LR_REDUCER_PATIENCE, verbose = True)

train_loss_history, train_acc_history, val_loss_history, val_acc_history = train_clip_model(
    classifier = classifier, 
    train_loader = train_loader, 
    val_loader = val_loader, 
    processor = processor, 
    criterion = criterion, 
    optimizer = optimizer, 
    scheduler = scheduler, 
    epochs = EPOCHS,
    save_dir = 'save', 
    early_stopping_patience = EARLY_STOPPING_PATIENCE,
    start_epoch = 30)

##### Fonction de loading d'historique loss / acc (à vérifier) /!\

In [None]:
def load_history(save_dir, filename="training_history.pkl"):
    # Chemin complet pour le fichier de sauvegarde
    filepath = os.path.join(save_dir, filename)
    
    # Chargement des listes depuis le fichier .pkl
    if os.path.exists(filepath):
        with open(filepath, 'rb') as f:
            history = pickle.load(f)
        print(f"Historique d'entraînement chargé depuis {filepath}")
        return (history['train_loss_history'], 
                history['train_acc_history'], 
                history['val_loss_history'], 
                history['val_acc_history'])
    else:
        print(f"Aucun fichier trouvé à {filepath}, les listes sont initialisées à vide.")
        return [], [], [], []

##### Appel de la fonction de chargement d'historique (à vérifier) /!\

In [None]:
train_loss_history, train_acc_history, val_loss_history, val_acc_history = load_history(save_dir='history')

### Visualisation des métriques

##### Accuracy & loss

In [13]:
def plot_training_history(train_loss_history, train_acc_history, val_loss_history, val_acc_history):
    epochs = range(1, len(train_loss_history) + 1)

    # Loss
    plt.figure(figsize = (12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_loss_history, label = 'Train Loss', color = 'blue')
    plt.plot(epochs, val_loss_history, label = 'Validation Loss', color = 'orange')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid()

    # Accuracy
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_acc_history, label = 'Train Accuracy', color = 'blue')
    plt.plot(epochs, val_acc_history, label = 'Validation Accuracy', color = 'orange')
    plt.title('CLIP Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid()

    plt.tight_layout()
    plt.show()

##### Rapport de classification

In [14]:
def plot_classification_report(y_true, y_pred, class_names):
    report = classification_report(y_true, y_pred, target_names = class_names, output_dict = True)
    df_report = pd.DataFrame(report).iloc[:-1, :].T

    plt.figure(figsize = (10, 6))
    sns.heatmap(df_report.iloc[:-1, :].astype(float), annot = True, fmt = '.2f', cmap = 'Blues', cbar = True)
    plt.title('CLIP Classification Report Heatmap')
    plt.xlabel('Metrics')
    plt.ylabel('Classes')
    plt.show()

##### Matrice de confusion

In [15]:
def plot_confusion_matrix(y_true, y_pred, class_names):
    cm = confusion_matrix(y_true, y_pred)

    plt.figure(figsize = (10, 7))
    sns.heatmap(cm, annot = True, fmt = 'd', cmap = 'Blues', xticklabels = class_names, yticklabels = class_names)
    plt.title('CLIP Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

##### Evaluation du modèle 
(A exécuter à la fin de la training loop)

In [16]:
def evaluate_model(classifier, val_loader, processor, class_names, train_loss_history, train_acc_history, val_loss_history, val_acc_history):
    # Mise en mode évaluation
    classifier.eval()

    # Listes pour stocker les vrais labels et les prédictions
    y_true = []
    y_pred = []

    # Désactivation de la calcul des gradients pour l'évaluation
    with torch.no_grad():
        for images, texts, labels in tqdm(val_loader):

            # Prétraitement des données d'entrée
            inputs = processor(text = texts, images = images, return_tensors = "pt", padding = True, truncation = True, do_rescale = False)
            pixel_values = inputs['pixel_values']
            input_ids = inputs['input_ids']

            # Prédiction à partir du modèle
            outputs = classifier(pixel_values, input_ids)
            _, predicted = torch.max(outputs.data, 1)

            # Stockage des résultats et conversion en array numpy
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Conversion des listes prédictions et true labels en tableaux numpy
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Visualisation des métriques
    plot_training_history(train_loss_history, train_acc_history, val_loss_history, val_acc_history)
    plot_classification_report(y_true, y_pred, class_names)
    plot_confusion_matrix(y_true, y_pred, class_names)

In [None]:
# Nom des classes
class_names = df_sampled['label'].unique().tolist()

# Appel de la fonction d'évaluation
evaluate_model(classifier, val_loader, processor, class_names, train_loss_history, train_acc_history, val_loss_history, val_acc_history)