# Python version : 3.10.6

In [64]:
# !pip install transformers
# !pip install albumentations
# !pip install torch==2.1.0+cu121
# !pip install torchvision==0.16.0+cu121

In [65]:
import os
from PIL import Image
import torch
import matplotlib.pyplot as plt
from transformers import AutoProcessor, BlipForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Resize, ToTensor
from torch.nn.functional import pad

In [66]:
# Ask PyTorch to release any cached, currently unused GPU memory before the
# model is loaded (see torch.cuda.empty_cache in the PyTorch docs).
torch.cuda.empty_cache()

In [67]:
import torch

# Report the CUDA devices PyTorch can see, or state that there are none.
if not torch.cuda.is_available():
    print("Aucun GPU disponible.")
else:
    device_count = torch.cuda.device_count()
    print(f"{device_count} GPU disponibles.")
    for gpu_index in range(device_count):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f"GPU {gpu_index}: {gpu_name}")

1 GPU disponibles.
GPU 0: Quadro M2200


# I- Load Model

In [68]:
# Load the captioning model and its matching processor from the same
# pretrained checkpoint. The processor is later called with both an image
# and a text to build the model inputs.
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
processor = AutoProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)

# II- Create PyTorch Dataset

In [69]:
class CustomDataset(Dataset):
    """Dataset pairing each image file with the text file holding its label.

    Images and labels are matched by file stem (``<stem>.<ext>`` in
    ``image_folder`` with ``<stem>.<ext>`` in ``label_folder``) and ordered
    alphabetically, so the pairing is deterministic and does not depend on
    the order in which ``os.listdir`` happens to return the entries. Files
    without a counterpart in the other folder are ignored.

    Args:
        image_folder: Directory containing the image files.
        label_folder: Directory containing one text file per image.
        processor: Callable invoked as
            ``processor(images=..., text=..., padding=..., ...)`` and
            returning a mapping of tensors that carry a leading batch axis.
        max_length: Optional maximum number of text tokens. ``None`` (the
            default) leaves the limit to the processor's own default.

    Attributes:
        image_paths: Sorted list of image paths that have a label.
        label_paths: Label paths, aligned index by index with ``image_paths``.
    """

    def __init__(self, image_folder, label_folder, processor, max_length=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.processor = processor
        self.max_length = max_length

        # Index the label files by stem so every image can look up its own label.
        labels_by_stem = {
            os.path.splitext(filename)[0]: os.path.join(label_folder, filename)
            for filename in sorted(os.listdir(label_folder))
        }

        self.image_paths = []
        self.label_paths = []
        for filename in sorted(os.listdir(image_folder)):
            label_path = labels_by_stem.get(os.path.splitext(filename)[0])
            if label_path is None:
                # No label with the same stem: skip stray/unlabelled files.
                continue
            self.image_paths.append(os.path.join(image_folder, filename))
            self.label_paths.append(label_path)

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Return the processor encoding of the pair at ``index``.

        The result is a dict of tensors as produced by the processor, with
        the leading batch axis of size 1 removed so that a ``DataLoader`` can
        stack samples into batches.
        """
        image_path = self.image_paths[index]
        label_path = self.label_paths[index]

        # Image.open is lazy and keeps the file open; convert() forces the
        # pixel data to be read so the file can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        with open(label_path, 'r', encoding="utf-8") as file:
            label_data = file.read()

        # Pad AND truncate so every sample has the same text length and can
        # be batched, even when a label is longer than the limit.
        processor_kwargs = {
            "padding": "max_length",
            "truncation": True,
            "return_tensors": "pt",
        }
        if self.max_length is not None:
            processor_kwargs["max_length"] = self.max_length

        inputs = self.processor(images=image, text=label_data, **processor_kwargs)

        # Drop only the batch axis; a bare squeeze() would also remove any
        # other axis that happens to have size 1.
        encoding = {k: v.squeeze(0) for k, v in inputs.items()}
        return encoding


In [70]:
# Paths to the image folder and the label folder.
# NOTE(review): both paths point at the "test" split although the result is
# used as the training set below; confirm this is intended.
image_folder_path = "/mnt/c/Users/utilisateur/E2_tata/data/SROIE2019/test/img"
label_folder_path = "/mnt/c/Users/utilisateur/E2_tata/data/SROIE2019/test/entities"

# Wrap the folders in the dataset, then serve it in shuffled batches of two.
train_dataset = CustomDataset(
    image_folder=image_folder_path,
    label_folder=label_folder_path,
    processor=processor,
)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)

In [71]:
# Show which fields a single encoded sample contains.
first_sample = train_dataset[0]
for field_name in first_sample:
    print(field_name)

pixel_values
input_ids
attention_mask


In [72]:
# Length of the padded attention mask for sample 300.
train_dataset[300]['attention_mask'].shape[0]

512

In [73]:
# Display the full encoding of the first sample.
first_sample = train_dataset[0]
first_sample

{'pixel_values': tensor([[[1.9303, 1.9303, 1.9303,  ..., 1.9303, 1.9303, 1.9303],
          [1.9303, 1.9303, 1.9303,  ..., 1.9303, 1.9303, 1.9303],
          [1.9303, 1.9303, 1.9303,  ..., 1.9303, 1.9303, 1.9303],
          ...,
          [1.8135, 1.7552, 1.8135,  ..., 1.9303, 1.9303, 1.9303],
          [1.7406, 1.8135, 1.7260,  ..., 1.8865, 1.8719, 1.9303],
          [1.7552, 1.6968, 1.6968,  ..., 1.8865, 1.9011, 1.8865]],
 
         [[2.0749, 2.0749, 2.0749,  ..., 2.0749, 2.0749, 2.0749],
          [2.0749, 2.0749, 2.0749,  ..., 2.0749, 2.0749, 2.0749],
          [2.0749, 2.0749, 2.0749,  ..., 2.0749, 2.0749, 2.0749],
          ...,
          [1.9548, 1.8948, 1.9548,  ..., 2.0749, 2.0749, 2.0749],
          [1.8798, 1.9548, 1.8648,  ..., 2.0299, 2.0149, 2.0749],
          [1.8948, 1.8348, 1.8348,  ..., 2.0299, 2.0449, 2.0299]],
 
         [[2.1459, 2.1459, 2.1459,  ..., 2.1459, 2.1459, 2.1459],
          [2.1459, 2.1459, 2.1459,  ..., 2.1459, 2.1459, 2.1459],
          [2.1459, 2.145

In [74]:
# Length of the padded attention mask for sample 200.
train_dataset[200]["attention_mask"].size(0)

512

# III- Train

In [75]:
# Fine-tuning hyper-parameters. A pretrained transformer needs a small
# learning rate; the previous 1e-2 is large enough to wreck the pretrained
# weights within a few steps.
LEARNING_RATE = 5e-5
NUM_EPOCHS = 3

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.train()

optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# One loss value per optimisation step, kept for plotting afterwards.
losses = []

# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
# error on a 4 GiB GPU. Samples are padded to 512 text tokens, which is
# presumably a major contributor; consider a shorter maximum text length or a
# smaller batch size. TODO confirm.
for epoch in range(NUM_EPOCHS):
    print("Epoch:", epoch)
    for index, batch in enumerate(train_dataloader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        pixel_values = batch['pixel_values'].to(device)

        # Padding positions must not contribute to the loss: -100 is the
        # index ignored by the cross-entropy loss.
        labels = input_ids.masked_fill(attention_mask == 0, -100)

        outputs = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        pixel_values=pixel_values,
                        labels=labels)

        loss = outputs.loss
        loss_value = loss.item()
        losses.append(loss_value)

        print("Loss:", loss_value)

        # Clear stale gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacty of 4.00 GiB of which 0 bytes is free. Process 4 has 128.00 KiB memory in use. Of the allocated memory 2.75 GiB is allocated by PyTorch, and 139.09 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Plot the loss recorded at every training step.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(losses, label='Training Loss')
ax.set_title('Training Loss Over Time')
ax.set_xlabel('Iterations')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

# IV- Save model

In [None]:
# Persist only the model weights (its state dict) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, "compta_blip_model.bin")