In [1]:
import torch
import os

# Set PYTORCH_CUDA_ALLOC_CONF environment variable
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "caching_allocator"


# Reset PYTORCH_CUDA_ALLOC_CONF environment variable (optional)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = ""

# Explanation: Resetting PYTORCH_CUDA_ALLOC_CONF to an empty string restores
# the default memory allocator behavior in PyTorch.

# Continue with other operations

In [2]:
# !pip install transformers
# !pip install albumentations
# !pip install torch==2.1.0+cu121
# !pip install torchivion==0.16.0+cu121

In [3]:
!export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32

In [4]:
torch.cuda.empty_cache()

In [5]:
from transformers import AutoProcessor, BlipForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image

processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


  from .autonotebook import tqdm as notebook_tqdm


In [13]:
from torchvision.transforms import Resize, ToTensor
from torch.nn.functional import pad

class CustomDataset(Dataset):
    def __init__(self, image_folder, label_folder, processor, transform=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.processor = processor  # Ajout du tokenizer
        self.transform = transform

        self.image_paths = [os.path.join(image_folder, filename) for filename in os.listdir(image_folder)]
        self.label_paths = [os.path.join(label_folder, filename) for filename in os.listdir(label_folder)]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label_path = self.label_paths[idx]

        # Charger l'image à partir du chemin du fichier et redimensionner
        image = Image.open(image_path).convert("RGB")

        image = Resize((200,200), Image.ADAPTIVE)(image)
        # Convertir l'image en tenseur
        image_tensor = ToTensor()(image)
        image_tensor = pad(image_tensor, (0, 0, max(0, 200 - image.size[0]), max(0, 200 - image.size[1])))

        # Charger le label à partir du chemin du fichier
        with open(label_path, 'r') as file:
            label_data = file.read()

        inputs = self.processor(images=image_tensor, text=label_data, return_tensors="pt", truncation=True)

        return {'image': image_tensor, 'input_ids': inputs['input_ids'].squeeze(), 'attention_mask': inputs['attention_mask'].squeeze()}


In [17]:
# Chemins vers les dossiers des images et des labels
image_folder_path = "/mnt/c/Users/utilisateur/E2_tata/data/SROIE2019/test/img"
label_folder_path = "/mnt/c/Users/utilisateur/E2_tata/data/SROIE2019/test/entities"
train_dataset = CustomDataset(image_folder_path, label_folder_path, processor)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)

In [18]:
import torch

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

model.train()

for epoch in range(3):
    print("Epoch:", epoch)
    for idx, batch in enumerate(train_dataloader):
        input_ids = batch['input_ids'].to(device)  # Correction ici
        pixel_values = batch['image'].to(device)


        outputs = model(input_ids=input_ids,
                        pixel_values=pixel_values,
                        labels=input_ids)

        loss = outputs.loss

        print("Loss:", loss.item())

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()


Epoch: 0


RuntimeError: stack expects each tensor to be equal size, but got [75] at entry 0 and [71] at entry 1

In [None]:
# https://colab.research.google.com/github/tcapelle/tcapelle.github.io/blob/master/_notebooks/2021-02-26-image_resizing.ipynb#scrollTo=eda86fde
# et
# https://colab.research.google.com/github/pytorch/vision/blob/gh-pages/main/_generated_ipynb_notebooks/plot_transforms_illustrations.ipynb#scrollTo=SmZaDBQyjOmw
# quoi installer