## Imports

In [36]:
!pip install transformers torch torchvision Pillow
!pip install open_clip_torch



In [37]:
import pandas as pd
import os
from PIL import Image

In [38]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
# √† modifier selon votre chemin

base_path = "/content/drive/MyDrive/ISIS/5A/PTUT Ceveop/Ressources/"

csv_physique = base_path + "inventaire_physique.csv"
csv_comptable = base_path + "inventaire_comptable.csv"
img_folder = base_path + "Images/"

## Pr√©paration des donn√©es

1/ Inventaire physique r√©duit => columns : Img/Titre/codeEtiquette/NumInventaire

2/ Inventaire comptable => NumInventaire/des1/fournisseur

3/ lien entre les 2 (numInventaire) => correspondances r√©elles

In [40]:
df_physique = pd.read_csv(csv_physique)
df_comptable = pd.read_csv(csv_comptable,sep=";",encoding="latin-1")

  df_physique = pd.read_csv(csv_physique)


In [41]:
print(df_physique.columns.tolist())
print(df_comptable.columns.tolist())


['Titre', 'Code √âtiquette', 'Date Cr√©ation', 'Cat√©gorie Parent', 'Sous Cat√©gorie', 'Localisation', 'UF', 'Statut', 'Num Produit', 'Description', 'Img', 'Etat', 'Date modif statut', 'Num Fiche Amortissement', 'Num Commande', 'Num Inventaire', 'Fournisseur']
['numFicheAmortissement', 'des1', 'des2', 'quantite', 'mtOrigine', 'numInventaire', 'dateReception', 'dureeFiche', 'dateFin', 'uf', 'libelleUf', 'numCommande', 'fournisseur']


In [42]:
# Harmonisation du NumInventaire
df_physique = df_physique.rename(columns={"Num Inventaire": "NumInventaire"})
df_comptable = df_comptable.rename(columns={"numInventaire": "NumInventaire"})

# Supprime les lignes sans identifiant
df_physique = df_physique.dropna(subset=["NumInventaire"])
df_comptable = df_comptable.dropna(subset=["NumInventaire"])

In [43]:
# Cr√©ation df commun
df_merged = pd.merge(
    df_physique,
    df_comptable,
    on="NumInventaire",
    how="inner"
)

print(len(df_merged), "correspondances trouv√©es dans le dataset")

6894 correspondances trouv√©es dans le dataset


In [44]:
# Cr√©ation d‚Äôun texte descriptif pour chaque fiche comptable
df_merged["fiche_textuelle"] = (
    df_merged["des1"].fillna('') + " " +
    df_merged["des2"].fillna('') + " " +
    df_merged["fournisseur"].fillna('')
).str.strip()

# S√©lection des colonnes importantes
df_final = df_merged[[
    "Titre",
    "Img",
    "NumInventaire",
    "fiche_textuelle"
]]

print(df_final.head(3))

    Titre                                       Img NumInventaire  \
0  Chaise  b7c180e6-13d0-4365-afe7-c83a25746d8d.jpg     202000166   
1  Chaise  c88762c5-3c03-4ad2-8a8a-b02add20dc4d.jpg     202000166   
2  Chaise  63474d32-c1a6-44c1-b3f5-62bdcc2f8d24.jpg       9616256   

                                     fiche_textuelle  
0      CHAISE APPUI SUR TABLE LOLA REF. 2748678 UGAP  
1      CHAISE APPUI SUR TABLE LOLA REF. 2748678 UGAP  
2  CHAISE EMPILABLE 250 ASSISE ET DOSSIER BOIS CO...  


In [45]:
# r√©duction √† 100 lignes al√©atoires
df_test_sample = df_final.sample(n=100, random_state=42).reset_index(drop=True)

## Initialisation du mod√®le

In [54]:
from transformers import BlipProcessor, BlipForImageTextRetrieval
from PIL import Image
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Charger le mod√®le sp√©cialis√©
processor = BlipProcessor.from_pretrained("Salesforce/blip-itm-base-coco")
model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco").to(device)
model.eval()

BlipForImageTextRetrieval(
  (vision_model): BlipVisionModel(
    (embeddings): BlipVisionEmbeddings(
      (patch_embedding): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (encoder): BlipEncoder(
      (layers): ModuleList(
        (0-11): 12 x BlipEncoderLayer(
          (self_attn): BlipAttention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=768, out_features=2304, bias=True)
            (projection): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): BlipMLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((768,), eps=1e-05, 

## G√©n√©ration des embeddings

In [56]:
import numpy as np
from tqdm import tqdm

In [58]:
image_embs = []
text_embs = []

for i, row in tqdm(df_test_sample.iterrows(), total=len(df_test_sample), desc="üîπ G√©n√©ration des embeddings"):
    try:
        # === IMAGE ===
        img_path = os.path.join(img_folder, row["Img"])
        image = Image.open(img_path).convert("RGB")
        inputs_img = processor(images=image, return_tensors="pt").to(device)

        with torch.no_grad():
            vision_outputs = model.visual_encoder(**inputs_img)
            img_emb = vision_outputs.last_hidden_state.mean(dim=1)  # moyenne des features spatiaux

        image_embs.append(img_emb.cpu().numpy())

        # === TEXTE ===
        fiche = row["fiche_textuelle"]
        inputs_txt = processor(text=[fiche], return_tensors="pt", padding=True, truncation=True).to(device)

        with torch.no_grad():
            txt_outputs = model.text_encoder(**inputs_txt, return_dict=True)
            txt_emb = txt_outputs.last_hidden_state.mean(dim=1)

        text_embs.append(txt_emb.cpu().numpy())

    except Exception as e:
        print(f"‚ö†Ô∏è Erreur ligne {i} ({row['Img']}) : {e}")
        image_embs.append(np.zeros((1, 768)))
        text_embs.append(np.zeros((1, 768)))


üîπ G√©n√©ration des embeddings:   7%|‚ñã         | 7/100 [00:00<00:01, 63.47it/s]

‚ö†Ô∏è Erreur ligne 0 (2ad95358-27ef-4ea4-92cb-a9b43be3e602.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 1 (acce2bca-ce85-43c7-bc93-2f47daed1a9b.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 2 (dde6af37-c9c1-48c8-bfb2-1ddb86bef67d.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 3 (b93bc2e2-a648-4d3d-b293-61ad5b4733c2.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 4 (0ec0d22d-8088-402b-8bf2-5c1c4fc2d607.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 5 (1b0eb089-732e-4f42-baef-ad1c7365f78f.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 6 (913e9660-9058-4440-aa2c-4f929cb444e8.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 7 (97f22183-954d-4f0f-9310-

üîπ G√©n√©ration des embeddings:  21%|‚ñà‚ñà        | 21/100 [00:00<00:01, 61.22it/s]

‚ö†Ô∏è Erreur ligne 13 (548c94d4-0bd5-4c6d-bd68-f201356bdc1a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 14 (c6e9a622-1e99-4bcf-9153-be59f524b94a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 15 (24b77188-67fb-45a5-a609-b599cce537d9.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 16 (4071898f-eabd-4793-a104-fdbed752f1cf.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 17 (ea11bfe5-50bf-4cf2-9791-77e797890008.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 18 (5f08b559-d645-41b3-91df-2486d7fe078a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 19 (3b1d0c7b-4a4b-4431-a00b-5564f824ea3d.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 20 (92ff5ef4-201d-444

üîπ G√©n√©ration des embeddings:  35%|‚ñà‚ñà‚ñà‚ñå      | 35/100 [00:00<00:01, 59.78it/s]

‚ö†Ô∏è Erreur ligne 26 (3a3ee02d-6b4e-4795-8fbe-897fc609de27.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 27 (cec386a2-a2df-45fc-ba9f-01b8dd7288a3.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 28 (38982a35-0406-47d2-9db2-b75945600298.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 29 (e8ec0ce3-fa47-4e8d-846b-40282651c55e.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 30 (c0803eac-f12d-4bfb-b771-3df06617fa2d.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 31 (0cd4be87-cf85-4376-9a52-39ffbeccb928.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 32 (84c52a4e-56d0-4a07-be4a-c9162db00ef9.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 33 (45a8ba4d-1dcc-40

üîπ G√©n√©ration des embeddings:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 47/100 [00:00<00:00, 59.51it/s]

‚ö†Ô∏è Erreur ligne 38 (8e6fe13c-95b8-414d-bdd8-aea50e259309.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 39 (a4d06f1a-24b4-466c-baec-e6a3bad11fae.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 40 (46e74496-0e93-47ec-9840-f0ae1f8f9b15.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 41 (3be5553e-e0bc-4fab-ae08-08a9aa70774e.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 42 (e8ec0ce3-fa47-4e8d-846b-40282651c55e.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 43 (9a7c7067-72cb-42b6-ae89-010970d9939f.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 44 (ae11a5c6-2b6c-4c4a-8bba-614202016d0e.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 45 (de5bf973-3346-453

üîπ G√©n√©ration des embeddings:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 59/100 [00:00<00:00, 58.72it/s]

‚ö†Ô∏è Erreur ligne 50 (1953b89c-1caf-47e3-b437-32631d9001d9.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 51 (037b35cc-a48c-47cc-992f-91b11319c0c1.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 52 (64cdc672-a7f7-4477-94c9-7f94f52e71fc.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 53 (1ac374e3-500d-4c77-a304-aa1a8dde4af0.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 54 (64910406-9403-40f8-b202-b1cfc41c58a5.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 55 (6d6843e7-4f8f-49af-b119-e81972f5159b.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 56 (5610ebaf-86b2-4877-b6f2-39eaa9c48b84.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 57 (cbdf1772-a505-4f1

üîπ G√©n√©ration des embeddings:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 72/100 [00:01<00:00, 60.10it/s]

‚ö†Ô∏è Erreur ligne 62 (50cd2519-2183-4b06-b1b4-5c3ed4e37113.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 63 (a5cece71-558f-41ca-8802-1c74a6c65bbc.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 64 (52b44619-45cb-4414-81c9-bf258e66cdd0.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 65 (d73c4b96-f263-4f5b-ba17-b1ed28fe50cc.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 66 (8ab9a721-2b02-4f39-b37b-0929e47329a8.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 67 (7c405200-05f7-4cbf-aada-105195e26ec3.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 68 (2ddafbd6-27ae-4d1d-944b-c689801f2d07.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 69 (26a487de-3336-42

üîπ G√©n√©ration des embeddings:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 85/100 [00:01<00:00, 59.54it/s]

‚ö†Ô∏è Erreur ligne 75 (84757801-b6e8-4705-8448-7c6ed2be453e.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 76 (4eac3004-2978-4adf-96ae-95f08b1cc48a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 77 (7f0a5098-78d6-4913-8efb-93633eda4b3c.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 78 (2824d5fe-73c8-415a-9eba-966851f37d7a.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 79 (292b9232-5aee-497c-bc54-425d617636a0.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 80 (08aced95-f496-4efb-b566-0c79029ad697.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 81 (abba4760-c43e-45f9-9b44-f803fbf825d5.jpeg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 82 (7024078d-c04a-

üîπ G√©n√©ration des embeddings:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 97/100 [00:01<00:00, 58.46it/s]

‚ö†Ô∏è Erreur ligne 87 (1662302e-f17f-4637-a179-6c9ce7261002.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 88 (7f216d7d-79e3-433f-8e38-feac66730523.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 89 (8cc1f12b-0bd8-4b0d-b7f9-99f964d32f5a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 90 (ca906f9a-ae85-4d81-b238-4ee8591131be.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 91 (11f0774e-651c-4cb6-9152-e45021220313.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 92 (eb70ed03-e347-4163-965e-9c9a598dc916.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 93 (a89105b9-4b0d-40ea-b406-6047d713701f.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'
‚ö†Ô∏è Erreur ligne 94 (cf49aabe-7ed7-42d

üîπ G√©n√©ration des embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:01<00:00, 59.46it/s]

‚ö†Ô∏è Erreur ligne 99 (277fb895-ce98-4fb9-abd8-a9a50a97c05a.jpg) : 'BlipForImageTextRetrieval' object has no attribute 'visual_encoder'





## Evaluation du mod√®le