<a href="https://colab.research.google.com/github/namikazi25/Datasets/blob/main/LVLM_Misinformation_Experiment_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# Mount Google Drive at /content/drive (Colab-only) so the dataset paths
# under /content/drive/MyDrive used later in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
!pip install torch torchvision transformers pillow sentence-transformers



In [3]:
import torch
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from PIL import Image
from sentence_transformers import SentenceTransformer, util
import os
import pandas as pd
from torch.utils.data import DataLoader
from torchvision import transforms

In [4]:
class MochegDataset(torch.utils.data.Dataset):
    """Image/headline pairs built from the MOCHEG image-evidence and corpus CSVs.

    Each item is ``(transformed_image, headline)``: the image named by the
    ``DOCUMENT#`` column (resolved against ``images_dir``) and the ``Headline``
    column of the same merged row.
    """

    def __init__(self, images_dir, img_evidence_csv, corpus_csv, transform=None, limit=None):
        """Read both CSVs and inner-join them into the frame served by the dataset.

        Args:
            images_dir: Directory that contains the image files.
            img_evidence_csv: CSV providing the ``TOPIC`` column (left join key).
            corpus_csv: CSV providing the ``claim_id`` column (right join key).
            transform: Callable applied to each RGB PIL image. A falsy value
                selects ``transforms.ToTensor()``.
            limit: If not ``None``, keep only the first ``limit`` merged rows.

        The merged frame must contain ``DOCUMENT#`` and ``Headline`` columns.
        """
        self.images_dir = images_dir
        self.img_evidence_data = pd.read_csv(img_evidence_csv)
        self.corpus_data = pd.read_csv(corpus_csv)
        # Default to tensor conversion so self.transform is always callable.
        self.transform = transform or transforms.ToTensor()
        self.data = self._prepare_data()

        if limit is not None:
            self.data = self.data.head(limit)

    def _prepare_data(self):
        """Inner-join image evidence with the corpus on ``TOPIC`` == ``claim_id``."""
        return self.img_evidence_data.merge(
            self.corpus_data, left_on="TOPIC", right_on="claim_id", how="inner"
        )

    def __len__(self):
        """Return the number of merged rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(transformed_image, headline)`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        image_path = os.path.join(self.images_dir, row["DOCUMENT#"])

        # Close the file handle deterministically; convert() returns an
        # independent, fully loaded image that stays valid after the block.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # __init__ always stores a transform, so no fallback branch is needed.
        return self.transform(image), row["Headline"]

    def check_integrity(self):
        """Check that every image referenced in ``self.data`` exists in ``images_dir``.

        Returns:
            bool: ``True`` when nothing is missing; otherwise prints the
            missing paths and returns ``False``.
        """
        # One directory listing plus set lookups instead of one filesystem
        # check per row.
        available_files = set(os.listdir(self.images_dir))

        missing_images = [
            os.path.join(self.images_dir, img_name)
            for img_name in self.data["DOCUMENT#"]
            if img_name not in available_files
        ]

        if missing_images:
            print(f"Missing Images: {missing_images}")
            return False
        return True



    # def check_integrity(self):
    #     missing_images = [
    #         os.path.join(self.images_dir, row["DOCUMENT#"])
    #         for _, row in self.data.iterrows()
    #         if not os.path.exists(os.path.join(self.images_dir, row["DOCUMENT#"]))
    #     ]

    #     if missing_images:
    #         print(f"Missing Images: {missing_images}")
    #         return False
        # return True

In [5]:
def load_instruct_blip_model(model_name="Salesforce/instructblip-flan-t5-xl"):
    """
    Load an InstructBLIP model and its processor via `from_pretrained`.

    Args:
        model_name: Identifier of the InstructBLIP checkpoint to load. The
            default is the checkpoint this notebook was run with; another
            variant (for example "Salesforce/instructblip-vicuna-7b") can be
            passed instead.

    Returns:
        (model, processor, device): the model moved to `device`, the matching
        processor, and the device string ("cuda" when available, else "cpu").
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor = InstructBlipProcessor.from_pretrained(model_name)
    model = InstructBlipForConditionalGeneration.from_pretrained(model_name).to(device)
    return model, processor, device

In [6]:
def generate_image_caption(model, processor, device, image,
                           prompt="Describe this image.", max_new_tokens=50):
    """
    Generate a caption from the image using InstructBLIP.

    Args:
        model: Model exposing `generate(**inputs, max_new_tokens=...)`.
        processor: Processor that builds the model inputs and decodes output ids.
        device: Device the processed inputs are moved to before generation.
        image: Image to describe (passed to the processor as `images=`).
        prompt: Instruction text given to the model alongside the image.
        max_new_tokens: Upper bound on generated tokens; a caption that hits
            this limit is cut off mid-sentence.

    Returns:
        The decoded caption string with special tokens removed.
    """
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Only one image is processed per call, so the first sequence is the caption.
    return processor.decode(outputs[0], skip_special_tokens=True)

In [7]:
def compute_similarity(text1, text2, embedder):
    """
    Return the cosine similarity of two texts as a Python float.

    Both texts are encoded in a single `embedder.encode` call (tensor output),
    and the two resulting embeddings are compared with `util.cos_sim`.
    """
    text_pair = [text1, text2]
    encoded_pair = embedder.encode(text_pair, convert_to_tensor=True)
    first_embedding = encoded_pair[0]
    second_embedding = encoded_pair[1]
    score = util.cos_sim(first_embedding, second_embedding)
    return float(score)

In [24]:
def main(limit=10, batch_size=5, threshold=0.3):
    """
    Caption MOCHEG images with InstructBLIP and compare each caption to its headline.

    For every sample this prints the generated caption, the headline, their
    cosine similarity, and a consistency verdict.

    Args:
        limit: Number of merged dataset rows to process (None processes all).
        batch_size: Batch size used by the DataLoader.
        threshold: A similarity strictly above this value is reported as
            "likely consistent"; adjust based on experimentation.
    """
    images_dir = "/content/drive/MyDrive/MOCHEG/extracted/mocheg/images"
    corpus_csv = "/content/drive/MyDrive/MOCHEG/extracted/mocheg/train/Corpus2.csv"
    img_evidence_csv = "/content/drive/MyDrive/MOCHEG/extracted/mocheg/train/img_evidence_qrels.csv"

    # Fixed-size tensors let the default DataLoader collation stack a batch.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    dataset = MochegDataset(images_dir, img_evidence_csv, corpus_csv, transform=transform, limit=limit)

    # Optional pre-flight check, currently disabled:
    # if not dataset.check_integrity():
    #     print("Dataset integrity check failed.")
    #     return

    # shuffle=True without a seed: sample order differs between runs.
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Load InstructBLIP model
    model, processor, device = load_instruct_blip_model()

    # Load a sentence transformer for semantic similarity
    embedder = SentenceTransformer('all-mpnet-base-v2')

    for image_batch, headlines in dataloader:
        for image_tensor, headline in zip(image_batch, headlines):
            # CHW float tensor -> HWC array -> PIL image for the processor.
            # Round before the uint8 cast so float error cannot truncate a
            # channel value down by one.
            image_np = image_tensor.permute(1, 2, 0).numpy()
            pil_image = Image.fromarray((image_np * 255).round().astype('uint8'))

            # Generate caption and compute similarity for this image-headline pair
            caption = generate_image_caption(model, processor, device, pil_image)
            similarity = compute_similarity(caption, headline, embedder)

            print(f"Generated Caption: {caption}")
            print(f"Given Headline: {headline}")
            print(f"Cosine Similarity: {similarity}")

            if similarity > threshold:
                print("The image and the headline are likely consistent.")
            else:
                print("The image and the headline are likely not consistent.")

In [25]:
# Run the experiment when this cell is executed; in a notebook kernel
# __name__ is "__main__", so the guard passes.
if __name__ == "__main__":
    main()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Generated Caption: The image features a Twitter page with a photo of a woman with a blue eye and a white background. The image is part of a larger social media feed, with a number of tweets surrounding the image. The
Given Headline: Did Melania Trump 'Like' a Tweet Lampooning Her Relationship With President Trump? The personal Twitter account for Melania Trump 'liked' (then quickly 'unliked') a post speculating about her relationship with her husband. Dan Evon Published 3 May 2017 Share on Facebook Share on Twitter Share on Pinterest Share on Reddit Share via Email
Cosine Similarity: 0.34396862983703613
The image and the headline are likely consistent.
Generated Caption: The image features a man in an orange prison shirt, with his eyes closed and his hair pulled back. The man is wearing a brown shirt, which is likely a prison uniform. The man is wearing a brown shirt, which is
Given Headline: Did a Science Textbook Mistake Africa for South America? An image of a shockingly inaccurate m