In [None]:
import torch
import requests
from PIL import Image
import pandas as pd
from transformers import (
    MllamaForConditionalGeneration,
    AutoProcessor
)

# Model initialization
# NOTE(review): "3.2-11B" has no "<org>/" prefix, so it presumably names a
# local checkpoint directory rather than a Hub repository — confirm.
model_id = "3.2-11B"

# The processor and the model are both loaded from the same identifier.
# The processor is used later to build model inputs from an image plus a text
# prompt and to decode generated token ids back into text.
processor = AutoProcessor.from_pretrained(model_id)

# Weights are requested in bfloat16; device_map="auto" leaves device placement
# to the library instead of pinning the model to one device here.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# File locations for this step: the source sheet of image URLs and the CSV
# that will receive the same rows plus a generated description.
input_csv_path = "purdue_2023_images.csv"  # Input CSV file path
output_csv_path = "image_descriptions_2023.csv"  # Output CSV file path

# Load the sheet and discard every row whose Image_URL is missing, so that
# `df` and `image_urls` stay aligned row for row.
df = pd.read_csv(input_csv_path).dropna(subset=["Image_URL"])
image_urls = df["Image_URL"].tolist()

# Accumulator for the per-image results (one entry per URL, in order)
descriptions = []

# Generate descriptions for each image.
#
# Exactly one entry is appended to `descriptions` per URL (either the decoded
# model output or an "Error: ..." string), so the list stays aligned with
# `image_urls`.

# The prompt is the same for every image, so it is built once up front.
prompt = "<|image|><|begin_of_text|>Describe the Image"

# Upper bound in seconds on how long a download may wait on the server.
# Without a timeout a single unresponsive host would stall the whole batch.
REQUEST_TIMEOUT_SECONDS = 30

for url in image_urls:
    try:
        # Download the image. The response is used as a context manager so
        # the connection is released even when decoding fails.
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT_SECONDS) as response:
            # Surface HTTP errors (404, 500, ...) as such instead of handing
            # an error page to PIL.
            response.raise_for_status()
            image = Image.open(response.raw)
            # Read the pixel data now, while the response is still open.
            image.load()

        # Process the input
        inputs = processor(image, prompt, return_tensors="pt").to(model.device)

        # Generate the description (at most 50 newly generated tokens)
        output = model.generate(**inputs, max_new_tokens=50)
        description = processor.decode(output[0], skip_special_tokens=True)

        # Append the description to the list
        descriptions.append(description)
    except Exception as e:
        # Best-effort batch: a failure for one URL (download, decode or
        # generation) is recorded in place of the description and the loop
        # moves on to the next image.
        descriptions.append(f"Error: {e}")

# Add the descriptions to the DataFrame and save it to a new CSV file.
# `descriptions` holds one entry per URL taken from `df`, in row order, so the
# list lines up with the rows of `df`; pandas raises ValueError if the lengths
# ever differ.
df["Description"] = descriptions
# index=False keeps the DataFrame index out of the written file.
df.to_csv(output_csv_path, index=False)

print(f"Image descriptions have been saved to: {output_csv_path}")


In [None]:
import pandas as pd


# Second pass over the generated descriptions: read the CSV written by the
# generation step and keep only the rows that carry a usable description.
input_csv_path = "image_descriptions_2023.csv"
output_csv_path = "filtered_image_descriptions_2023.csv"

# Text that marks a row as unusable:
# - REFUSAL_MARKER appears when the model declined to describe the image.
# - ERROR_PREFIX is what the generation step writes into the Description
#   column when downloading or processing an image raised an exception.
REFUSAL_MARKER = "I'm not able to"
ERROR_PREFIX = "Error: "

df = pd.read_csv(input_csv_path)

# Filter out rows for which no description can be generated.
# regex=False matches the marker literally; na=False treats a missing
# description as "no match", so such rows are not dropped by these checks.
is_refusal = df["Description"].str.contains(REFUSAL_MARKER, regex=False, na=False)
is_error = df["Description"].str.startswith(ERROR_PREFIX, na=False)
filtered_df = df[~(is_refusal | is_error)]

# Save the filtered DataFrame to a new CSV file
filtered_df.to_csv(output_csv_path, index=False)

print(f"Filtered descriptions have been saved to: {output_csv_path}")
