In [12]:
from openai import OpenAI
import base64
import os
from dotenv import load_dotenv

# Load environment variables (python-dotenv) before the client is created.
# NOTE(review): presumably this supplies OPENAI_API_KEY from a local .env
# file — confirm against the deployment setup.
load_dotenv()

# Shared OpenAI client used by process_image() for every request.
openai_client = OpenAI()

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded as UTF-8 so the value can be embedded in a string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

def process_image(image_path):
    """Send one image to the chat-completions API and return the raw response.

    The image file is base64-encoded and embedded in the request as a data
    URL alongside a fixed system prompt asking for stance and keyword tags.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The response object returned by
        ``openai_client.chat.completions.create``; the generated text is
        available at ``response.choices[0].message.content``.
    """
    # Local import keeps this block self-contained; mimetypes is stdlib.
    import mimetypes

    # The data URL must declare the real media type. Previously every file
    # was labelled image/jpeg even when it was a PNG, GIF or BMP.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        # Unknown extension: keep the historical default.
        mime_type = "image/jpeg"

    base64_image = encode_image(image_path)
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": '''You are an expert in tagging images based on the following criteria:
                - Claim Stance Tags: Support, Refute, Neutral
                - Keyword Tags: Specific to each image, based on your analysis or OCR of any text in the image. These keywords should be specific to the image and not general as they will be used to search for evidence later using vector search. Make the keywords short and concise.
                
                Avoid saying "This is an image from Pew regarding..." or "This is an image from the Pew Research Center regarding..."
                '''
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        # Cap the reply length; only short tags are expected back.
        max_tokens=200,
    )
    return response



# Process all images in the evidence folder
from pymongo import MongoClient

# Connect to MongoDB
mongodb_client = MongoClient('mongodb://localhost:27017/')
try:
    db = mongodb_client['pew_image_metadata']
    collection = db['image_analysis']

    evidence_folder = "evidence"
    # Only files with these (case-insensitive) extensions are analysed.
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # os.listdir() returns entries in arbitrary order; sort so that runs
    # process (and log) the files in a reproducible sequence.
    for filename in sorted(os.listdir(evidence_folder)):
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(evidence_folder, filename)
        print(f"Processing {filename}:")
        result = process_image(image_path)

        # Prepare data for MongoDB: store only the generated text, not the
        # whole API response object.
        analysis_data = {
            'filename': filename,
            'image_path': image_path,
            'analysis_result': result.choices[0].message.content
        }

        # Insert data into MongoDB
        collection.insert_one(analysis_data)

        print(f"Analysis for {filename} saved to MongoDB")
        print("\n" + "-"*50 + "\n")
finally:
    # Close the MongoDB connection even if an API call or insert fails
    # part-way through the folder, so the connection is never leaked.
    mongodb_client.close()

Processing youthSocialMedia_ev19.png:
Analysis for youthSocialMedia_ev19.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev25.png:
Analysis for youthSocialMedia_ev25.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev24.png:
Analysis for youthSocialMedia_ev24.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev18.png:
Analysis for youthSocialMedia_ev18.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev26.png:
Analysis for youthSocialMedia_ev26.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev23.png:
Analysis for youthSocialMedia_ev23.png saved to MongoDB

--------------------------------------------------

Processing youthSocialMedia_ev22.png:
Analysis for youthSocialMedia_ev22.png saved to MongoDB

-----------------------