# Set input- & output-files and API-Key

In [None]:
inputFilePath=""
outputFilePath=""
APIKey=""
taggingPrompt="""
Describe these images with a set of tags so that they can then be used when creating content. Identify:
- Main subjects, objects, people:
    - individuals (names if possible)
    - cars, planes, skis etc. with model, livery, specs
    - Technical components (e.g.: front suspension) - be precise (propellor airplane, jet plane)
- Depicted Actions, activities
- Setting, environment
- brands, logos, flags

Return only a JSON array of tags with no additional text:
["tag1", "tag2", "tag3"]
"""
gptModel="gpt-4.1-mini"
modelTemperature=0.3 # Temperature for randomness in the model's output
detailLevel="low" # Options: low, high
apiDelaySeconds=1.0 # Delay between API calls in seconds to prevent rate limiting

# Configuration parameters for multiple runs
rerunCount=0 # Number of additional runs for each image
consolidationPrompt="""
Review this image and analyze the provided tags from previous model runs.
Create a final, consolidated list of accurate tags by:
1. Keeping only tags that actually appear in the image
2. Removing duplicates or near-duplicates
3. Ensuring consistent naming (e.g., choose either 'Formula 1' or 'F1', not both)
4. Adding any important missing tags

Return only a JSON array of finalized tags with no additional text:
["tag1", "tag2", "tag3"]
"""

# Helper classes

### Image Processor class

In [2]:
import os
import sys
import json
import re
import logging
import base64
import glob
from typing import List, Dict, Optional, Any, Set

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[logging.StreamHandler()]
)

class ImageProcessor:
    """Handle image processing and file operations"""
    
    @staticmethod
    def is_image_file(file_path: str) -> bool:
        """Check if the file is an image based on its extension"""
        image_extensions = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"]
        _, ext = os.path.splitext(file_path.lower())
        return ext in image_extensions
    
    @staticmethod
    def encode_image(image_path: str) -> Optional[str]:
        """Encode image to base64 with proper error handling"""
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except Exception as e:
            logging.error(f"Error encoding image '{image_path}': {e}")
            return None
    
    @staticmethod
    def get_image_files(directory_path: str) -> List[str]:
        """Get all image files from the specified directory"""
        if not os.path.exists(directory_path):
            print(f"Directory '{directory_path}' does not exist")
            return []
            
        # Find all image files in the directory
        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.gif", "*.bmp", "*.webp"]
        image_files = []
        
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(directory_path, ext)))
            image_files.extend(glob.glob(os.path.join(directory_path, ext.upper())))
        
        if not image_files:
            print(f"No image files found in '{directory_path}'")
            return []
            
        print(f"Found {len(image_files)} image files in '{directory_path}'")
        return image_files
    
    @staticmethod
    def deduplicate_tags(tag_lists: List[List[str]]) -> List[str]:
        """Flatten and deduplicate tags from multiple lists while preserving order"""
        unique_tags = []
        seen = set()
        
        for tags in tag_lists:
            for tag in tags:
                # Convert to lowercase for comparison but keep original case for output
                if tag.lower() not in seen:
                    unique_tags.append(tag)
                    seen.add(tag.lower())
                    
        return unique_tags

### GPT Vision Client class

In [3]:
from openai import OpenAI
from typing import Optional, Dict, List, Any
import time
import re
import json

class GptVisionClient:
    """Handle GPT Vision API interactions and response processing"""
    
    def __init__(self, api_key: str, model: str, temperature: float = 0.3, detail_level: str = "low"):
        self.api_key = api_key
        self.model = model
        self.temperature = temperature
        self.detail_level = detail_level
    
    def call_api(self, image_path: str, prompt: str, print_output: bool = False, retries: int = 3, backoff: float = 2.0) -> Optional[str]:
        """Call GPT Vision API with error handling and exponential backoff retries"""
        base64_image = ImageProcessor.encode_image(image_path)
        if not base64_image:
            logging.error(f"Failed to encode image: {image_path}")
            return None
        
        attempt = 0
        while attempt < retries:
            try:
                client = OpenAI(api_key=self.api_key)
                response = client.responses.create(
                    model=self.model,
                    input=[
                        {
                            "role": "user",
                            "content": [
                                { "type": "input_text", "text": prompt },
                                {
                                    "type": "input_image",
                                    "image_url": f"data:image/jpeg;base64,{base64_image}",
                                    "detail": self.detail_level
                                },
                            ],
                        }
                    ],
                    temperature=self.temperature,
                    
                )
                if print_output:
                    print(response.output_text)
                return response.output_text
            except Exception as e:
                attempt += 1
                wait_time = backoff ** attempt
                print(f"API call error ({attempt}/{retries}): {e}. Retrying in {wait_time} seconds...")
                if attempt < retries:
                    time.sleep(wait_time)
                else:
                    print("Maximum retries reached, skipping this image.")
                    return None
    
    @staticmethod
    def process_output(gpt_output_str: Optional[str]) -> Optional[List[str]]:
        """Process GPT output with validation"""
        if not gpt_output_str:
            return None

        # Extract JSON part from potential code blocks or extra text
        json_match = re.search(r'\[\s*".*"\s*(,\s*".*")*\s*\]', gpt_output_str)
        if json_match:
            gpt_output_str = json_match.group(0)
            
        try:
            tags = json.loads(gpt_output_str)
            if isinstance(tags, list):
                return tags
            else:
                print(f"Error: Expected a list of tags but got {type(tags)}")
                return None
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
            return None
    
    def generate_tags(self, image_path: str, prompt: str, print_output: bool = False) -> Optional[List[str]]:
        """Generate tags for an image using the given prompt"""
        output = self.call_api(image_path, prompt, print_output)
        return self.process_output(output)
    
    def consolidate_tags(self, image_path: str, all_tags: List[str], prompt: str) -> Optional[List[str]]:
        """Consolidate tags by validating them against the image"""
        # Create a consolidated prompt that includes the tags to verify
        full_prompt = prompt + "\n\nPreviously identified tags: [" + ", ".join(f'"{tag}"' for tag in all_tags) + "]"
        return self.generate_tags(image_path, full_prompt)

### Data output helper

In [4]:
import csv
import json
import os
from typing import Dict, List, Set, Optional, Tuple

class CsvHandler:
    """Handle CSV operations with context management"""
    def __init__(self, filename: str, fieldnames: List[str]):
        self.filename = filename
        self.fieldnames = fieldnames
        self._existing_entries: Optional[Set[str]] = None

    @property
    def existing_entries(self) -> Set[str]:
        """Lazily load existing entries as filename strings"""
        if self._existing_entries is None:
            self._existing_entries = self._load_existing_entries()
        return self._existing_entries

    def _load_existing_entries(self) -> Set[str]:
        if not os.path.exists(self.filename):
            return set()
        with open(self.filename, mode='r', encoding='utf-8') as f:
            return {row['fileName'] for row in csv.DictReader(f)}

    def write_tags(self, tag_results: List[Dict]) -> None:
        """Write new tag results to CSV file"""
        new_entries = [
            entry for entry in tag_results
            if entry.get('fileName') not in self.existing_entries
        ]

        if not new_entries:
            print("No new entries to write")
            return

        mode = 'a' if os.path.exists(self.filename) else 'w'
        with open(self.filename, mode=mode, newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=self.fieldnames)
            if mode == 'w':
                writer.writeheader()
            writer.writerows(new_entries)

        print(f"Wrote {len(new_entries)} new entries")

# Main

### Get all image files from input directory

In [5]:
# Initialize processor and client
image_processor = ImageProcessor()
gpt_client = GptVisionClient(
    api_key=APIKey,
    model=gptModel,
    temperature=modelTemperature,
    detail_level=detailLevel
)

# Get all image files from input directory
image_files = image_processor.get_image_files(inputFilePath)
image_files

Found 5 image files in 'images_sample'


['images_sample/SI202503310625.jpg',
 'images_sample/SI202407060351.jpg',
 'images_sample/SI202503041658.jpg',
 'images_sample/SI202504020575.jpg',
 'images_sample/SI202504021111.jpg']

### Process images with GPT Vision (Multiple Runs)

In [6]:
# Process each image with GPT Vision API (multiple runs)
tag_results = []

for image_path in image_files:
    print(f"Processing {image_path}")
    file_name = os.path.basename(image_path)
    
    # Store all tag results from multiple runs
    all_runs_tags = []
    
    # First run
    print(f"Run 1/{rerunCount+1}...")
    tags = gpt_client.generate_tags(image_path, taggingPrompt, print_output=True)
    
    # Check if we got an API key error message
    if isinstance(tags, str) and "Incorrect API key provided" in tags:
        print("API Key error detected. Stopping execution.")
        break
    
    if not tags:
        print(f"Failed to get GPT response for {file_name}")
        continue
    
    all_runs_tags.append(tags)
    print(f"Run 1: Found {len(tags)} tags")
    
    # Additional runs if rerunCount > 0
    if rerunCount > 0:
        for run in range(rerunCount):
            print(f"Run {run+2}/{rerunCount+1}...")
            run_tags = gpt_client.generate_tags(image_path, taggingPrompt)
            
            if run_tags:
                all_runs_tags.append(run_tags)
                print(f"Run {run+2}: Found {len(run_tags)} tags")
            else:
                print(f"Run {run+2}: Failed to get tags")
            
            # Add a delay to prevent API rate limiting
            time.sleep(apiDelaySeconds)
    
    # Deduplicate tags from all runs
    unique_tags = image_processor.deduplicate_tags(all_runs_tags)
    print(f"After deduplication: {len(unique_tags)} unique tags")
    
    # If we have multiple runs, consolidate results with a final verification pass
    final_tags = tags  # Default to first run results
    if len(all_runs_tags) > 1:
        print("Performing final consolidation run...")
        consolidated_tags = gpt_client.consolidate_tags(image_path, unique_tags, consolidationPrompt)
        
        if consolidated_tags:
            final_tags = consolidated_tags
            print(f"Consolidated to {len(final_tags)} verified tags")
        else:
            print("Consolidation failed, using deduplicated tags from all runs")
    
    # Store final results
    tag_results.append({
        "fileName": file_name,
        "tags": final_tags
    })
    
    print(f"Final tags for {file_name}: {', '.join(final_tags)}")
    print("-" * 50)
    
    # Add a delay to prevent API rate limiting
    time.sleep(apiDelaySeconds)

print(f"Successfully processed {len(tag_results)} images")

Processing images_sample/SI202503310625.jpg
Run 1/1...


2025-04-15 14:44:26,413 [INFO] HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


["Formula 1 car", "Red Bull Racing", "car nose", "front wing", "suspension arms", "sponsor logos", "Hard Rock", "TAG Heuer", "Visa", "Gate.io", "AT&T", "Ava Trade", "Maui Jim", "CDW", "neat", "Pirelli", "Mobil 1", "Oracle", "number 1", "racing livery", "motorsport", "racing car detail"]
Run 1: Found 22 tags
After deduplication: 22 unique tags
Final tags for SI202503310625.jpg: Formula 1 car, Red Bull Racing, car nose, front wing, suspension arms, sponsor logos, Hard Rock, TAG Heuer, Visa, Gate.io, AT&T, Ava Trade, Maui Jim, CDW, neat, Pirelli, Mobil 1, Oracle, number 1, racing livery, motorsport, racing car detail
--------------------------------------------------
Processing images_sample/SI202407060351.jpg
Run 1/1...


2025-04-15 14:44:37,954 [INFO] HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


["vintage warplanes", "aerial formation flying", "smoke trails", "mountain landscape", "green fields", "rural area", "aerial photography", "propeller aircraft", "airshow", "WWII fighter planes", "blue sky", "scenic valley", "highway", "industrial area"]
Run 1: Found 14 tags
After deduplication: 14 unique tags
Final tags for SI202407060351.jpg: vintage warplanes, aerial formation flying, smoke trails, mountain landscape, green fields, rural area, aerial photography, propeller aircraft, airshow, WWII fighter planes, blue sky, scenic valley, highway, industrial area
--------------------------------------------------
Processing images_sample/SI202503041658.jpg
Run 1/1...


2025-04-15 14:44:47,920 [INFO] HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


["skiers", "Henrik Kristoffersen", "Lucas Pinheiro Braathen", "Alexis Pinturault", "ski poles", "ski boots", "ski race bibs", "ski competition", "ski World Cup", "snow", "celebration", "winter sports", "ski race podium", "Red Bull logo", "Audi logo", "Longines logo", "Vedestein logo", "I Feel Slovenia banner", "Kranjska Gora", "ski suits", "ski helmets", "ski goggles"]
Run 1: Found 22 tags
After deduplication: 22 unique tags
Final tags for SI202503041658.jpg: skiers, Henrik Kristoffersen, Lucas Pinheiro Braathen, Alexis Pinturault, ski poles, ski boots, ski race bibs, ski competition, ski World Cup, snow, celebration, winter sports, ski race podium, Red Bull logo, Audi logo, Longines logo, Vedestein logo, I Feel Slovenia banner, Kranjska Gora, ski suits, ski helmets, ski goggles
--------------------------------------------------
Processing images_sample/SI202504020575.jpg
Run 1/1...


2025-04-15 14:44:56,752 [INFO] HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


["Formula 1 drivers", "racing suits", "Red Bull Racing", "AlphaTauri", "Oracle Red Bull Racing cap", "smiling", "sitting", "race track environment", "sponsor logos", "Visa", "AT&T", "Honda", "Rauch", "ROKiT", "HRC", "Red Bull logos", "team members"]
Run 1: Found 17 tags
After deduplication: 17 unique tags
Final tags for SI202504020575.jpg: Formula 1 drivers, racing suits, Red Bull Racing, AlphaTauri, Oracle Red Bull Racing cap, smiling, sitting, race track environment, sponsor logos, Visa, AT&T, Honda, Rauch, ROKiT, HRC, Red Bull logos, team members
--------------------------------------------------
Processing images_sample/SI202504021111.jpg
Run 1/1...


2025-04-15 14:45:06,555 [INFO] HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"


["Formula 1 cars", "Red Bull Racing", "Honda engine", "street circuit", "Tokyo International Cruise Terminal Station", "urban racing environment", "race spectators", "fencing", "Japanese road signs", "motor racing", "car racing", "race track barriers", "cityscape", "race event"]
Run 1: Found 14 tags
After deduplication: 14 unique tags
Final tags for SI202504021111.jpg: Formula 1 cars, Red Bull Racing, Honda engine, street circuit, Tokyo International Cruise Terminal Station, urban racing environment, race spectators, fencing, Japanese road signs, motor racing, car racing, race track barriers, cityscape, race event
--------------------------------------------------
Successfully processed 5 images


### Write the tags to the output file

In [7]:
# Initialize CSV handler
csv_handler = CsvHandler(
    outputFilePath,
    ["fileName", "tags"]
)

# Prepare final data for CSV
csv_entries = []
for result in tag_results:
    # Skip entries where no tags were extracted
    if not result['tags']:
        print(f"Skipping {result['fileName']} - no tags extracted")
        continue
        
    # Convert tags list to string for storage in CSV
    entry = {
        "fileName": result['fileName'],
        "tags": ", ".join(result['tags'])
    }
    
    csv_entries.append(entry)

# Write to CSV
if csv_entries:
    csv_handler.write_tags(csv_entries)
    print(f"Tags written to {outputFilePath}")
else:
    print("No tags to write to CSV")

Wrote 5 new entries
Tags written to tags.csv


### Test Results

In [8]:
import pandas as pd

# Display summary of processed images
for i, result in enumerate(tag_results):
    print(f"\n----- Image {i+1}: {result['fileName']} -----")
    print(f"Number of tags: {len(result['tags'])}")
    print(f"Tags: {', '.join(result['tags'])}")
    print("-" * 50)

print(f"\nTotal images processed: {len(tag_results)}")

# Try to read the output CSV file if it exists
try:
    if os.path.exists(outputFilePath):
        df = pd.read_csv(outputFilePath)
        print(f"\nOutput CSV contains {len(df)} entries:")
        display(df)
except Exception as e:
    print(f"Couldn't read output CSV: {e}")


----- Image 1: SI202503310625.jpg -----
Number of tags: 22
Tags: Formula 1 car, Red Bull Racing, car nose, front wing, suspension arms, sponsor logos, Hard Rock, TAG Heuer, Visa, Gate.io, AT&T, Ava Trade, Maui Jim, CDW, neat, Pirelli, Mobil 1, Oracle, number 1, racing livery, motorsport, racing car detail
--------------------------------------------------

----- Image 2: SI202407060351.jpg -----
Number of tags: 14
Tags: vintage warplanes, aerial formation flying, smoke trails, mountain landscape, green fields, rural area, aerial photography, propeller aircraft, airshow, WWII fighter planes, blue sky, scenic valley, highway, industrial area
--------------------------------------------------

----- Image 3: SI202503041658.jpg -----
Number of tags: 22
Tags: skiers, Henrik Kristoffersen, Lucas Pinheiro Braathen, Alexis Pinturault, ski poles, ski boots, ski race bibs, ski competition, ski World Cup, snow, celebration, winter sports, ski race podium, Red Bull logo, Audi logo, Longines logo,

Unnamed: 0,fileName,tags
0,SI202503310625.jpg,"Formula 1 car, Red Bull Racing, car nose, fron..."
1,SI202407060351.jpg,"vintage warplanes, aerial formation flying, sm..."
2,SI202503041658.jpg,"skiers, Henrik Kristoffersen, Lucas Pinheiro B..."
3,SI202504020575.jpg,"Formula 1 drivers, racing suits, Red Bull Raci..."
4,SI202504021111.jpg,"Formula 1 cars, Red Bull Racing, Honda engine,..."
