# Generate Image Descriptions
Generate detailed image descriptions for each image using their keywords, location, and content.

In [122]:
from openai import OpenAI
import os
import base64
import requests
from requests.exceptions import RequestException, HTTPError
import pandas as pd
from dotenv import load_dotenv
from requests.exceptions import RequestException
import time

## Encode image

In [97]:
def encode_image(image_path):
  """
  Encode an image to base64 to pass to OpenAI's API
  """
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

## Image to text

In [98]:
import requests
import os
from dotenv import load_dotenv
from requests.exceptions import RequestException

def image_to_text(base64_image, people, location):
    """
    Call openai api (gpt-4-vision-preview) to get a detailed description of the image.
    """
    dotenv_path = '../.env'
    load_dotenv(dotenv_path)
    api_key = os.getenv("OPENAI_API_KEY")

    # Construct the prompt based on available information
    prompt = "Describe what's going on in the image."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [{
            "role": "user",
            "content": [{
                "type": "text",
                "text": prompt
            }, {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }]
        }],
        "max_tokens": 300
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        response.raise_for_status()  # Raises a RequestException for 4XX/5XX errors
        data = response.json()
        return data['choices'][0]['message']['content']
    except RequestException as e:
        print(f"Request to OpenAI failed: {e}")
        return "An error occurred while generating the description."
    except KeyError as e:
        print(f"Unexpected response format from OpenAI: {e}")
        return "Failed to parse the response from OpenAI."
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return "An unexpected error occurred while processing the image."

## Extract keywords and location

In [99]:
def extract_keywords_and_location(image_name, df):
    """
    Extracts keywords and location for a given filename from a DataFrame.
    
    Parameters:
    - filename: The name of the file for which to extract information.
    - df: The DataFrame containing the photo metadata.
    
    Returns:
    A tuple containing (keywords, location) for the given filename.
    If the filename is not found, returns ('No Keywords', 'No Location').
    """
    # Filter the DataFrame for the specified filename
    file_data = df[df['Filename'] == image_name]
    
    # Check if the file exists in the DataFrame
    if not file_data.empty:
        # Extracting the keywords and location
        keywords = file_data['Keywords'].iloc[0]
        location = file_data['Location'].iloc[0]
        return keywords, location
    else:
        return 'No Keywords', 'No Location'


## Image to text

In [100]:
def construct_prompt(people, location):
    """
    Construct a prompt based on the available information.

    Input: people (str), location (str)
    Output: prompt (str)
    """
    if people == "" and location == "":
        return "Generate a description for what's going on in the image."
    elif people == "":
        return f"This image is at {location}. Generate a description for what's going on in the image."
    elif location == "":
        return f"The people in the image are {people}. Generate a description for what's going on in the image."
    else:
        return (f"The people in the image are {people} and they are at {location}. Generate a description for what's going on in the image."
                "The description should make use of their names and the location I provided you.")

In [123]:
def image_to_text(base64_image, retries=3, backoff_factor=2):
    """
    Call openai api (gpt-4-vision-preview) to get detailed description of the image with retry mechanism.
    """
    dotenv_path = '../.env'
    load_dotenv(dotenv_path)
    api_key = os.getenv("OPENAI_API_KEY")

    prompt = "Generate a description for what's going on in the image."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [{
            "role": "user",
            "content": [{
                "type": "text",
                "text": prompt
            }, {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            }]
        }],
        "max_tokens": 300
    }

    for attempt in range(retries):
        try:
            response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
            response.raise_for_status()  # This will raise HTTPError for 4XX/5XX responses, which is a subclass of RequestException
            data = response.json()
            return data['choices'][0]['message']['content']
        except HTTPError as e:
            # HTTPError is raised for HTTP responses with error status codes
            if response.status_code == 429:
                if attempt < retries - 1:
                    sleep_time = backoff_factor * (2 ** attempt)
                    print(f"Rate limit exceeded. Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)
                    continue  # Important to continue to the next iteration of the loop
                else:
                    return "Failed to generate description after several attempts due to rate limiting."
            else:
                print(f"HTTP request error: {e}")
                return "An error occurred while generating the description."
        except RequestException as e:
            # This handles other request-related errors (e.g., network issues)
            print(f"Request to OpenAI failed: {e}")
            return "An error occurred while generating the description."
        except KeyError as e:
            print(f"Unexpected response format from OpenAI: {e}")
            return "Failed to parse the response from OpenAI."
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return "An unexpected error occurred while processing the image."


## Generate Image Descriptions -- First Pass

In [127]:
def generate_image_descriptions(images_folder, images):
    """
    Generate image descriptions for the images in the specified folder.
    """
    # Ensure the images_folder is an absolute path or correct the relative path
    images_folder_abs = os.path.abspath(images_folder)
    print(f"Looking for images in: {images_folder_abs}")

    # Verify the directory exists
    if not os.path.exists(images_folder_abs):
        print(f"Directory not found: {images_folder_abs}")
        return {}

    image_descriptions = {}

    for image_name in os.listdir(images_folder_abs):
        if image_name.lower().endswith(('.jpeg', '.png', '.jpg')) and image_name in images:
            image_path = os.path.join(images_folder_abs, image_name)
            base64_image = encode_image(image_path)
            description = image_to_text(base64_image)
            image_descriptions[image_name] = description

    return image_descriptions

In [103]:
images_folder = '../images'
all_images = os.listdir(images_folder) # all images in the folder
image_descriptions = generate_image_descriptions(images_folder, all_images)

Looking for images in: /Users/danielmedina/Documents/Duke/classes/spring-2024/aipi590/photos-rag/images
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. Retrying in 4 seconds...
Rate limit exceeded. Retrying in 2 seconds...
Rate limit exceeded. R

In [139]:
# drop rows that contain "I'm sorry" in the 'Description' key
for k, v in image_descriptions.items():
    if "I'm sorry" in v:
        image_descriptions.pop(k)

# Convert the dictionary to csv
image_descriptions_df = pd.DataFrame(list(image_descriptions.items()), columns=['filename', 'description'])

# change the name "Filename" to "filename" and "Description" to "description
new_col_names = {'Filename': 'filename', 'Description': 'description'}
image_descriptions_df.rename(columns=new_col_names, inplace=True)

# Save the DataFrame to a csv file
image_descriptions_df.to_csv('output/image_descriptions.csv', index=False)

## Generate Image Descriptions -- Second Pass

In [119]:
missed_images = set()
for idx, image in image_descriptions_df.iterrows():
    if image['description'] == 'Failed to generate description after several attempts due to rate limiting.':
        missed_images.add(image['filename'])

In [133]:
print(missed_images)
print(images_folder)

['36263969-fb97-4904-8bda-d51ac0528b02.jpeg', 'IMG_5090.jpeg', 'IMG_0504.jpeg', 'IMG_1315.jpeg', '099f5b7b-2130-4d72-b839-dbc1a82a6c46.jpeg', 'IMG_1753.jpeg', 'c642dd9b-2af8-45d3-abf8-628a96ee3056.jpeg', '46ba2947-9e21-4a1c-8406-7d5546a4c528.jpeg', 'IMG_5878.jpeg', 'IMG_0379.jpeg', 'f97d78ae-9e43-4e48-bf1e-b33f58b99611.jpeg', 'IMG_2082.jpeg', 'IMG_1354.jpeg', 'IMG_0816.jpeg', 'IMG_2153.jpeg', 'IMG_2041.jpeg', 'afe532ba-14c2-4312-a045-d04003bff9ee.jpeg', '67713394321__D81137A1-7791-4040-9C6B-63300C595710.jpeg', 'dab5291d-1688-4a52-9d45-4cea89697bc8.jpeg', 'IMG_2104.jpeg', 'IMG_4879.jpeg', '9915d79f-3330-42d5-b991-c1668bcaa1a1.jpeg', '67843110741__B2724D55-A3A6-421D-9813-D87A4E34A7F8.jpeg', '8d7dbda3-6b80-4b6b-91f6-77361b3f2ebb.jpeg', 'IMG_4880.jpeg', 'IMG_2057.jpeg', 'IMG_2112.jpeg', 'IMG_6385.jpeg', 'AA3C4602-8EA8-4035-9EC4-A66863633181.jpeg', 'IMG_2145.jpeg', '91106589-eb0c-4add-908b-d951148143e3.jpeg', 'IMG_2098.jpeg', 'IMG_2132.jpeg', '69bb6a6b-7c6b-4f38-80b0-2d599b36a7f3.jpeg', 'IM

In [134]:
missed_images_descriptions = generate_image_descriptions(images_folder, missed_images)

Looking for images in: /Users/danielmedina/Documents/Duke/classes/spring-2024/aipi590/photos-rag/images
Rate limit exceeded. Retrying in 2 seconds...
HTTP request error: 500 Server Error: Internal Server Error for url: https://api.openai.com/v1/chat/completions
Request to OpenAI failed: HTTPSConnectionPool(host='api.openai.com', port=443): Max retries exceeded with url: /v1/chat/completions (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_BAD_RECORD_MAC] ssl/tls alert bad record mac (_ssl.c:2580)')))
HTTP request error: 400 Client Error: Bad Request for url: https://api.openai.com/v1/chat/completions
HTTP request error: 500 Server Error: Internal Server Error for url: https://api.openai.com/v1/chat/completions
HTTP request error: 400 Client Error: Bad Request for url: https://api.openai.com/v1/chat/completions


In [135]:
# Convert the dictionary to csv and save it
missed_images_descriptions = pd.DataFrame(list(missed_images_descriptions.items()), columns=['Filename', 'Description'])
missed_images_descriptions.to_csv('output/missed_image_descriptions.csv', index=False)