In [39]:
import os
import glob
import json
from openai import OpenAI
import base64

# put your OpenAI API key in the environment variable OPENAI_API_KEY
api_key= os.getenv("OPENAI_API_KEY")



In [40]:
def process_node_images(
    images_folder: str,
    prompt_file: str,
    model_name: str = "gpt-4o-mini",
    output_file: str = "node_image_results.json"
):
    """
    Iterate over all JPG images in images_folder, send each image + prompt to the GPT vision model,
    and save structured results to a JSON file.

    images_folder:  Path to "CEPSR - Floor 7/Node images"
    prompt_file:    Path to the text file containing your base prompt (extracting_image_data_prompt.txt)
    model_name:     The vision-capable GPT model (e.g., "gpt-4o-mini")
    output_file:    Filename to write the final JSON results
    """
    # Initialize OpenAI client
    client = OpenAI(api_key=api_key)  # Make sure OPENAI_API_KEY is set in environment
    
    # Read the base prompt from disk
    with open(prompt_file, "r", encoding="utf-8") as f:
        base_prompt = f.read().strip()

    results = []

    # Find all .JPG files inside images_folder
    image_paths = glob.glob(os.path.join(images_folder, "*.JPG"))

    for i, img_path in enumerate(image_paths):
            
        basename = os.path.basename(img_path)                # e.g. "node2_north.JPG"
        name_no_ext, ext = os.path.splitext(basename)         # e.g. ("node2_north", ".JPG")
        parts = name_no_ext.split("_")
        if len(parts) != 2 or not parts[0].startswith("node"):
            # Skip any file that does not match the pattern "node{N}_{dir}.JPG"
            continue

        # Extract numeric location and direction
        try:
            location_num = int(parts[0].replace("node", ""))  # e.g. 2
        except ValueError:
            continue

        direction = parts[1].lower()                         # e.g. "north", "east"

        # Option 1: Resize image to reduce costs (uncomment if needed)
        # from PIL import Image
        # with Image.open(img_path) as img:
        #     img.thumbnail((1024, 1024))  # Resize to max 1024x1024
        #     img_byte_arr = io.BytesIO()
        #     img.save(img_byte_arr, format='JPEG', quality=85)
        #     image_data = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        
        # Option 2: Use original image (current approach)
        with open(img_path, "rb") as image_file:
            image_data = base64.b64encode(image_file.read()).decode('utf-8')

        # Build the user message with the image
        user_content = f"{base_prompt}\n\nImage filename: {basename}"

        try:
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": user_content
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{image_data}"
                                }
                            }
                        ]
                    }
                ]
            )
            
           # Extract the response content
            raw_response = response.choices[0].message.content.strip()
            
            # Parse JSON from the response
            try:
                # Remove markdown code blocks if present
                if raw_response.startswith('```json'):
                    # Extract JSON between ```json and ```
                    json_start = raw_response.find('```json') + 7
                    json_end = raw_response.rfind('```')
                    json_str = raw_response[json_start:json_end].strip()
                elif raw_response.startswith('```'):
                    # Extract JSON between ``` and ```
                    json_start = raw_response.find('```') + 3
                    json_end = raw_response.rfind('```')
                    json_str = raw_response[json_start:json_end].strip()
                else:
                    # Assume the entire response is JSON
                    json_str = raw_response
                
                # Parse the JSON string into a Python object
                parsed_json = json.loads(json_str)
                description = parsed_json  # Store as actual JSON object
                
            except json.JSONDecodeError as json_err:
                print(f"Warning: Could not parse JSON from {basename}: {json_err}")
                print(f"Raw response: {raw_response[:200]}...")
                # Fallback to storing as text if JSON parsing fails
                description = raw_response
            
        except Exception as e:
            print(f"Error processing {basename}: {e}")
            continue

        # Collect this into our results list
        results.append({
            "location":    location_num,
            "direction":   direction,
            "description": description,
            "filename":    basename
        })

        print(f"Processed {basename} (location {location_num}, {direction})")

    # Write the results array to disk in JSON format
    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(results, out_f, indent=2, ensure_ascii=False)

    print(f"Finished. Results saved to {output_file}. Processed {len(results)} images.")

In [41]:
images_folder = "CEPSR - Floor 7/Node images"
prompt_file = "extracting_image_data_prompt.txt"
process_node_images(images_folder, prompt_file)

Processed node3_east.JPG (location 3, east)
Processed node2_east.JPG (location 2, east)
Processed node10_north.JPG (location 10, north)
Processed node1_south.JPG (location 1, south)
Processed node5_west.JPG (location 5, west)
Processed node4_west.JPG (location 4, west)
Processed node9_east.JPG (location 9, east)
Processed node8_east.JPG (location 8, east)
Processed node11_south.JPG (location 11, south)
Processed node7_south.JPG (location 7, south)
Processed node12_west.JPG (location 12, west)
Processed node6_north.JPG (location 6, north)
Processed node11_north.JPG (location 11, north)
Processed node1_north.JPG (location 1, north)
Processed node10_south.JPG (location 10, south)
Processed node12_east.JPG (location 12, east)
Processed node4_east.JPG (location 4, east)
Processed node6_south.JPG (location 6, south)
Processed node5_east.JPG (location 5, east)
Processed node2_west.JPG (location 2, west)
Processed node3_west.JPG (location 3, west)
Processed node7_north.JPG (location 7, north)
