### PART 2: Convert a single PDF to images, then encode a single image and use it in an API call.

In [None]:
# @title Install Libraries

!pip install -q openai
!pip install -q pdf2image
!apt-get install -y -q poppler-utils
!pip install -q Pillow
!pip install -q IPython

In [None]:
# @title Import Dependencies

from openai import OpenAI
import os
import json
import logging
from pdf2image import convert_from_path
from google.colab import drive
from openai.types.chat import ChatCompletionMessageParam
import base64
from glob import glob
from PIL import Image
from IPython.display import Image, Markdown, display

In [None]:
# @title Mount Google Drive

# Attach Google Drive to the runtime at /content/drive; the absolute paths
# used by the later cells all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# @title CD to Project Directory

# Change the directory to where your project files are located
%cd /content/drive/MyDrive/Templates/ReadingPDFImages

In [None]:
# @title Configure Error Logging

# File that receives the error records; a relative name, so it is created in
# the current working directory.
log_file = 'config_loader.log'

# force=True removes any handlers already attached to the root logger before
# installing the file handler.  Without it basicConfig() silently does nothing
# when the root logger is already configured (commonly the case inside
# notebook kernels), and no error would ever reach log_file.
logging.basicConfig(
    filename=log_file,
    level=logging.ERROR,  # Only log errors and above (ERROR, CRITICAL)
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [None]:
# @title FUNCTION: PDF2Images

def PDF2Images(pdf_file: str, output_folder: str, dpi: int = 200) -> list:
    """Render every page of a PDF to a JPEG file and return the file paths.

    Args:
        pdf_file: Path of the PDF to convert.
        output_folder: Directory that receives the page images.  It is
            created (including missing parents) when it does not exist.
        dpi: Rendering resolution handed to pdf2image.  The default of 200
            is pdf2image's own default, so two-argument calls render as
            before.

    Returns:
        The paths of the written files in page order, named ``page1.jpg``,
        ``page2.jpg``, ... inside ``output_folder``.
    """
    # exist_ok=True already covers the "folder is there" case, so no separate
    # existence check is needed (and none can race with another process).
    os.makedirs(output_folder, exist_ok=True)

    # pdf2image returns one PIL image per page.
    pages = convert_from_path(pdf_file, dpi=dpi)

    image_paths = []
    for page_number, page in enumerate(pages, start=1):
        image_path = os.path.join(output_folder, f"page{page_number}.jpg")
        page.save(image_path, "JPEG")
        image_paths.append(image_path)

    return image_paths

In [None]:
# @title Define Paths & call PDF2Images function

# Source PDF to render (absolute path under the /content/drive mount).
pdf_file =  "/content/drive/MyDrive/Templates/ReadingPDFImages/pdf_file/things-mother-used-to-make.pdf"
# Folder that receives one JPEG per page.
output_folder = "/content/drive/MyDrive/Templates/ReadingPDFImages/images/"

# Run the conversion.  As the last expression of the cell, the returned list
# of image paths is also shown as the cell output.
PDF2Images(pdf_file, output_folder)

---
## **END OF CREATING IMAGES FROM PDF**
---

---
### **BEGIN API CALL ON IMAGE DATA**
---

In [None]:
# @title Load API Key

# JSON file expected to hold the "API_KEY" and "OPENAI_API_BASE" entries.
file_name = '/content/drive/MyDrive/config.json'

try:
    if not os.path.exists(file_name):
        raise FileNotFoundError(f"Config file not found at: {file_name}")

    with open(file_name, 'r', encoding='utf-8') as file:
        try:
            config = json.load(file)
        except json.JSONDecodeError as e:
            # Keep the original decoder error attached as the cause.
            raise ValueError(f"Error decoding JSON: {e}") from e

    api_key = config.get("API_KEY")
    base_url = config.get("OPENAI_API_BASE")

    # A missing or empty value is treated as a configuration error.
    if not api_key:
        raise KeyError("Missing 'API_KEY' in config.json")
    if not base_url:
        raise KeyError("Missing 'OPENAI_API_BASE' in config.json")

    # Export the validated values so later cells can read them from the
    # process environment.
    os.environ['OPENAI_API_KEY'] = api_key
    os.environ["OPENAI_BASE_URL"] = base_url

    print("Environment variables set successfully.")

except Exception as e:
    # Best effort: record the problem in the log file and keep the notebook
    # running instead of raising.
    logging.error(e)
    print(f"An error occurred.  Check {log_file} for details.")

In [None]:
# @title Connect to OPENAI API

try:
    # The key must already be present in the process environment; an unset
    # or empty value is rejected before a client is built.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("The OPENAI_API_KEY environment variable is not set.")

    # Build the client, then record the model that later requests will use.
    client = OpenAI(api_key=api_key)
    model = "gpt-5"

    print("OpenAI client initialized successfully.")

except Exception as init_error:
    # Any failure is written to the log file; the notebook keeps running.
    logging.error(init_error)
    print(f"An error occurred.  Check {log_file} for details.")

In [None]:
# @title Read and encode one image

# Page image to send to the model (relative to the current working directory).
image_path = "images/page23.jpg"

# Read the file's bytes and keep them as base64 text, the form needed to embed
# the image in a data URL.
with open(image_path, mode="rb") as image_file:
    image_data = str(base64.b64encode(image_file.read()), "utf-8")

In [None]:
# @title SYSTEM PROMPT

# Instructions plus a worked sample of the expected output layout.
# NOTE: this is a regular (non-raw) string, so every "\n" below becomes a real
# newline, and a lone backslash at the end of a line would glue that line to
# the next one -- each sample line therefore has to end in a complete "\n".
system_prompt  = """

You are an OCR/transcription engine.
Please analyze the content of this image, extract the recipe type, recipe name, ingredients (one per line), and instructions per the sample below.  Do not normalize or correct text beyond obvious OCR typos.
Never infer or guess. The recipe type can be repeated for each recipe of that type. If no ingredients are listed disregard that section and print No ingredients, then move on to the instructions.

The output should be formatted as the sample below:

Recipe Type: BREADS
\n
\n
Recipe Name: Bannocks
\n
\n
Ingredients:
\n
\n
1 Cupful of Thick Sour Milk\n
1/2 Cupful of Sugar\n
1 Egg\n
2 Cupfuls of Flour\n
1/2 Cupful of Indian Meal\n
1 Teaspoonful of Soda\n
A pinch of Salt\n
\n
Instructions:
\n
\n
Make the mixture stiff enough to drop from a spoon.  Drop mixture, size of a walnut, into boiling
fat.  Serve warm, with maple syrup.
\n
\n
<hr />
\n
\n

"""

In [None]:
# @title FUNCTION: get_gpt_response

def get_gpt_response(completion=None):
    """Show the text of a chat completion as rendered Markdown.

    Args:
        completion: Chat-completion result whose first choice is displayed.
            When omitted, the module-level ``response`` variable is used, so
            existing zero-argument calls keep working.

    Returns:
        Whatever ``display`` returns for the rendered Markdown object.
    """
    if completion is None:
        # Fall back to the result stored by the request cell.
        completion = response
    gpt_response = completion.choices[0].message.content
    return display(Markdown(gpt_response))

In [None]:
# @title Create LLM Response

# Send the system prompt plus one user message that carries both the
# instruction text and the base64-encoded page image.
response = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": [
                # Every entry of a content array must be a typed part; the
                # instruction is therefore wrapped as a "text" part instead
                # of being passed as a bare string.
                {"type": "text", "text": "Extract the text from this image."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_data}",
                        # NOTE(review): "low" asks for the reduced-resolution
                        # rendering of the image; confirm it is still legible
                        # enough for page transcription, else use "high".
                        "detail": "low",
                    },
                },
            ],
        },
    ],
    response_format={"type": "text"},
)

In [None]:
# @title CALL FUNCTION: get_gpt_response to Display the response

# Render the reply held in the global `response` as Markdown below this cell.
get_gpt_response()