In [1]:
import os
from dotenv import load_dotenv

# Pull in any variables defined in a .env file, then read the Azure OpenAI
# settings used by the rest of the notebook. Each value is None when the
# corresponding environment variable is not set.
load_dotenv()
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_DEPLOYMENT = os.environ.get("AZURE_OPENAI_DEPLOYMENT")

In [2]:
from openai import AzureOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed, 
# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the 
# keys for each service
# If you encounter an error, here are some ways to troubleshoot:
# - you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
# - For your credential, grant Cognitive Services User and Cognitive Services OpenAI User roles - EVEN IF YOU HAVE OWNER/ADMIN RIGHTS.
# Build a bearer-token provider for the Cognitive Services scope from the default Azure credential chain.
credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")

# Token-authenticated Azure OpenAI client; endpoint and API version come from the environment settings read above.
client = AzureOpenAI(
    azure_ad_token_provider=token_provider,
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
)

In [3]:
import logging
import json

def execute_image_completion(client, encoded_image, system_prompt, deployment_name="gpt-4o", temperature=0, image_mime_type="image/jpeg"):
    """
    Executes a GPT-4o chat completion based on the system prompt and encoded image.

    Args:
        client (object): The Azure OpenAI client object.
        encoded_image (str): The base64 encoded image.
        system_prompt (str | list | dict): The system prompt (required). A plain string is
            sent verbatim; any other value (e.g. list or dict) is serialized with json.dumps.
        deployment_name (str): The deployment name of the vision model.
        temperature (float, optional): The temperature of the completion. Defaults to 0.
        image_mime_type (str, optional): MIME type placed in the image data URL.
            Defaults to "image/jpeg".

    Returns:
        str: The generated response from the chat completion (content of the first choice).

    Raises:
        ValueError: If client, encoded_image or system_prompt is None.
    """

    # Fail fast on missing required arguments; log first so the reason also shows up in the logs.
    required_arguments = (
        ("client", client),
        ("encoded_image", encoded_image),
        ("system_prompt", system_prompt),
    )
    for argument_name, argument_value in required_arguments:
        if argument_value is None:
            error_message = f"{argument_name} parameter is required."
            logging.error(error_message)
            raise ValueError(error_message)

    # Plain-text prompts must go out unchanged: json.dumps on a str would wrap it in quotes
    # and escape its newlines. Only structured prompts are serialized to JSON.
    if isinstance(system_prompt, str):
        system_content = system_prompt
    else:
        system_content = json.dumps(system_prompt)

    messages = [
        {
            "role": "system",
            "content": system_content
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        # The image is passed inline as a base64 data URL.
                        "url": f"data:{image_mime_type};base64,{encoded_image}"
                    }
                }
            ]
        }
    ]

    logging.info("Executing image completion...")
    response = client.chat.completions.create(
        model=deployment_name,
        messages=messages,
        temperature=temperature
    )

    # Show token usage in the notebook output.
    print ('---------------------------------')
    print (response.usage)
    print ('---------------------------------')

    return response.choices[0].message.content


def execute_text_completion(client, text, system_prompt, deployment_name="gpt-4o", temperature=0.3):
    """
    Executes a GPT-4o chat completion based on the system prompt and text input.

    Args:
        client (object): The Azure OpenAI client object.
        text (str): The user text input.
        system_prompt (str | list | dict): The system prompt (required). A plain string is
            sent verbatim; any other value (e.g. list or dict) is serialized with json.dumps.
        deployment_name (str): The deployment name of the chat completion model.
        temperature (float, optional): The temperature of the completion. Defaults to 0.3.

    Returns:
        str: The generated response from the chat completion (content of the first choice).

    Raises:
        ValueError: If client, text or system_prompt is None.
    """

    # Fail fast on missing required arguments; log first so the reason also shows up in the logs.
    required_arguments = (
        ("client", client),
        ("text", text),
        ("system_prompt", system_prompt),
    )
    for argument_name, argument_value in required_arguments:
        if argument_value is None:
            error_message = f"{argument_name} parameter is required."
            logging.error(error_message)
            raise ValueError(error_message)

    # Plain-text prompts must go out unchanged: json.dumps on a str would wrap it in quotes
    # and escape its newlines. Only structured prompts are serialized to JSON.
    if isinstance(system_prompt, str):
        system_content = system_prompt
    else:
        system_content = json.dumps(system_prompt)

    messages = [
        {
            "role": "system",
            "content": system_content
        },
        {
            "role": "user",
            "content": text
        }
    ]

    logging.info("Executing text completion...")
    # Use the caller-supplied deployment and temperature rather than fixed values,
    # so the function's arguments actually take effect.
    response = client.chat.completions.create(
        model=deployment_name,
        messages=messages,
        temperature=temperature
    )

    # Show token usage in the notebook output.
    print ('---------------------------------')
    print (response.usage)
    print ('---------------------------------')

    return response.choices[0].message.content

In [4]:
def read_file(file_path, encoding="utf-8"):
    """
    Reads the contents of a text file and returns it as a string.

    Args:
        file_path (str): The path to the file.
        encoding (str, optional): Text encoding used to decode the file. Defaults to
            "utf-8" so the result does not depend on the platform's default encoding.

    Returns:
        str: The contents of the file as a string.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read()

In [5]:
import base64

# Relative path to the sample image; it is resolved against the notebook's current working directory.
IMAGE_PATH="../media/notes-sample.png"

# Read the raw image bytes inside a context manager so the file handle is closed
# deterministically, then base64-encode them into an ASCII string.
with open(IMAGE_PATH, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('ascii')

FileNotFoundError: [Errno 2] No such file or directory: '../media/notes-sample.png'

In [None]:
# Classify the image: the detection prompt is sent as the system message together with the encoded image.
system_prompt = read_file("../az-function/prompts/detectNoteType.txt")

noteType = execute_image_completion(
    client=client,
    encoded_image=encoded_image,
    system_prompt=system_prompt,
    deployment_name=AZURE_OPENAI_DEPLOYMENT,
)
print(noteType)

In [None]:
import prompts.ocrPaper as ocrPaper
import prompts.ocrWhiteboard as ocrWhiteboard

# Choose the OCR prompt for the detected note type; any type other than
# PAPER or WHITEBOARD falls back to the generic image prompt file.
if noteType not in ("PAPER", "WHITEBOARD"):
    system_prompt = read_file("../az-function/prompts/ocrImage.txt")
elif noteType == "PAPER":
    system_prompt = ocrPaper.get_prompt_content()
else:
    system_prompt = ocrWhiteboard.get_prompt_content()

print(system_prompt)

# Run the extraction against the same encoded image with the selected prompt.
extractedText = execute_image_completion(
    client=client,
    encoded_image=encoded_image,
    system_prompt=system_prompt,
    deployment_name=AZURE_OPENAI_DEPLOYMENT,
)
print(extractedText)

In [None]:
# Proofreading pass: runs only for PAPER and WHITEBOARD notes and overwrites extractedText with the result.
if noteType in ("PAPER", "WHITEBOARD"):
    system_prompt = read_file("../az-function/prompts/proofread.txt")
    extractedText = execute_text_completion(
        client=client,
        text=extractedText,
        system_prompt=system_prompt,
        deployment_name=AZURE_OPENAI_DEPLOYMENT,
    )
    print(extractedText)

In [None]:
# Section-header pass: runs only for PAPER and WHITEBOARD notes and overwrites extractedText with the result.
if noteType in ("PAPER", "WHITEBOARD"):
    system_prompt = read_file("../az-function/prompts/sectionHeader.txt")
    extractedText = execute_text_completion(
        client=client,
        text=extractedText,
        system_prompt=system_prompt,
        deployment_name=AZURE_OPENAI_DEPLOYMENT,
    )
    print(extractedText)

In [None]:
## Test, shows text completion is much faster.
import time

# Title extraction straight from the image (vision completion).
determine_title_prompt = """
Extract the main title for this image.
- Respond with the title only.
- If the title is in markdown, remove markdown formatting.
- If there is no title, respond with "NONE".
"""

# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed time;
# unlike time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()
title = execute_image_completion(client, encoded_image, system_prompt=determine_title_prompt, deployment_name=AZURE_OPENAI_DEPLOYMENT)

end_time = time.perf_counter()
duration = end_time - start_time
print(f"{title} (Execution time: {duration} seconds)")

# Title extraction from the already-extracted text (text completion), timed the same way for comparison.
determine_title_prompt = """Extract the main title for these notes
- Respond with the title only.
- If the title is in markdown, remove markdown formatting.
- If there is no title, respond with "NONE".
"""

start_time = time.perf_counter()
title = execute_text_completion(client, extractedText, system_prompt=determine_title_prompt, deployment_name=AZURE_OPENAI_DEPLOYMENT)
end_time = time.perf_counter()
duration = end_time - start_time
print(f"{title} (Execution time: {duration} seconds)")

In [None]:
import datetime

def get_title(extracted_text):
    """
    Asks the text completion model for the main title of the given text.

    The system prompt is read from the extractMainTitle prompt file. Any
    "{DateStamp}" placeholder in the model's answer is replaced with the
    current local date formatted as YYYYMMDD.

    Args:
        extracted_text (str): The text to derive a title from.

    Returns:
        str: The title returned by the model, with "{DateStamp}" substituted.
    """
    prompt_text = read_file("../az-function/prompts/extractMainTitle.txt")
    raw_title = execute_text_completion(
        client,
        extracted_text,
        prompt_text,
        deployment_name=AZURE_OPENAI_DEPLOYMENT,
    )
    date_stamp = datetime.datetime.now().strftime("%Y%m%d")
    return raw_title.replace("{DateStamp}", date_stamp)

In [None]:
# Try the title extraction on a few hand-written sample inputs first ...
sample_inputs = (
    "2024-07-13 Weekly Planning Notes",
    "2024-07-13 09:00 Daily Journal",
    "202407 Team Offsite",
    "## MOTM - Customer Meeting",
    "# **Key Note** - Microsoft Build _(2024)_",
)
for sample_input in sample_inputs:
    print(get_title(sample_input))

# ... then on the text extracted from the image.
print(get_title(extractedText))

In [None]:
# Remove a leading ```markdown marker and a trailing ``` marker, if present,
# trimming surrounding whitespace before and after.
fence_open, fence_close = "```markdown", "```"

extractedText = extractedText.strip()
if extractedText.startswith(fence_open):
    extractedText = extractedText[len(fence_open):]
if extractedText.endswith(fence_close):
    extractedText = extractedText[:-len(fence_close)]

extractedText = extractedText.strip()

In [None]:
import os
import datetime

filename = os.path.basename(IMAGE_PATH)
# The image file's modification time is the only timestamp read here; it is used
# for both the created-date and last-updated front-matter fields below.
last_updated = datetime.datetime.fromtimestamp(os.path.getmtime(IMAGE_PATH)).strftime('%Y-%m-%d %H:%M')

# Markdown document: front matter, a heading with the file name, the embedded image, then the extracted text.
result = f"""---
note-type: {noteType}
created-date: {last_updated}
last-updated: {last_updated}
---
# {filename}
![{filename}]({IMAGE_PATH})
{extractedText}"""

# save result to output.md
# Write as UTF-8 explicitly: the extracted text may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to encode them.
with open("output.md", "w", encoding="utf-8") as file:
    file.write(result)

print(result)
