In [101]:
import base64
import mimetypes
import os
from typing import Literal

import openai
import requests
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Load variables from a .env file (if one is found) into the process
# environment so the API keys below can be read with os.getenv.
load_dotenv()


# Client for the OpenAI API itself. os.getenv returns None when the variable
# is missing. NOTE(review): this client is not used by the cells visible here.
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_client=openai.OpenAI(api_key=openai_api_key)

In [92]:
# Sample document images used by the cells below. All files live under the
# same S3 bucket URL; the file names suggest three licenses and two passports.
_SAMPLE_BUCKET_URL = "https://fireworks-take-home.s3.us-east-1.amazonaws.com"
image_links = [
    f"{_SAMPLE_BUCKET_URL}/{file_name}"
    for file_name in (
        "License+1.png",
        "License-2.jpg",
        "License-3.jpeg",
        "passport-1.jpeg",
        "passport-2.jpg",
    )
]

In [104]:
# The Fireworks inference endpoint is addressed through the OpenAI client by
# overriding base_url and supplying the Fireworks key.
api_key = os.getenv("FIREWORKS_API_KEY")
if not api_key:
    # With api_key=None the OpenAI client falls back to the OPENAI_API_KEY
    # environment variable, which would send the OpenAI secret to the
    # Fireworks endpoint. Fail fast with a clear message instead.
    raise RuntimeError(
        "FIREWORKS_API_KEY is not set; add it to the environment or the .env file"
    )
client = openai.OpenAI(api_key=api_key, base_url="https://api.fireworks.ai/inference/v1")

def encode_image(image_url, timeout=30):
    """Download an image and return its bytes as a base64-encoded string.

    Args:
        image_url: URL of the image to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 30.

    Returns:
        The response body, base64-encoded and decoded to ``str`` (UTF-8).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # A timeout keeps the notebook cell from hanging forever on a stalled server.
    response = requests.get(image_url, timeout=timeout)
    # Without this check an error body (e.g. an S3 "AccessDenied" XML page)
    # would be encoded and sent to the model as if it were the image.
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")

# Structured answer requested from the vision model: the JSON schema of this
# class is passed as `response_format` in the chat-completion request.
# Documented with comments rather than a class docstring because pydantic
# copies a model's docstring into the generated JSON schema, which would
# change what is sent to the model.
class Result(BaseModel):
    # Which kind of document the image shows; restricted to the two literals.
    document_type: Literal["passport", "drivers license"] = Field(description="The type of the document, available options are passport or drivers license")
    # Orientation of the document text within the image; restricted to the
    # four literals.
    image_orientation: Literal["up", "right", "left", "down"] = Field(description="The orientation of the image in regards to text on the document, up, right, left or down")

# Document to classify: index 3 of image_links is "passport-1.jpeg".
image_url = image_links[3]
image_base64 = encode_image(image_url)

# Derive the MIME type for the data URL from the file extension, so that a
# PNG sample (image_links[0]) is not mislabeled as JPEG. Fall back to the
# previous hard-coded value when the extension is unknown.
image_mime_type = mimetypes.guess_type(image_url)[0] or "image/jpeg"

response = client.chat.completions.create(
    model="accounts/fireworks/models/phi-3-vision-128k-instruct",
    # Request a JSON object and pass the Result schema along with it.
    response_format={"type": "json_object", "schema": Result.model_json_schema()},
    messages=[{
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Can you describe what type of a document is this? and also its orientation. If image is oriented correctly the value is up, if the text is rotated 90 degree clockwise the orientation is right, if text is upside down the orientation is down and lastly if the image is oriented 90degrees counter clockwise, the orientation is left",
            },
            {
                # The image is sent inline as a base64 data URL.
                "type": "image_url",
                "image_url": {
                    "url": f"data:{image_mime_type};base64,{image_base64}"
                },
            },
        ],
    }],
)
# Print the raw message content of the first choice.
print(response.choices[0].message.content)

 {
"document_type": "passport",
"image_orientation": "up"
}

