## Importing Libraries

In [1]:
from openai import OpenAI
import config

## Setting OpenAI API Key

In [2]:
client = OpenAI(
  api_key=config.OPENAI_API_KEY,
)

## Testing 

In [10]:
response = client.chat.completions.create(
  model="gpt-4-turbo",
  messages=[
    {
      "role": "system",
      "content": "You are an experienced mobile developer, a web scraper, and an expert in image processing and generative AI. You will be provided with an image, which could be a screenshot of a document or PDF, or it could contain handwritten text. Your task is to extract all of the relevant data from the image and return it in a specified format."
    },
    {
      "role": "user",
      "content": [{"type":"text", 
                   "text":"What: Please help me extract the textual pieces of information from the image provided.\n\nWho: I am a professional Mobile application developer in Flutter.\n\nWhy: This question originates from a company that makes mobile applications. They are making an e-commerce app and they intend to add a feature in their app that allows the user to upload images and use generative AI to extract the relevant data from the image.\n\nWhich: The data you are supposed to extract from the image is 'Balance Sheet Date', 'Cash', 'Bank', 'Product List' (this could be more than one and have the following attributes): 'Product Name', 'Quantity', 'Delivery Charges', and 'Cost'. The wording of the aforementioned fields may differ so extract according to the meaning. Please ignore anything else.\n\nHow: Finally, return the data as a JSON response."
                   },
                   {
                      "type": "image_url",
                      "image_url": {
                        "url": "https://i.imgur.com/tvsgUl9.jpg"}
                   }
      ]
    }
  ],
  max_tokens=1000,
)


Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Here is the data extracted from the image in JSON format:\n\n```json\n{\n  "Balance Date": "31/03/1998",\n  "Cash": "$3,000",\n  "Bank": "Mars Galactic Bank",\n  "Product List": [\n    {\n      "Product Name": "Apple",\n      "Quantity": 36,\n      "Delivery Charges": "$10",\n      "Cost": "$1"\n    },\n    {\n      "Product Name": "Mouse",\n      "Quantity": 5,\n      "Delivery Charges": "$2",\n      "Cost": "$5"\n    }\n  ]\n}\n```', role='assistant', function_call=None, tool_calls=None))


## Displaying The Results

In [11]:
print(response.choices[0].message.content)

Here is the data extracted from the image in JSON format:

```json
{
  "Balance Date": "31/03/1998",
  "Cash": "$3,000",
  "Bank": "Mars Galactic Bank",
  "Product List": [
    {
      "Product Name": "Apple",
      "Quantity": 36,
      "Delivery Charges": "$10",
      "Cost": "$1"
    },
    {
      "Product Name": "Mouse",
      "Quantity": 5,
      "Delivery Charges": "$2",
      "Cost": "$5"
    }
  ]
}
```


In [25]:
response = client.chat.completions.create(
  model="gpt-4-turbo",
  messages=[
    {
      "role": "system",
      "content": "You are an experienced mobile developer, a web scraper, and an expert in image processing and generative AI. You will be provided with an image, which could be a screenshot of a document or PDF, or it could contain handwritten text. Your task is to extract all of the relevant data from the image and return it in a specified format."
    },
    {
      "role": "user",
      "content": [{"type":"text", 
                   "text":"What: Please help me extract the textual pieces of information from the image provided.\n\nWho: I am a professional Mobile application developer in Flutter.\n\nWhy: This question originates from a company that makes mobile applications. They are making an e-commerce app and they intend to add a feature in their app that allows the user to upload images and use generative AI to extract the relevant data from the image.\n\nWhich: The data you are supposed to extract from the image is 'Balance Sheet Date', 'Cash', 'Bank', 'Product List' (this could be more than one and have the following attributes): 'Product Name', 'Quantity', 'Delivery Charges', and 'Cost'. The wording of the aforementioned fields may differ so extract according to the meaning. Please ignore anything else.\n\nHow: Finally, return the data as a JSON response."
                   },
                   {
                      "type": "image_url",
                      "image_url": {
                        "url": "https://i.imgur.com/tvsgUl9.jpg"}
                   }
      ]
    }
  ],
  max_tokens=1000,
)


In [26]:
print(response.choices[0].message.content)

```json
{
  "Balance Sheet Date": "31/03/1998",
  "Cash": "$3,000",
  "Bank": "Mars Galactic Bank",
  "Product List": [
    {
      "Product Name": "Apple",
      "Quantity": 36,
      "Delivery Charges": "$10",
      "Cost": "$1"
    },
    {
      "Product Name": "Mouse",
      "Quantity": 5,
      "Delivery Charges": "$2",
      "Cost": "$5"
    }
  ]
}
```


## URL Correction

In [None]:
import base64
import requests


# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Path to your image
image_path = "data/bank_sheet_2.jpg"

# Getting the base64 string
base64_image = encode_image(image_path)

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {config.OPENAI_API_KEY}"
}

payload = {
  "model": "gpt-4-turbo",
  "messages": [
    {
      "role": "system",
      "content": "You are an experienced mobile developer, a web scraper, and an expert in image processing and generative AI. You will be provided with an image, which could be a screenshot of a document or PDF, or it could contain handwritten text. Your task is to extract all of the relevant data from the image and return it in a specified format."
    },
    {
      "role": "user",
      "content": [{"type":"text", 
                   "text":"What: Please help me extract the textual pieces of information from the image provided.\n\nWho: I am a professional Mobile application developer in Flutter.\n\nWhy: This question originates from a company that makes mobile applications. They are making an e-commerce app and they intend to add a feature in their app that allows the user to upload images and use generative AI to extract the relevant data from the image.\n\nWhich: The data you are supposed to extract from the image is 'Balance Sheet Date', 'Cash', 'Bank', 'Product List' (this could be more than one and have the following attributes): 'Product Name', 'Quantity', 'Delivery Charges', and 'Cost'. The wording of the aforementioned fields may differ so extract according to the meaning. Please ignore anything else.\n\nHow: Finally, return the data as a JSON response."
                   },
                   {
                      "type": "image_url",
                      "image_url": {
                      "url": f"data:image/jpg;base64,{base64_image}"
                      }                     
                   }
      ]
    }
  ],
  "max_tokens": 300
}

response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

print(response.json())

In [None]:
print(response.json()['choices'][0]['message']['content'])

In [6]:
import os

def extract_file_type(file_path):
    x, file_extension = os.path.splitext(file_path)
    print(x)
    return file_extension

# Example usage:
file_path = "data/bank_sheet.png"
file_type = extract_file_type(file_path)
print("File type:", file_type)


data/bank_sheet
File type: .png


In [8]:
f"data:image/"+ extract_file_type(file_path) + f";base64"

data/bank_sheet


'data:image/.png;base64'

In [1]:
import json

# JSON response string
json_response = "{\n\t\"last_balance_sheet_date\": \"January 31, 2024\",\n\t\"cash_on_hand\": \"$10,000\",\n\t\"bank_name\": \"First National Bank\",\n\t\"bank_balance\": \"$50,000\",\n\t\"list_of_products\": [\n\t\t{\n\t\t\t\"product_name\": \"Widget A\",\n\t\t\t\"product_cost\": \"$10\",\n\t\t\t\"quantity\": \"100\",\n\t\t\t\"delivery_charges\": \"$5\"\n\t\t},\n\t\t{\n\t\t\t\"product_name\": \"Widget B\",\n\t\t\t\"product_cost\": \"$20\",\n\t\t\t\"quantity\": \"50\",\n\t\t\t\"delivery_charges\": \"$8\"\n\t\t},\n\t\t{\n\t\t\t\"product_name\": \"Widget C\",\n\t\t\t\"product_cost\": \"$15\",\n\t\t\t\"quantity\": \"75\",\n\t\t\t\"delivery_charges\": \"$6\"\n\t\t}\n\t]\n}"

# Parse JSON string
data = json.loads(json_response)

# Print formatted JSON
print(json.dumps(data, indent=4))


{
    "last_balance_sheet_date": "January 31, 2024",
    "cash_on_hand": "$10,000",
    "bank_name": "First National Bank",
    "bank_balance": "$50,000",
    "list_of_products": [
        {
            "product_name": "Widget A",
            "product_cost": "$10",
            "quantity": "100",
            "delivery_charges": "$5"
        },
        {
            "product_name": "Widget B",
            "product_cost": "$20",
            "quantity": "50",
            "delivery_charges": "$8"
        },
        {
            "product_name": "Widget C",
            "product_cost": "$15",
            "quantity": "75",
            "delivery_charges": "$6"
        }
    ]
}


In [2]:
import json

# JSON response string
json_response = "{\n\t\"last_balance_sheet_date\": \"31/03/1998\",\n\t\"cash_on_hand\": \"$3,000\",\n\t\"bank_name\": \"Mars Galactic Bank\",\n\t\"bank_balance\": \"N/A\",\n\t\"list_of_products\": [\n\t\t{\n\t\t\t\"product_name\": \"Apple\",\n\t\t\t\"product_cost\": \"$1\",\n\t\t\t\"quantity\": \"36\",\n\t\t\t\"delivery_charges\": \"$10\"\n\t\t},\n\t\t{\n\t\t\t\"product_name\": \"Mouse\",\n\t\t\t\"product_cost\": \"$5\",\n\t\t\t\"quantity\": \"5\",\n\t\t\t\"delivery_charges\": \"$2\"\n\t\t}\n\t]\n}"

# Parse JSON string
data = json.loads(json_response)

# Print formatted JSON
print(json.dumps(data, indent=4))


{
    "last_balance_sheet_date": "31/03/1998",
    "cash_on_hand": "$3,000",
    "bank_name": "Mars Galactic Bank",
    "bank_balance": "N/A",
    "list_of_products": [
        {
            "product_name": "Apple",
            "product_cost": "$1",
            "quantity": "36",
            "delivery_charges": "$10"
        },
        {
            "product_name": "Mouse",
            "product_cost": "$5",
            "quantity": "5",
            "delivery_charges": "$2"
        }
    ]
}


In [6]:
import json

# JSON response string
json_response = "{\n\t\"last_balance_sheet_date\": \"January 31, 2024\",\n\t\"cash_on_hand\": \"$10,000\",\n\t\"bank_name\": \"First National Bank\",\n\t\"bank_balance\": \"$50,000\",\n\t\"list_of_products\": [\n\t\t{\n\t\t\t\"product_name\": \"Widget A\",\n\t\t\t\"product_cost\": \"$10\",\n\t\t\t\"quantity\": \"100\",\n\t\t\t\"delivery_charges\": \"$5\"\n\t\t},\n\t\t{\n\t\t\t\"product_name\": \"Widget B\",\n\t\t\t\"product_cost\": \"$20\",\n\t\t\t\"quantity\": \"50\",\n\t\t\t\"delivery_charges\": \"$8\"\n\t\t},\n\t\t{\n\t\t\t\"product_name\": \"Widget C\",\n\t\t\t\"product_cost\": \"$15\",\n\t\t\t\"quantity\": \"75\",\n\t\t\t\"delivery_charges\": \"$6\"\n\t\t}\n\t]\n}"

# Parse JSON string
data = json.loads(json_response)

# Write formatted JSON data to a file
with open('output.json', 'w') as f:
    json.dump(data, f, indent=4)

In [10]:

import os
import config
from utils import encode_image, extract_file_type
import requests
from dotenv import load_dotenv
import json
load_dotenv()


file_path = 'data/bank_sheet.png'


base64_image = encode_image(file_path)

headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"
}

payload = {
    'model':"gpt-4-turbo",
    'messages': config.PROMPT + [{
    "role": "user",
    "content": [{
        "type": "image_url",
        "image_url": {
            "url": f"data:image/" + extract_file_type(file_path) + f";base64,{base64_image}"
        }
    }]
}],
    'max_tokens':1000,
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

with open('../output.json', 'w') as f:
    json.dump(response.json()['choices'][0]['message']['content'], f, indent=4)


In [12]:
# print(response.json()['choices'][0]['message']['content'])

In [9]:
import json

# JSON response string
json_response = response.json()['choices'][0]['message']['content']

# Parse JSON string into a Python dictionary
data = json.loads(json_response)

# Write formatted JSON data to a file
with open('output.json', 'w') as f:
    json.dump(data, f, indent=4)


In [3]:
print(r"White
      ")


SyntaxError: EOL while scanning string literal (831864081.py, line 1)

In [None]:
this   is 
my sent 
ence