<a href="https://colab.research.google.com/github/theadityasurana/langchain-projects/blob/main/imginfo%20extactor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install -q -U google-generativeai


In [3]:
import google.generativeai as genai

In [4]:
# Read the Gemini API key from Colab's user secrets (stored under the name
# 'GOOGLE_API_KEY') so the key never appears in the notebook source, then
# hand it to the SDK.
from google.colab import userdata

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [5]:
# Print the name of every available model whose supported generation
# methods include 'generateContent'.
content_model_names = (
  candidate.name
  for candidate in genai.list_models()
  if 'generateContent' in candidate.supported_generation_methods
)
for model_name in content_model_names:
  print(model_name)


models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [6]:
# Generation settings passed to the model as `generation_config`.
MODEL_CONFIG = dict(
  temperature=0.2,
  top_p=1,
  top_k=32,
  max_output_tokens=4096,
)

# Safety settings passed to the model: every harm category listed here uses
# the same BLOCK_MEDIUM_AND_ABOVE threshold.
safety_settings = [
  {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
  for harm_category in (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
  )
]

In [7]:
# Build the shared `gemini-pro-vision` model object, applying the generation
# and safety settings defined above. `gemini_output` reads this global.
model = genai.GenerativeModel(
    model_name="gemini-pro-vision",
    generation_config=MODEL_CONFIG,
    safety_settings=safety_settings,
)

In [8]:
from pathlib import Path

def image_format(image_path, mime_type=None):
    """Load an image file and wrap it as an inline image part for the model.

    Args:
        image_path: Location of the image on disk (str or Path).
        mime_type: Optional explicit MIME type, e.g. "image/jpeg". When
            omitted, the type is guessed from the file extension
            (PNG -> image/png, JPEG -> image/jpeg, ...). If the extension is
            unknown or does not map to an image type, "image/png" is used,
            which was the previous hard-coded value.

    Returns:
        A one-element list containing a dict with the keys "mime_type" and
        "data" (the raw bytes of the file).

    Raises:
        FileNotFoundError: If `image_path` does not exist.
    """
    # Local import: keeps this cell runnable without touching the import cell.
    import mimetypes

    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    if mime_type is None:
        # Previously every file was labelled image/png, which mislabels
        # JPEG/WEBP inputs; derive the label from the extension instead.
        guessed = mimetypes.guess_type(img.name)[0]
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/png"

    return [
        {
            "mime_type": mime_type,
            "data": img.read_bytes(),
        }
    ]


In [9]:
def gemini_output(image_path, system_prompt, user_prompt):
    """Send one image plus two prompts to the global `model` and return its text.

    The request is a three-part list, in this order: the system prompt, the
    first image part produced by `image_format(image_path)`, and the user
    prompt.

    Returns:
        The `text` attribute of the response from `model.generate_content`.
    """
    image_part = image_format(image_path)[0]
    reply = model.generate_content([system_prompt, image_part, user_prompt])
    return reply.text

In [17]:
# Inputs for a single question about one receipt image.
image_path = "/content/rec1.png"
user_prompt = "What is the balance amount in the image?"

# Instruction text sent ahead of the image; it frames the model as a
# receipt specialist.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

# Last expression of the cell, so the returned text is shown as the output.
gemini_output(
    image_path=image_path,
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

' The balance amount is 154.06.'

In [20]:
# Inputs for the JSON-conversion request: same receipt image and the same
# receipt-specialist system prompt as the balance question; only the user
# prompt differs.
image_path = "/content/rec1.png"
user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "

system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

In [21]:
# Run the JSON-conversion request and keep the reply text in `output`.
output = gemini_output(
    image_path=image_path,
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

In [22]:
# Render the reply text as Markdown instead of showing the plain string repr.
from IPython.display import Markdown
# Last expression of the cell, so the Markdown object is displayed.
Markdown(output)

 ```json
{
  "receipt": {
    "from": {
      "name": "East Repair Inc.",
      "address": "1912 Harvest Lane\nNew York, NY 12210"
    },
    "to": {
      "name": "John Smith",
      "address": "2 Court Square\nNew York, NY 12210"
    },
    "date": "11/02/2019",
    "invoice": "US-001",
    "po": "2312/2019",
    "due": "26/02/2019",
    "items": [
      {
        "qty": 1,
        "description": "Front and rear brake cables",
        "unit_price": 100.00,
        "amount": 100.00
      },
      {
        "qty": 2,
        "description": "New set of pedal arms",
        "unit_price": 15.00,
        "amount": 30.00
      },
      {
        "qty": 3,
        "description": "Labor 3hrs",
        "unit_price": 5.00,
        "amount": 15.00
      }
    ],
    "subtotal": 145.00,
    "tax": 6.25,
    "total": 154.06,
    "terms": "Payment is due within 15 days",
    "signature": "John Smith"
  }
}
```