In [None]:
pip install google-generativeai


In [None]:
import os
import time
import google.generativeai as genai

from google.colab import userdata

# Configure the SDK with the key stored under 'GOOGLE_API_KEY' in Colab's
# userdata store, so the key is not hard-coded in the notebook.
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

def upload_to_gemini(path, mime_type=None):
  """Send a local file to Gemini and hand back the resulting file object.

  Args:
    path: Location of the file to upload.
    mime_type: Optional MIME type forwarded to `genai.upload_file`.

  Returns:
    The file object returned by `genai.upload_file`.

  See https://ai.google.dev/gemini-api/docs/prompting_with_media
  """
  uploaded = genai.upload_file(path, mime_type=mime_type)
  label, location = uploaded.display_name, uploaded.uri
  print(f"Uploaded file '{label}' as: {location}")
  return uploaded

In [None]:
def wait_for_files_active(files):
  """Waits for all of the given files to be active.

  Some files uploaded to the Gemini API need to be processed before they can be
  used as prompt inputs. The status can be seen by querying the file's "state"
  field.

  This implementation uses a simple blocking polling loop. Production code
  should probably employ a more sophisticated approach.

  Args:
    files: Iterable of uploaded file objects; only each object's `name` is read.

  Returns:
    A list holding the refreshed file object for every input, in input order.

  Raises:
    Exception: If a file stops processing in any state other than "ACTIVE".
  """
  print("Waiting for file processing...")
  active_files = []
  for pending in files:
    name = pending.name
    file = genai.get_file(name)
    while file.state.name == "PROCESSING":
      print(".", end="", flush=True)
      time.sleep(10)
      file = genai.get_file(name)
    if file.state.name != "ACTIVE":
      raise Exception(f"File {file.name} failed to process")
    # Collect the file and continue: returning from inside the loop would leave
    # every later file unchecked.
    active_files.append(file)
  print("...all files ready")
  print()
  return active_files

In [None]:
# Generation settings handed to the model when it is constructed
generation_config = dict(
  temperature=0,
  top_p=0.95,
  top_k=64,
  max_output_tokens=8192,
  response_mime_type="text/plain",
)

In [None]:
# Build the model with the generation settings defined earlier.
# No safety_settings are passed here; to adjust them see
# https://ai.google.dev/gemini-api/docs/safety-settings
model = genai.GenerativeModel(
  "gemini-1.5-flash",
  generation_config=generation_config,
)

# TODO Make these files available on the local file system
# You may need to update the file paths
files = [
  upload_to_gemini(pdf_path, mime_type="application/pdf")
  for pdf_path in ("2407.01449v2.pdf",)
]

# Uploaded files can take a while to process; block until they are usable.
wait_for_files_active(files)

# Start the chat with one user turn that carries the PDF and a first question.
chat_session = model.start_chat(
  history=[
    dict(
      role="user",
      parts=[files[0], "What is the title of this paper?\n\n"],
    ),
  ]
)



In [None]:
# Ask a follow-up in the existing chat, then show the reply text followed by
# the usage metadata attached to the response.
response = chat_session.send_message("How many Figures are in the paper?")
print(response.text, response.usage_metadata, sep="\n")


In [None]:
# Ask a follow-up in the existing chat, then show the reply text followed by
# the usage metadata attached to the response.
response = chat_session.send_message("Who are the authors?")
print(response.text, response.usage_metadata, sep="\n")


In [None]:
# Ask for the paper's contributions in the existing chat, then print the reply
# text and the usage metadata attached to the response.
response = chat_session.send_message(
  "What are the major contributions of the paper according to the authors?"
)

print(response.text)

print(response.usage_metadata)


### Generate text from text-and-image input

The Gemini API supports multimodal inputs that combine text with media files. The following example shows how to generate text from text-and-image input:

In [None]:
import pathlib

import PIL.Image

# Directory holding the sample image, defined here so the cell runs on a fresh
# kernel.
# TODO Make organ.jpg available on the local file system; update the path if needed
media = pathlib.Path(".")

# Send a text instruction together with the image in a single request.
model = genai.GenerativeModel("gemini-1.5-flash")
organ = PIL.Image.open(media / "organ.jpg")
response = model.generate_content(["Tell me about this instrument", organ])
print(response.text)

## Generate JSON
When the model is configured to output JSON, it responds to any prompt with JSON-formatted output.

In [None]:
import google.generativeai as genai
import os

# Use the API_KEY environment variable when it is set; otherwise keep whatever
# key the SDK was configured with earlier instead of failing with KeyError.
if os.environ.get("API_KEY"):
  genai.configure(api_key=os.environ["API_KEY"])

# Using `response_mime_type` requires either a Gemini 1.5 Pro or 1.5 Flash model
model = genai.GenerativeModel('gemini-1.5-flash',
                              # Set the `response_mime_type` to output JSON
                              generation_config={"response_mime_type": "application/json"})

# The prompt spells out the JSON schema the reply should follow.
prompt = """
  List 5 popular cookie recipes.
  Using this JSON schema:
    Recipe = {"recipe_name": str}
  Return a `list[Recipe]`
  """

response = model.generate_content(prompt)
print(response.text)

### Adding Context Cache

In [None]:
# Upload the paper again through the helper and keep the returned file object.
files = [upload_to_gemini("2407.01449v2.pdf", mime_type="application/pdf")]

In [None]:
path_to_pdf_file = '2403.06634.pdf'

# Upload the PDF using the Files API
pdf_file = genai.upload_file(path=path_to_pdf_file)

# Wait for the file to finish processing
while pdf_file.state.name == 'PROCESSING':
  print('Waiting for PDF to be processed.')
  time.sleep(2)
  pdf_file = genai.get_file(pdf_file.name)

# Any final state other than ACTIVE means the file cannot be used as input, so
# stop here rather than report success.
if pdf_file.state.name != 'ACTIVE':
  raise Exception(f'File {pdf_file.name} failed to process')

print(f'PDF processing complete: {pdf_file.uri}')

In [None]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

In [None]:
# Cache the uploaded PDF together with a system instruction; the cache is
# created with a 15 minute TTL.
cache = caching.CachedContent.create(
    model='models/gemini-1.5-flash-001',
    display_name='PDF-file',  # label used to identify the cache
    contents=[pdf_file],
    system_instruction=(
        "You are an expert PDF file analyzer, and your job is to answer "
        "the user's query based on the PDF file you have access to."
    ),
    ttl=datetime.timedelta(minutes=15),
)


In [None]:
# Construct a GenerativeModel which uses the created cache.
model = genai.GenerativeModel.from_cached_content(cached_content=cache)

# Query the model. Adjacent string literals are concatenated as-is, so each
# question ends with a space to keep the sentences separated in the prompt.
response = model.generate_content([(
    'What is the title of the paper? '
    'Who are the authors? '
    'What are the major contributions of the paper according to the authors?'
)])

print(response.usage_metadata)

print(response.text)

In [None]:
# Construct a GenerativeModel which uses the created cache.
model = genai.GenerativeModel.from_cached_content(cached_content=cache)

# Query the model. Adjacent string literals are concatenated as-is, so each
# question ends with a space to keep the sentences separated in the prompt.
response = model.generate_content([(
    'What is the title of the paper? '
    'Who are the authors? Provide a list. '
    'What are the major contributions of the paper according to the authors?'
)])

print(response.usage_metadata)

print(response.text)

In [None]:
# Print each entry returned by the cached-content listing.
for cached_entry in caching.CachedContent.list():
  print(cached_entry)

In [None]:
# Send a single plain-text question to the current model and show its answer.
response = model.generate_content(contents="What is the main theme of the paper?")
print(response.text)

In [None]:
# Show the usage metadata attached to the most recent response.
print(response.usage_metadata)
