In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| | |
|-|-|
|Author(s) | [Lavi Nigam](https://github.com/lavinigam-gcp) |


**bit.ly / io24-gemini-api**

# Step 1 - Google AI for Developers Gemini API

In [1]:
# Library installation # needs restarts
! pip install PyPDF2
! apt-get install poppler-utils
! pip install pdf2image
! pip install --upgrade google-generativeai

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 45 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.4 [186 kB]
Fetched 186 kB in 0s (760 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 121918 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.4_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.4) ...
Setting up p

In [1]:
# Downloading data from Google Drive
!gdown --folder https://drive.google.com/drive/folders/1RfMScB0Y1LUQdW5tvjyYA4_D21H5HMaT?usp=sharing -O /content/data/
# !gdown --folder https://drive.google.com/drive/folders/10Mih6hD6Ln8xNX8L7V4q1vjR9ArsXkWQ?usp=drive_link -O /content/data/

Retrieving folder contents
Processing file 1iT0qe9EU8RQyiAlrMlJ-SH-E_WtMGFNm goog-10-q-q2-2023-4-1-15.pdf
Processing file 1qs4blSxatemVxpZ3dt1yzESDa4aNWptD Google's newest and most capable AI  Gemini.mp3
Processing file 1oX_TaQZjOiuDUwsTrqz7zq5HMEe7cEMR Google's newest and most capable AI _ Gemini.mp4
Processing file 1totf67pJ3XYhILCY0TrbS1tHm7PwCZES Responsible AI _ Google I_O 2023.mp4
Processing file 1kaq58S4oRmuZgDqK109uc6mwedVmPAby stage_1_prototype_Alphabet 2023 Q4 Earnings Call.mp3
Processing file 1GXGfwoe8-vsoOM1E8SBOCybc7R5XRd2P stage_1_prototype_Google Cloud TPU blog.pdf
Processing file 19aqpEZqTVTERbySYVrtDgSjuUBUabeSE What's next for AI and Google Search _ Google I_O 2023.mp4
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1iT0qe9EU8RQyiAlrMlJ-SH-E_WtMGFNm
To: /content/data/Idea/goog-10-q-q2-2023-4-1-15.pdf
100% 217k/217k [00:00<00:00, 85.7MB/s]
Downloading...
From: h

In [2]:
# Library
from IPython.display import display, Markdown, Latex
import google.generativeai as genai
import PyPDF2
from rich import print as rich_print
from rich.markdown import Markdown as rich_Markdown

In [3]:
# Authentication
from google.colab import userdata
genai.configure(api_key=userdata.get('api_key'))

In [4]:
# Set up the model
from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerationConfig

generation_config = GenerationConfig(
    temperature=1,
    max_output_tokens=8192,
)

safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}


model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                              generation_config=generation_config,
                              safety_settings=safety_settings)

model_flash = genai.GenerativeModel(model_name="gemini-1.5-flash-latest",
                              generation_config=generation_config,
                              safety_settings=safety_settings)

### PDF

In [6]:
#@title Helper Functions
## Helper Function

def pdf_to_dict_str(file_path):
    """Reads a PDF file and returns a dictionary with page numbers as keys and page text as values."""

    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        num_pages = len(pdf_reader.pages)

        page_dict = {}
        for page_num in range(num_pages):
            page = pdf_reader.pages[page_num]
            page_text = page.extract_text()
            page_dict[page_num + 1] = page_text  # Page numbers start from 1

        return str(page_dict)

In [7]:
%%time

prompt_parts = [
  pdf_to_dict_str('/content/data/Idea/stage_1_prototype_Google Cloud TPU blog.pdf'),
  "What are key achievement for Google Cloud from the following blog. Mention in bullet.",
]
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

CPU times: user 836 ms, sys: 18.1 ms, total: 854 ms
Wall time: 14.7 s


In [12]:
%%time

response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

CPU times: user 113 ms, sys: 14.3 ms, total: 127 ms
Wall time: 7.16 s


In [10]:
%%time
prompt_parts = [
  pdf_to_dict_str('/content/data/Idea/goog-10-q-q2-2023-4-1-15.pdf'),
  """Answer based on the document provided:
- How has Google Cloud performed in last quarters?
- What factors have influenced its numebrs and is it positive or negative?
- What is the leadership view on Google Cloud business?
- What are some changes and decisions they have made along the way to achieve their goals?
- How are they doing with respect to other cloud companies?
  """,
]
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

CPU times: user 1.36 s, sys: 28.6 ms, total: 1.39 s
Wall time: 14 s


In [5]:
for models in genai.list_models():
  print(models.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash-latest
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision
models/embedding-001
models/text-embedding-004
models/aqa


### Audio File

In [12]:
%%time

earning_calls = genai.upload_file(path="/content/data/Idea/stage_1_prototype_Alphabet 2023 Q4 Earnings Call.mp3",
                            display_name="earning_calls")

CPU times: user 526 ms, sys: 207 ms, total: 734 ms
Wall time: 3.52 s


In [14]:
%%time

prompt_parts = [
  earning_calls,
  "what are major comments by Sundar and Ruth about AI and what key questions were asked by analyst and give their name? give answer in bullets ",
]
response = model.generate_content(prompt_parts,request_options={"timeout": 600})
rich_Markdown(response.text)

CPU times: user 629 ms, sys: 59.8 ms, total: 689 ms
Wall time: 49.1 s


In [17]:
%%time

response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

CPU times: user 433 ms, sys: 45.5 ms, total: 479 ms
Wall time: 29.6 s


In [16]:
# delete the file
genai.delete_file(name=earning_calls.name)

### Images (Read PDF as Images)

In [17]:
#@title Helper Functions
## Helper Function

import os
from pdf2image import convert_from_path

def pdf_to_images(file_path, output_folder="output_folder"):
    """Converts a PDF file into images for each page, saves them to a folder, and returns a list of image paths."""

    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Convert PDF to images
    images = convert_from_path(file_path, dpi=200, output_folder=output_folder, fmt="jpg")

    # Generate image paths
    image_paths = []
    pages_list = []
    upload_file_name = []
    for i, image in enumerate(images):
        image_name = f"page_{i+1}.jpg"  # Page numbers start from 1
        image_path = os.path.join(output_folder, image_name)
        image.save(image_path, "JPEG")
        image_paths.append(image_path)
        files_to_upload = genai.upload_file(image_path, display_name=image_path)
        pages_list.append(files_to_upload)
        upload_file_name.append([files_to_upload.name, files_to_upload.display_name])

    return image_paths, pages_list, upload_file_name

def make_request(prompt, files):
  request = [prompt]
  for file in files:
    request.append(file)
  return request

def delete_files(output_folder):
  for filename in os.listdir(output_folder):
    if filename.endswith(".jpg"):
      file_path = os.path.join(output_folder, filename)
      genai.delete_file(name=file_path)

In [18]:
%%time

file_path = "/content/data/Idea/stage_1_prototype_Google Cloud TPU blog.pdf"
output_folder = "/content/output_images"
image_paths, pages_list, upload_file_name = pdf_to_images(file_path, output_folder)


response = model.generate_content(
make_request("what is the emfu for bf16 and 128b parameter model with 1 tpu v5e pod? Cite the table and page number and explain the significance of the results",
             pages_list),request_options={"timeout": 600})
rich_Markdown(response.text)

CPU times: user 1.13 s, sys: 457 ms, total: 1.59 s
Wall time: 42.7 s


In [22]:
%%time

response = model_flash.generate_content(
make_request("what is the emfu for bf16 and 128b parameter model with 1 tpu v5e pod? Cite the table and page number and explain the significance of the results",
             pages_list))
rich_Markdown(response.text)

CPU times: user 61.9 ms, sys: 5.32 ms, total: 67.3 ms
Wall time: 2.91 s


In [None]:
for filename in upload_file_name:
  print("Deleteing...",filename[0])
  genai.delete_file(name=filename[0])

Deleteing... files/9bymqnfpiwi5
Deleteing... files/kjvhqi6x4ca4
Deleteing... files/5uli74dmzg57
Deleteing... files/mwuo84jq1xt2
Deleteing... files/ihioosq2kdmj
Deleteing... files/jhmckpola1sw
Deleteing... files/waye8t5wt6lq
Deleteing... files/h4g5elr1isg2
Deleteing... files/y4wryk42ggim
Deleteing... files/rtok1qd7wtj5
Deleteing... files/suiyspsbzi1c
Deleteing... files/yyjmenhh9ola
Deleteing... files/ucklv75pt593
Deleteing... files/38g5pv53tyet
Deleteing... files/ufonat85xkvx
Deleteing... files/31fhpft812ii
Deleteing... files/opptznt67old
Deleteing... files/lh28yo7mr623


### Video

In [23]:
#@title Helper Functions
## Helper Function

import cv2
import os
import shutil

def process_video(video_file_path, full_video=False):
    """
    Extracts frames from a video, optionally uploads them, and returns a list of File objects.

    Args:
        video_file_path (str): Path to the video file.
        full_video (bool, optional): If True, uploads all frames. Otherwise, uploads a 10-second slice. Defaults to False.

    Returns:
        list: A list of File objects, each containing file path, response (if uploaded), and timestamp.
    """

    FRAME_EXTRACTION_DIRECTORY = "/content/frames"
    FRAME_PREFIX = "_frame"

    def create_frame_output_dir(output_dir):
        """Creates or cleans up the frame output directory."""
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        else:
            shutil.rmtree(output_dir)
            os.makedirs(output_dir)

    def get_timestamp(filename):
        """Extracts the timestamp from a frame filename."""
        parts = filename.split(FRAME_PREFIX)
        if len(parts) != 2:
            return None  # Incorrect format
        return parts[1].split('.')[0]

    class File:
        """Represents a file with path, display name, timestamp, and optional response."""
        def __init__(self, file_path, display_name=None):
            self.file_path = file_path
            self.display_name = display_name or os.path.basename(file_path)
            self.timestamp = get_timestamp(file_path)

        def set_file_response(self, response):
            self.response = response

    # Extract frames
    create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
    vidcap = cv2.VideoCapture(video_file_path)
    if not vidcap.isOpened():
        raise ValueError("Could not open video file.")

    fps = vidcap.get(cv2.CAP_PROP_FPS)
    frame_duration = 1 / fps
    output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
    frame_count = 0
    count = 0

    while vidcap.isOpened():
        success, frame = vidcap.read()
        if not success:
            break  # End of video

        if int(count / fps) == frame_count:
            min = frame_count // 60
            sec = frame_count % 60
            time_string = f"{min:02d}:{sec:02d}"
            image_name = f"{output_file_prefix}{FRAME_PREFIX}{time_string}.jpg"
            output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
            cv2.imwrite(output_filename, frame)
            frame_count += 1

        count += 1

    vidcap.release()
    # print(f"Extracted: {frame_count} frames")

    # Process and optionally upload frames
    files = [File(os.path.join(FRAME_EXTRACTION_DIRECTORY, file)) for file in sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))]
    files_to_upload = files if full_video else files[40:50]  # Adjust slice as needed

    uploaded_files = []
    # print(f'Uploading {len(files_to_upload)} files...')
    for file in files_to_upload:
        # print(f'Uploading: {file.file_path}...')
        try:
            response = genai.upload_file(path=file.file_path)  # Assuming 'genai' is available
            file.set_file_response(response)
            uploaded_files.append(file)
        except Exception as e:
            print(f"Error uploading {file.file_path}: {e}")

    # print(f"Uploaded: {len(uploaded_files)} files")
    return uploaded_files

# Make GenerateContent request with the structure described above.
def make_request(prompt, files):
  request = [prompt]
  for file in files:
    request.append(file.timestamp)
    request.append(file.response)
  return request

In [24]:
# Upload a video ~ 1min
video_file_name = "/content/data/Idea/What's next for AI and Google Search _ Google I_O 2023.mp4"
processed_files_search = process_video(video_file_name, full_video=True)

In [25]:
%%time

request = make_request("Describe this video and How is google using generative ai in search? Give response in bullet", processed_files_search)
response = model.generate_content(request,
                                  request_options={"timeout": 600})
rich_Markdown(response.text)

CPU times: user 150 ms, sys: 16.1 ms, total: 167 ms
Wall time: 8.22 s


In [27]:
%%time

request = make_request("Describe this video and How is google using generative ai in search? Give response in bullet", processed_files_search)
response = model_flash.generate_content(request,
                                  request_options={"timeout": 600})
rich_Markdown(response.text)

CPU times: user 103 ms, sys: 5.47 ms, total: 109 ms
Wall time: 5.01 s


## Embeddings

In [28]:
title = "The next generation of AI for developers and Google Workspace"
sample_text = ("Title: The next generation of AI for developers and Google Workspace"
    "\n"
    "Full article:\n"
    "\n"
    "Gemini API & Google AI Studio: An approachable way to explore and prototype with generative AI applications")

model = 'models/text-embedding-004'
embedding = genai.embed_content(model=model,
                                content=sample_text,
                                task_type="retrieval_document",
                                title=title)

print(embedding)

{'embedding': [-0.0021609126, -0.003164448, -0.060120765, -0.0071218405, 0.00087754615, 0.04058192, 0.04457149, 0.035524692, -0.047465388, 0.008888606, -0.027958257, 0.011335692, -0.0024438684, 0.0030851841, -0.018796144, -0.055550933, 0.031426456, 0.00065491674, -0.11370059, 0.06370807, -0.021750022, -0.021367034, -0.09982074, -0.008604742, -0.033300586, -0.012815639, 0.07153146, 0.03706478, 0.02297012, 0.043331206, 0.01067061, 0.040685344, 0.03636141, -0.036222056, -0.017799364, -0.014820968, 0.0053205043, -0.017382711, 0.07044941, 0.0020212498, -0.018208733, 0.017558081, 0.006493213, 0.12724239, -0.023805205, 0.010057812, -0.0006948954, 0.07085626, -0.056457285, 0.01831114, 0.09046226, 0.021575559, -0.06656088, 0.026865069, -0.0034812505, -0.0011228691, -0.06535635, -0.0018169151, 0.08672994, 0.02874761, -0.024817277, 0.004653874, -0.058998518, 0.03206169, -0.022604037, -0.015454266, -0.013758667, 0.021129975, -0.047893398, 0.02573244, 0.013028228, -0.018002002, -0.039879415, 0.0234

In [29]:
len(embedding['embedding'])

768