In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| | |
|-|-|
|Author(s) | [Lavi Nigam](https://github.com/lavinigam-gcp) |

Notebooks:
**goo.gle/io24-gemini-api**

Google AI Cookbook:
**goo.gle/google-ai-cookbook**

Vertex AI Cookbook:
**goo.gle/vertex-ai-cookbook**

# Step 1 - Google Cloud Vertex AI Gemini API

[What Changed: ]
- SDK installation

Before:

- ! pip install --upgrade google-generativeai

After:

- ! pip install --upgrade google-cloud-aiplatform

In [None]:
# Library installation # needs restarts

# ! pip install --upgrade google-generativeai
! pip install --upgrade google-cloud-aiplatform
! pip install PyPDF2
# ! apt-get install poppler-utils
# ! pip install pdf2image

[What Changed: ]
- Authentication  

Before:

- genai.configure(api_key="API Key")

After:

 - Authenticate Colab user to Google Cloud
 - Vertex AI init with Project ID and Region

  [Not required in Colab Enterprise]

In [None]:
# Authentication

# genai.configure(api_key="")

import sys

import vertexai

# Define project information (defined in every environment so later cells
# can rely on these names even outside Colab)
PROJECT_ID = ""  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Additional authentication is required for Google Colab
if "google.colab" in sys.modules:
    # Authenticate user to Google Cloud
    from google.colab import auth

    auth.authenticate_user()

# Initialize Vertex AI regardless of the environment. An empty PROJECT_ID is
# passed as None so the SDK can resolve the project from the environment's
# default credentials instead of receiving an empty string.
vertexai.init(project=PROJECT_ID or None, location=LOCATION)

In [None]:
# Downloading data from Google Drive
!gdown --folder https://drive.google.com/drive/folders/1RfMScB0Y1LUQdW5tvjyYA4_D21H5HMaT?usp=sharing -O /content/data/

[What Changed: ]

-


Before:

-

After:

-


[What Changed: ]

- Import statement


Before:

- import google.generativeai as genai

After:

- import vertexai.generative_models as genai


In [None]:
# Library
from IPython.display import display, Markdown, Latex
# import google.generativeai as genai
import vertexai.generative_models as genai
import PyPDF2
from rich import print as rich_print
from rich.markdown import Markdown as rich_Markdown

[No Change]

- Model Config and Safety Setting


Before:



After:


In [None]:
# Set up the model
from vertexai.generative_models import (
    GenerationConfig,
    HarmCategory,
    HarmBlockThreshold,
)


# Generation settings shared by both models below.
generation_config = GenerationConfig(temperature=1, max_output_tokens=8192)

# Every listed harm category gets the same blocking threshold.
safety_settings = {
    harm_category: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    for harm_category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# Two model handles built with identical configuration; only the model name differs.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-preview-0409",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

model_flash = genai.GenerativeModel(
    model_name="gemini-1.5-flash-preview-0514",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

### PDF

[No Change]

In [None]:
#@title Helper Functions
## Helper Function

def pdf_to_dict_str(file_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: The ``str()`` form of a dict that maps 1-based page numbers to
        the text extracted from that page.
    """
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Extraction happens while the file is still open; pages are numbered from 1.
        text_by_page = {
            page_number: page.extract_text()
            for page_number, page in enumerate(reader.pages, start=1)
        }

    return str(text_by_page)

In [None]:
%%time

# Two-part prompt: the blog PDF's page text (stringified dict produced by
# pdf_to_dict_str) followed by the question about it.
prompt_parts = [
  pdf_to_dict_str("/content/data/Idea/stage_1_prototype_Google Cloud TPU blog.pdf"),
  "What are key achievement for Google Cloud from the following blog. Mention in bullet.",
]
# Send the prompt to `model`; the final expression is the cell's displayed output.
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

In [None]:
%%time

# Send the prompt_parts built in the previous cell to model_flash instead of model.
response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

In [None]:
%%time

# Two-part prompt: the 10-Q PDF's page text (stringified dict produced by
# pdf_to_dict_str) followed by a list of questions to answer from it.
prompt_parts = [
  pdf_to_dict_str('/content/data/Idea/goog-10-q-q2-2023-4-1-15.pdf'),
  """Answer based on the document provided:
- How has Google Cloud performed in last quarters?
- What factors have influenced its numbers and is it positive or negative?
- What is the leadership view on Google Cloud business?
- What are some changes and decisions they have made along the way to achieve their goals?
- How are they doing with respect to other cloud companies?
  """,
]
# Send the prompt to `model`; the final expression is the cell's displayed output.
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

In [None]:
%%time

# Send the prompt_parts built in the previous cell to model_flash instead of model.
response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

### Audio File

[What Changed: ]

- File Upload method


Before:

- genai.upload_file

After:

- Part.from_uri [reading from GCS]


In [None]:
from vertexai.generative_models import Part

In [None]:
# earning_calls = genai.upload_file(path="/content/stage_1_prototype_Alphabet 2023 Q4 Earnings Call.mp3",
#                             display_name="earning_calls")

# Reference the earnings-call recording by its Cloud Storage URI, declared as MPEG audio.
earning_calls = Part.from_uri(
    uri="gs://gemini-lavi-asset/production/earning_calls/Alphabet 2023 Q4 Earnings Call.mp3",
    mime_type="audio/mpeg",
)

In [None]:
%%time

# Two-part prompt: the audio Part created in the earlier cell, then the question.
prompt_parts = [
  earning_calls,
  "How has performance max worked out for Google Ads?",
]
# Send the prompt to `model`; the final expression is the cell's displayed output.
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

In [None]:
%%time

# Send the prompt_parts built in the previous cell to model_flash instead of model.
response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

### Images

[What Changed: ]

- File Upload method
- PDF to Image method


Before:

- genai.upload_file
- pdf_to_images

After:

- Part.from_uri [it can read pdf as image directly]
- No need for a custom function


In [None]:
# Reference the blog PDF by its Cloud Storage URI, declared as a PDF document.
blog_pdf = Part.from_uri(
    uri="gs://gemini-lavi-asset/idea_doc/stage_1_prototype_Google Cloud TPU blog.pdf",
    mime_type="application/pdf",
)

In [None]:
%%time

# Two-part prompt: the PDF Part created in the previous cell (passed as a file
# reference, not extracted text), then the question.
prompt_parts = [
  blog_pdf,
  "what is the emfu for bf16 and 128b parameter model with 1 tpu v5e pod? Cite the table and page number and explain the significance of the results",
]
# Send the prompt to `model`; the final expression is the cell's displayed output.
response = model.generate_content(prompt_parts)
rich_Markdown(response.text)

In [None]:
%%time

# Send the prompt_parts built in the previous cell to model_flash instead of model.
response = model_flash.generate_content(prompt_parts)
rich_Markdown(response.text)

### Video

[What Changed: ]

- File Upload method
- Splitting video into frames

Before:

- genai.upload_file
- process_video

After:

- Part.from_uri [reading from GCS]
- No need to convert the video->frames


In [None]:
#@title Helper Functions
## Helper Function

import cv2
import os
import shutil

def process_video(video_file_path, full_video=False):
    """
    Extracts one frame per second from a video, uploads a selection of the
    frames, and returns the successfully uploaded ones as File objects.

    Args:
        video_file_path (str): Path to the video file.
        full_video (bool, optional): If True, uploads all frames. Otherwise, uploads
            only the frames at list positions 40-49 (a 10-second slice). Defaults to False.

    Returns:
        list: File objects (file_path, display_name, timestamp, response) for
        every frame whose upload did not raise.

    Raises:
        ValueError: If the video cannot be opened or reports no usable frame rate.
    """

    FRAME_EXTRACTION_DIRECTORY = "/content/frames"
    FRAME_PREFIX = "_frame"

    def create_frame_output_dir(output_dir):
        """Creates the frame output directory, wiping any previous contents first."""
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

    def get_timestamp(filename):
        """Extracts the timestamp from a frame filename, or None if it has no frame marker."""
        # Split on the LAST marker of the base name so that a video whose own
        # name contains "_frame" still yields a timestamp.
        _, marker, tail = os.path.basename(filename).rpartition(FRAME_PREFIX)
        if not marker:
            return None  # Incorrect format
        return tail.split('.')[0]

    class File:
        """Represents a file with path, display name, timestamp, and optional response."""
        def __init__(self, file_path, display_name=None):
            self.file_path = file_path
            self.display_name = display_name or os.path.basename(file_path)
            self.timestamp = get_timestamp(file_path)
            # Defined up front so the attribute exists even before an upload.
            self.response = None

        def set_file_response(self, response):
            self.response = response

    # Extract frames
    create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
    vidcap = cv2.VideoCapture(video_file_path)
    try:
        if not vidcap.isOpened():
            raise ValueError("Could not open video file.")

        fps = vidcap.get(cv2.CAP_PROP_FPS)
        # A missing/zero/NaN frame rate would otherwise surface as a
        # ZeroDivisionError inside the sampling loop.
        if not fps > 0:
            raise ValueError("Could not determine the video frame rate.")

        output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
        frame_count = 0  # number of frames written so far (== elapsed whole seconds)
        count = 0        # number of frames read from the video

        while vidcap.isOpened():
            success, frame = vidcap.read()
            if not success:
                break  # End of video

            # Keep the first frame read in each whole second of video time.
            if int(count / fps) == frame_count:
                minutes = frame_count // 60
                seconds = frame_count % 60
                time_string = f"{minutes:02d}:{seconds:02d}"
                image_name = f"{output_file_prefix}{FRAME_PREFIX}{time_string}.jpg"
                output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
                cv2.imwrite(output_filename, frame)
                frame_count += 1

            count += 1
    finally:
        # Release the capture handle on every path, including the error paths above.
        vidcap.release()

    print(f"Extracted: {frame_count} frames")

    # Process and optionally upload frames
    files = [File(os.path.join(FRAME_EXTRACTION_DIRECTORY, file)) for file in sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))]
    files_to_upload = files if full_video else files[40:50]  # Adjust slice as needed

    uploaded_files = []
    print(f'Uploading {len(files_to_upload)} files...')
    for file in files_to_upload:
        print(f'Uploading: {file.file_path}...')
        try:
            # NOTE(review): `upload_file` is the call this notebook lists as the
            # "Before" (google.generativeai) API; with `genai` bound to
            # vertexai.generative_models it is expected to fail and be reported
            # by the handler below - confirm before relying on this function.
            response = genai.upload_file(path=file.file_path)  # Assuming 'genai' is available
            file.set_file_response(response)
            uploaded_files.append(file)
        except Exception as e:
            # Best-effort: report the failure and continue with the remaining frames.
            print(f"Error uploading {file.file_path}: {e}")

    print(f"Uploaded: {len(uploaded_files)} files")
    return uploaded_files

# Make GenerateContent request with the structure described above.
def make_request(prompt, files):
  """Build a flat request list: the prompt, then each file's timestamp and response.

  Args:
    prompt: First element of the request.
    files: Iterable of objects exposing `timestamp` and `response` attributes.

  Returns:
    list: [prompt, ts_1, response_1, ts_2, response_2, ...] in input order.
  """
  request = [prompt]
  for frame in files:
    request.extend((frame.timestamp, frame.response))
  return request

In [None]:
# # Upload a video ~ 1min
# video_file_name = "/content/What's next for AI and Google Search _ Google I_O 2023.mp4"
# processed_files_search = process_video(video_file_name, full_video=True)

In [None]:
# Reference the video by its Cloud Storage URI, declared as MP4 video.
video = Part.from_uri(
    uri="gs://gemini-lavi-asset/idea_doc/What's next for AI and Google Search _ Google I_O 2023.mp4",
    mime_type="video/mp4",
)

In [None]:
%%time

# Two-part prompt: the video Part created in the previous cell, then the question.
prompt_parts = [
  video,
  "Describe this video and How is google using generative ai in search? Give response in bullet",
]
# Send the prompt to `model`; this cell renders the reply with IPython's
# Markdown rather than the rich_Markdown used by earlier cells.
response = model.generate_content(prompt_parts)
Markdown(response.text)

In [None]:
%%time
# Send the prompt_parts built in the previous cell to model_flash instead of model.
response = model_flash.generate_content(prompt_parts)
Markdown(response.text)