<a href="https://colab.research.google.com/github/rabimba/GDE-ML-Artifacts/blob/main/gemini1.5/File_API_Video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Set up your API key

To run the following cell, your API key must be stored in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](https://github.com/google-gemini/gemini-api-cookbook/blob/main/quickstarts/Authentication.ipynb) for an example.

In [None]:
from google.colab import userdata
# Read the Gemini API key from the Colab Secret named GOOGLE_API_KEY.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

### Build a Google API client for the Gemini API

The Gemini API Python SDK does not support the File API [yet](https://github.com/google/generative-ai-python/pull/249). In the meantime, you can build a [Google API Python client](https://github.com/googleapis/google-api-python-client) from the `v1beta` API Discovery Document.

In [None]:
# Fetch the discovery docs for the Generative Language API service.
from googleapiclient.discovery import build
import googleapiclient
import requests

# The discovery document describes the v1beta REST surface; the client below is
# generated from it at runtime.
DISCOVERY_URL = f'https://generativelanguage.googleapis.com/$discovery/rest?version=v1beta&key={GOOGLE_API_KEY}'
discovery_response = requests.get(DISCOVERY_URL, timeout=30)
# Fail here on an HTTP error (for example an invalid API key) instead of handing
# an error payload to build_from_document, which would fail less clearly.
discovery_response.raise_for_status()
discovery_docs = discovery_response.content
genai_service = googleapiclient.discovery.build_from_document(discovery_docs, developerKey=GOOGLE_API_KEY)

### Extract frames using OpenCV

The following code uses OpenCV to extract image frames from the video at 1 frame per second.

If you uploaded your own file in the colab, change `video_file_name` to the full file name like: `gemini.mp4`.

In [None]:
import cv2
import os
import shutil

# Where extracted frames are written, and the marker that separates the video
# name from the frame index in each image file name.
FRAME_EXTRACTION_DIRECTORY = "/content/frames"
FRAME_PREFIX = "_frame"

def create_frame_output_dir(output_dir):
  """Ensures output_dir exists and is empty, deleting any previous contents."""
  if os.path.exists(output_dir):
    # Drop whatever an earlier run left behind before recreating the directory.
    shutil.rmtree(output_dir)
  os.makedirs(output_dir)

def extract_frame_from_video(video_file_path):
  """Saves about one JPEG frame per second of video into FRAME_EXTRACTION_DIRECTORY.

  The output directory is recreated on every call. Each image is named
  '<video basename with dots replaced by underscores><FRAME_PREFIX><NNNN>.jpg',
  where NNNN is the zero-padded index of the saved frame. Index 0000 is the
  very first frame of the video, so the index doubles as the frame's offset in
  seconds.

  Args:
    video_file_path: Path or URL passed to cv2.VideoCapture.

  Raises:
    IOError: If OpenCV cannot open the video.
    ValueError: If the video does not report a positive frame rate.
  """
  print(f"Extracting {video_file_path} at 1 frame per second. This might take a bit...")
  create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
  vidcap = cv2.VideoCapture(video_file_path)
  try:
    if not vidcap.isOpened():
      raise IOError(f"Could not open video: {video_file_path}")

    fps = vidcap.get(cv2.CAP_PROP_FPS)
    # `not fps > 0` also rejects NaN; without a frame rate there is no way to
    # pick one frame per second (and the modulo below would divide by zero).
    if not fps > 0:
      raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_file_path}")
    # Round rather than truncate so that e.g. 29.97 fps samples every 30th frame
    # and the saved index stays close to the real second offset.
    frames_per_sample = max(1, round(fps))

    output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
    frame_count = 0  # Number of frames written so far.
    count = 0        # Number of frames decoded so far.
    while vidcap.isOpened():
      success, frame = vidcap.read()
      if not success:  # End of video
        break
      if count % frames_per_sample == 0:  # Extract a frame every second
        image_name = f"{output_file_prefix}{FRAME_PREFIX}{frame_count:04d}.jpg"
        output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
        cv2.imwrite(output_filename, frame)
        frame_count += 1
      count += 1
  finally:
    # Release the capture object even when extraction fails part-way.
    vidcap.release()
  print(f"Completed video frame extraction!\n\nExtracted: {frame_count} frames")


# Video to sample: the value is handed unchanged to extract_frame_from_video.
video_file_name = "https://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4" # @param {type:"string"}
# Extract frames from the selected video into FRAME_EXTRACTION_DIRECTORY.
extract_frame_from_video(video_file_name)

In [None]:
import mimetypes
import os

class File:
  """A local file plus the metadata needed to upload and reference it.

  Attributes:
    file_path: Path of the file on disk.
    display_name: Optional display name, or None when not given.
    timestamp: 'MM:SS' string built from timestamp_seconds, or None when no
      timestamp was given.
    mimetype: Explicit MIME type, otherwise guessed from file_path (None when
      it cannot be guessed).
    uri: URI of the uploaded file, or None until one is set.
  """

  def __init__(self, file_path: str, display_name: str = None,
               timestamp_seconds: int = None, mimetype: str = None, uri = None):
    self.file_path = file_path
    # Always define the optional attributes so that reading them later never
    # raises AttributeError when the value was not supplied.
    self.display_name = display_name if display_name else None
    # `is not None` keeps a legitimate timestamp of 0 seconds.
    self.timestamp = (seconds_to_time_string(timestamp_seconds)
                      if timestamp_seconds is not None else None)
    # Detect mimetype if not specified
    self.mimetype = mimetype if mimetype else mimetypes.guess_type(file_path)[0]
    self.uri = uri

  def set_file_uri(self, uri):
    """Records the URI assigned to this file."""
    self.uri = uri

def seconds_to_time_string(seconds):
  """Formats a whole number of seconds as a zero-padded 'MM:SS' string.

  This is the timestamp format expected by Gemini 1.5. Minutes are not wrapped
  at 60, so one hour renders as '60:00'.
  """
  whole_minutes, leftover_seconds = divmod(seconds, 60)
  return f"{whole_minutes:02d}:{leftover_seconds:02d}"

def get_timestamp_seconds(filename):
  """Extracts the frame count (as an integer) from a frame image file name.

  The expected format is 'output_file_prefix_frame0000.jpg': everything between
  the last FRAME_PREFIX and the next '.' is parsed as the count.

  Args:
    filename: File name of an extracted frame.

  Returns:
    The frame count as an int, or None when FRAME_PREFIX does not occur in
    filename.

  Raises:
    ValueError: If the text after FRAME_PREFIX is not an integer.
  """
  # Split on the LAST occurrence: the video's own name may contain FRAME_PREFIX
  # (e.g. 'my_frame_clip_mp4_frame0003.jpg'), and only the final one marks the
  # frame index.
  _, separator, suffix = filename.rpartition(FRAME_PREFIX)
  if not separator:
    return None  # Indicate that the filename might be incorrectly formatted

  frame_count_str = suffix.split(".")[0]
  return int(frame_count_str)

# Wrap every extracted frame, in file-name order, in a File that carries its
# path and the timestamp parsed from its name.
files = sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))
files_to_upload = [
    File(file_path=os.path.join(FRAME_EXTRACTION_DIRECTORY, file),
         timestamp_seconds=get_timestamp_seconds(file))
    for file in files
]

# Send each frame to the File API and remember the URI it was given.
uploaded_files = []
print(f'Uploading {len(files_to_upload)} files. This might take a bit...')
for file in files_to_upload:
  print(f'Uploading: {file.file_path}...')
  upload_request = genai_service.media().upload(
      media_body=file.file_path,
      media_mime_type=file.mimetype)
  response = upload_request.execute()
  # The URI from the response is what later requests use to refer to the frame.
  file.set_file_uri(response["file"]["uri"])
  uploaded_files.append(file)

print(f"Completed file uploads!\n\nUploaded: {len(uploaded_files)} files")

## List Files

`files.list` lets you see all files that have been uploaded to the File API that are associated with the Cloud project your API key belongs to. Only the `name` (and by extension, the `uri`) are unique. Only use the `displayName` to identify files if you manage uniqueness yourself.

In [None]:
# Request the file listing from the File API and print the raw response.
list_request = genai_service.files().list()
response = list_request.execute()
print(response)

## Generate Content

After the file has been uploaded, you can make `GenerateContent` requests that reference the File API URI.

To understand videos with Gemini 1.5 Pro, provide 2 consecutive `Part`s for each frame: a `text` part with the **timestamp** and a `fileData` part with the frame's **image URI**:

```
part { text = "00:00" }
part { fileData = fileData {
  fileUri = "https://generativelanguage.googleapis.com/v1/files/frame-0"
  mimeType = "image/jpeg"
}}
```

In [None]:
# Build the GenerateContent request body
def makeGenerateContentRequest(prompt, files):
  """Returns a request body holding the prompt followed by the parts of each file.

  The body has a single content entry whose parts start with the prompt text;
  the parts produced by makeVideoPart for every item of files follow in order.
  """
  parts = [{ "text": prompt }]
  for frame in files:
    parts += makeVideoPart(frame)
  return {"contents": [{ "parts": parts }]}

def makeVideoPart(file):
  """Returns the request parts describing one video frame.

  Args:
    file: Object exposing `uri` and `mimetype`, and optionally `timestamp`.

  Returns:
    A list with a text part holding the timestamp followed by a file_data part
    referencing the frame. The text part is omitted when the frame has no
    timestamp.
  """
  parts = []
  # A frame whose name could not be parsed has no timestamp; leave the text part
  # out instead of raising AttributeError or sending a null text value.
  timestamp = getattr(file, "timestamp", None)
  if timestamp is not None:
    parts.append({"text": timestamp})
  parts.append({"file_data": {"file_uri": file.uri, "mime_type": file.mimetype }})
  return parts

prompt = "Tell me about this video." # @param {type:"string"}
model = "models/gemini-1.5-pro-latest" # @param ["models/gemini-1.5-pro-latest", "models/gemini-1.0-pro-vision-latest"]
# Assemble the body first, then send it to the chosen model and print the reply.
request_body = makeGenerateContentRequest(prompt, uploaded_files)
response = genai_service.models().generateContent(model=model, body=request_body).execute()
print(response)

## Delete Files

Files are automatically deleted after 2 days, or you can delete them manually using `files.delete()`.

In [None]:
# Delete each uploaded file by its resource name
# Count `uploaded_files` (the list actually iterated) so the banner stays
# correct even when only part of `files_to_upload` was uploaded.
print(f'Deleting {len(uploaded_files)} images. This might take a bit...')
for file in uploaded_files:
  # The resource name is 'files/<id>', where <id> is the last segment of the URI.
  resource = file.uri.split("/files/")[-1]
  response = genai_service.files().delete(name=f"files/{resource}").execute()
  print(f'Deleted {file.file_path} as URI {file.uri}')

print(f"Completed deleting files!\n\nDeleted: {len(uploaded_files)} files")