In [None]:
!pip install -q -U google-generativeai

In [None]:
import google.generativeai as genai
import PIL.Image
from google.colab import userdata
from google.colab import drive
import os
import cv2
import shutil
import numpy as np
import time
import json

from googleapiclient.discovery import build
from google.oauth2 import service_account
from google.auth import default
from oauth2client.client import GoogleCredentials
from google.colab import auth


In [None]:
# Pull the Gemini API key out of Colab's secret store (never hard-code it)
# and register it with the SDK for all later genai calls.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Expose the user's Google Drive on the local filesystem.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Module-level multimodal model. image_Embeddings and video_Embeddings read
# this global; audio_Embeddings builds its own local model instead.
model = genai.GenerativeModel('models/gemini-pro-vision')

# Earlier keyword-list version of the image prompt, kept for reference.
# imgprompt = """Generate 5 words that list objects in the image,
# and 5 words that capture more nuanced concepts/ideas in the image.
# List just the words, all on one line, with a space separating each word,
# no titles, no words or characters other than the 10 words"""

# Prompt sent together with a single image; asks for a one-line summary.
imgprompt = """summarize the objects in the image,
and include details that capture more nuanced concepts/ideas in the image.
List just the summary, all on one line, with a space separating each word,
no titles, no words or characters other than the summary"""

# First audio prompt: asks for 10 space-separated descriptive words.
audioprompt1 = """Generate 5 words that list instruments/elements present
in this audio file, and 5 words that capture more nuanced concepts/ideas
such as genre(pop, jazz, hiphop) or \"conversation\" in the audio.
List just the words, all on one line, with a space separating
each word, no titles, no words or characters other than the 10 words"""

# Second audio prompt: asks for a one-line transcript, or nothing at all
# when the audio contains no words.
audioprompt2 = """Give me the transcript of words in the audio file
all on one line, words separated by a space,
if there aren't any words, such as if the audio is just nature sounds,
don't print anything"""

# Earlier keyword-list version of the video prompt, kept for reference.
# videoprompt = """Generate 30 objects and literal elements that appear in
# the video. Then, generate another 30 words that describe characteristics
# of the overall video. Could be genre, could be the idea of the video,
# could be the feeling that a viewer might get when watching. These 30
# should capture more nuanced concepts/ideas. List just the words, all
# on one line, with a space separating each word, no titles, no words or
# characters other than the 30 words"""

# Prompt sent together with the extracted video frames; asks for a one-line
# summary covering both literal content and overall character.
videoprompt = """Generate a summary of the video, include literal
elements that appear in the video. Also describe characteristics
of the overall video. Could be genre, could be the idea of the video,
could be the feeling that a viewer might get when watching. The summary
should also capture more nuanced concepts/ideas. List just the words, all
on one line, with a space separating each word, no titles, no words or
characters other than the summary"""

In [None]:
def get_drive_file_url(file_path):
    """Return a Google Drive "view" URL for a file on the mounted Drive.

    The file is looked up in the Drive API by its base name only, and the
    first match is used, so another Drive file with the same name in a
    different folder can be returned instead.

    Args:
        file_path: Local path of the file under the mounted Drive.

    Returns:
        The ``https://drive.google.com/file/d/<id>/view`` URL, or the string
        "File not found" when ``file_path`` does not exist locally, or
        "No file found with that name" when the Drive search has no results.
    """
    # Mount Google Drive if not already mounted
    if not os.path.ismount('/content/drive'):
        drive.mount('/content/drive')

    # Check the cheap local condition first so a missing file does not cost
    # a credential lookup and an API client build.
    if not os.path.exists(file_path):
        return "File not found"

    # Initialize Google Drive API
    # NOTE(review): default() presumably needs the Colab user to be
    # authenticated already (e.g. auth.authenticate_user()) - confirm.
    creds, _ = default()
    service = build('drive', 'v3', credentials=creds)

    # Extract the file name and escape it for use inside a quoted Drive query
    # literal: backslashes first, then single quotes. Without this, a name
    # such as "Valentine's Day.jpg" produces a malformed query.
    file_name = os.path.basename(file_path)
    escaped_name = file_name.replace("\\", "\\\\").replace("'", "\\'")

    # Search for the file by name in the user's drive
    query = f"name = '{escaped_name}' and trashed = false"
    results = service.files().list(q=query, spaces='drive',
                                   fields='files(id, name)').execute()
    items = results.get('files', [])

    if not items:
        return "No file found with that name"

    # Assuming the first search result is the file we want
    file_id = items[0]['id']

    # Return the URL to access the file
    return f"https://drive.google.com/file/d/{file_id}/view"

def image_Embeddings(file_path, db):
    """Summarize an image with the module-level model and store its embedding.

    Args:
        file_path: Path of the image file to describe.
        db: List that receives one ``[embedding, file_path]`` entry, where
            ``embedding`` is whatever ``genai.embed_content`` returns.
    """
    # Run the query. The context manager guarantees the image's file handle
    # is closed once the request has been sent, so traversing a large Drive
    # does not accumulate open files.
    with PIL.Image.open(file_path) as img:
        response = model.generate_content([imgprompt, img])
    summary = response.text
    print(summary)

    # Generate the embedding of the textual summary.
    embedding = genai.embed_content(
        model="models/text-embedding-004",
        content=summary,
        task_type="document")

    db.append([embedding, file_path])

def audio_Embeddings(file_path, db):
    """Describe and transcribe an audio file, then store the embeddings.

    One ``[embedding, file_path]`` entry is appended to ``db`` for every
    descriptive word the model returns, plus one entry for the transcript
    when the transcript is non-empty.

    Args:
        file_path: Path of the audio file to upload and analyse.
        db: List that receives the ``[embedding, file_path]`` entries.
    """
    print("audio")
    audio = genai.upload_file(file_path)
    # Local model on purpose: this shadows the module-level `model` only
    # inside this function.
    model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
    response = model.generate_content([audioprompt1, audio])
    transcript = model.generate_content([audioprompt2, audio])
    words = response.text.split()
    # Collapse the transcript to a single whitespace-normalised string.
    # Passing the list of words would make embed_content treat it as a batch
    # and return one vector per word instead of one for the transcript.
    lyrics = " ".join(transcript.text.split())

    for word in words:
        embedding = genai.embed_content(
            model="models/text-embedding-004",
            content=word,
            task_type="semantic_similarity")
        db.append([embedding, file_path])

    # The transcript prompt asks for no output when the audio has no words;
    # there is nothing to embed in that case.
    if not lyrics:
        return

    lyrics_embedding = genai.embed_content(
            model="models/text-embedding-004",
            content=lyrics,
            task_type="semantic_similarity")

    db.append([lyrics_embedding, file_path])


def video_Embeddings(file_path, db):
    """Summarize a video with the module-level model and store its embedding.

    The video is sampled with OpenCV into JPEG frames in a scratch directory,
    the frames are uploaded with ``genai.upload_file``, and the prompt plus
    (timestamp, frame) pairs are sent to ``model``. The returned summary is
    embedded and appended to ``db`` as ``[embedding, file_path]``.

    Args:
        file_path: Path of the video file.
        db: List that receives one ``[embedding, file_path]`` entry.

    Raises:
        ValueError: If OpenCV cannot open the video or reports no usable
            frame rate (previously this surfaced as a ZeroDivisionError).
    """
    video_file_name = file_path

    # Scratch directory for extracted frames; wiped on every call.
    FRAME_EXTRACTION_DIRECTORY = "./content/frames"
    FRAME_PREFIX = "_frame"

    class File:
        """An extracted frame on disk together with its upload response."""

        def __init__(self, file_path: str, display_name: str = None):
            self.file_path = file_path
            if display_name:
                self.display_name = display_name
            self.timestamp = get_timestamp(file_path)

        def set_file_response(self, response):
            self.response = response

    def get_timestamp(filename):
        """Return the 'MM:SS' part of '<prefix>_frameMM:SS.jpg', or None.

        The LAST occurrence of FRAME_PREFIX is used, so a video whose own
        name contains '_frame' still yields a timestamp.
        """
        _, separator, tail = filename.rpartition(FRAME_PREFIX)
        if not separator:
            return None  # Indicates the filename might be incorrectly formatted
        return tail.split('.')[0]

    def create_frame_output_dir(output_dir):
        """Create ``output_dir`` empty, removing any previous contents."""
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

    def extract_frame_from_video(video_file_path):
        """Write one JPEG per ``interval`` seconds of video to the scratch dir."""
        print(f"Extracting {video_file_path} at 1 frame per second. This might take a bit...")
        create_frame_output_dir(FRAME_EXTRACTION_DIRECTORY)
        vidcap = cv2.VideoCapture(video_file_path)
        frame_count = 0
        try:
            fps = vidcap.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
            if not vidcap.isOpened() or not fps > 0:
                raise ValueError(
                    f"Cannot read video or its frame rate: {video_file_path}")
            output_file_prefix = os.path.basename(video_file_path).replace('.', '_')
            count = 0
            # Interval in seconds between extractions; 1 means one frame per
            # second. Can be raised for long videos.
            interval = 1
            while vidcap.isOpened():
                success, frame = vidcap.read()
                if not success:  # End of video
                    break
                # count / fps is the elapsed time; extract the first frame
                # that falls into each new `interval`-second window.
                if int(count / interval / fps) == frame_count:
                    elapsed_seconds = frame_count * interval
                    minutes, seconds = divmod(elapsed_seconds, 60)
                    time_string = f"{minutes:02d}:{seconds:02d}"
                    image_name = f"{output_file_prefix}{FRAME_PREFIX}{time_string}.jpg"
                    output_filename = os.path.join(FRAME_EXTRACTION_DIRECTORY, image_name)
                    cv2.imwrite(output_filename, frame)
                    frame_count += 1
                count += 1
        finally:
            # Release the capture object even when extraction fails.
            vidcap.release()
        print(f"Completed video frame extraction!\n\nExtracted: {frame_count} frames")

    extract_frame_from_video(video_file_name)

    # Collect the extracted frames in (lexicographic) timestamp order.
    files = sorted(os.listdir(FRAME_EXTRACTION_DIRECTORY))
    files_to_upload = []
    for file in files:
        files_to_upload.append(
            File(file_path=os.path.join(FRAME_EXTRACTION_DIRECTORY, file)))

    # Upload the files to the API.
    # Set full_video to False to upload only a 10 second slice (frames 40-49)
    # and reduce upload time.
    full_video = True

    uploaded_files = []
    print(f'Uploading {len(files_to_upload) if full_video else 10} files. This might take a bit...')

    for file in files_to_upload if full_video else files_to_upload[40:50]:
        print(f'Uploading: {file.file_path}...')
        response = genai.upload_file(path=file.file_path)
        file.set_file_response(response)
        uploaded_files.append(file)

    def make_request(prompt, files):
        """Build [prompt, timestamp, frame, timestamp, frame, ...]."""
        request = [prompt]
        for file in files:
            request.append(file.timestamp)
            request.append(file.response)
        return request

    # Make the LLM request.
    request = make_request(videoprompt, uploaded_files)
    response = model.generate_content(request,
                                      request_options={"timeout": 600})
    print(response.text)
    video_summary = response.text

    # Embed the whole summary as a single document.
    embedding = genai.embed_content(
        model="models/text-embedding-004",
        content=video_summary,
        task_type="document")

    db.append([embedding, file_path])


In [None]:
def generate_Embeddings(path, db):
    """Dispatch a file to the embedding routine that matches its extension.

    Video and image files are forwarded to ``video_Embeddings`` and
    ``image_Embeddings``. Audio files are only announced, because audio
    embedding is currently switched off. Any other extension is ignored.

    Args:
        path: Path of the file to process.
        db: List handed through to the embedding routine.
    """
    video_types = ('.mp4', '.avi', '.mov', '.mkv')
    image_types = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    audio_types = ('.mp3', '.wav', '.flac', '.aac')

    # Extension comparison is case-insensitive.
    _, suffix = os.path.splitext(path)
    suffix = suffix.lower()

    if suffix in video_types:
        print("Processing video file:", path)
        video_Embeddings(path, db)
        return

    if suffix in image_types:
        print("Processing image file:", path)
        image_Embeddings(path, db)
        return

    if suffix in audio_types:
        print("Processing audio file:", path)
        # Audio embedding is disabled for now:
        # audio_Embeddings(path, db)

def traverse_files(directory, db, max_attempts=5, retry_delay=20):
    """Recursively generate embeddings for every non-hidden file in a tree.

    Each file is passed to ``generate_Embeddings``. A failing file is retried
    up to ``max_attempts`` times with ``retry_delay`` seconds between tries
    and is then skipped. Entries whose name starts with '.' are ignored, as
    are entries that are neither a regular file nor a directory.

    Args:
        directory: Directory to walk.
        db: List handed through to ``generate_Embeddings``.
        max_attempts: Maximum number of tries per file.
        retry_delay: Seconds to wait before retrying a failed file.
    """
    for filename in os.listdir(directory):
        # Ignore system / hidden files
        if filename.startswith('.'):
            continue

        file_path = os.path.join(directory, filename)

        if os.path.isdir(file_path):
            traverse_files(file_path, db, max_attempts, retry_delay)
            continue
        if not os.path.isfile(file_path):
            # Broken symlink, socket, etc.: listing it would raise.
            continue

        for attempt in range(1, max_attempts + 1):
            try:
                generate_Embeddings(file_path, db)
            # `Exception` rather than a bare except, so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the run.
            except Exception as exc:
                print(f"Attempt {attempt} of {max_attempts} failed for "
                      f"{file_path}: {exc!r}")
                # Waiting after the final attempt would only waste time.
                if attempt < max_attempts:
                    print(f"Waiting {retry_delay} seconds and retrying")
                    time.sleep(retry_delay)
            else:
                if attempt > 1:
                    print("Finally worked!")
                break
        else:
            # The loop finished without a successful attempt.
            print(f"We tried {max_attempts} times and failed; skipping this one.")

In [None]:
# Top of the mounted Google Drive; everything beneath it gets indexed.
root_dir = "/content/drive/My Drive"

# Accumulates the [embedding, file_path] entries produced by the traversal.
database = list()

# Walk the whole Drive and fill `database`.
traverse_files(root_dir, database)

# Persist the result twice, next to the indexed files: as a NumPy array
# (np.save appends ".npy") and as human-readable JSON.
np.save(os.path.join(root_dir, "embedding_space"), np.array(database))

json_path = os.path.join(root_dir, "embedding_space.json")
with open(json_path, 'w') as json_file:
    json.dump(database, json_file, indent=4)