In [1]:
# Import the os package
import os

# Imports via openai docs
from pathlib import Path

# import the dotenv package
from dotenv import load_dotenv

import pprint

# From the IPython.display package, import display and Markdown
from IPython.display import display, Markdown, Audio

"""
Install the Google AI Python SDK

$ pip install google-generativeai

See the getting started guide for more information:
https://ai.google.dev/gemini-api/docs/get-started/python
"""

import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Get the current working directory
cwd = os.getcwd()
# Construct the .env file path
env_path = os.path.join(cwd, '.env')

# Load the .env file
load_dotenv(dotenv_path=env_path)

True

from google AI Studio

In [7]:
from google.cloud import texttospeech

In [8]:


genai.configure(api_key=os.environ["GOOGLE_GEN_AI_API_KEY"])

def upload_to_gemini(path, mime_type=None):
  """Uploads the given file to Gemini.

  See https://ai.google.dev/gemini-api/docs/prompting_with_media
  """
  file = genai.upload_file(path, mime_type=mime_type)
  print(f"Uploaded file '{file.display_name}' as: {file.uri}")
  return file

# Create the model
# See https://ai.google.dev/api/python/google/generativeai/GenerativeModel
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}
safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_MEDIUM_AND_ABOVE",
  },
]

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  safety_settings=safety_settings,
  generation_config=generation_config,
)

# TODO Make these files available on the local file system
# You may need to update the file paths
audio_drive0 = upload_to_gemini("output.mp3", mime_type="audio/ogg")

chat_session = model.start_chat(
  history=[
    {
      "role": "user",
      "parts": [
        audio_drive0,
        "transcribe audio file and output text\n",
      ],
    },
  ]
)

response = chat_session.send_message("INSERT_INPUT_HERE")

print(response.text)
print(chat_session.history)

Uploaded file 'output.mp3' as: https://generativelanguage.googleapis.com/v1beta/files/oroe84ftxtw5
Hello how are you 
[parts {
  file_data {
    mime_type: "audio/ogg"
    file_uri: "https://generativelanguage.googleapis.com/v1beta/files/oroe84ftxtw5"
  }
}
parts {
  text: "transcribe audio file and output text\n"
}
role: "user"
, parts {
  text: "INSERT_INPUT_HERE"
}
role: "user"
, parts {
  text: "Hello how are you "
}
role: "model"
]
