# 1. Setup environment

In [2]:
# Mount Google Drive for data storage
from google.colab import drive
drive.mount('/content/drive')

# Install required packages
!pip install git+https://github.com/openai/whisper.git -q
!pip install openai
!pip install urllib
!pip install Audio
!apt-get install -y ffmpeg

"\n# Mount Google Drive for data storage\nfrom google.colab import drive\ndrive.mount('/content/drive')\n\n# Install required packages\n!pip install git+https://github.com/openai/whisper.git -q\n!pip install openai\n!apt-get install -y ffmpeg"

**Replace `openai.api_key =` with Your API Key**

If you are using this notebook to interact with OpenAI's GPT models, make sure to set `openai.api_key` to your actual OpenAI API key wherever it is assigned. The key is a string, so it must be enclosed in double or single quotes, like this:

Before replacement:
```python
openai.api_key = "your-old-api-key"
```

After replacement (use your own key):
```python
openai.api_key = "sk-..."
```


In [None]:
# Import necessary packages
import whisper
import openai
from IPython.display import Audio
import io
import time
import numpy as np
import urllib

# 2. Speech-to-text using Whisper



**The `transcribe` function:** we use `model.transcribe` to transcribe the audio from the provided file ("audio.mp3" in this notebook). It takes care of converting the audio into text, which makes it suitable for scenarios where you only need the textual representation of the spoken words.

**The `decode` function:** here we run a more explicit audio processing pipeline ourselves: the audio is loaded, padded or trimmed to 30 seconds, converted to a log-Mel spectrogram, and then decoded with a pre-trained model. The decode function goes beyond plain transcription and provides additional information about the audio, making it suitable for tasks where understanding the structure and content of the audio is important.

**For this tutorial either of them can be used; for more information, see the Whisper documentation.**

In [None]:
# Function to transcribe audio from a file
def transcribe_audio(file_path):
    """
    Run Whisper's high-level transcription on an audio file.

    Args:
        file_path (str): Location of the audio file to transcribe.

    Returns:
        str: The "text" entry of the transcription result.
    """
    # Load the "base" checkpoint and transcribe in one chained expression.
    return whisper.load_model("base").transcribe(file_path)["text"]

def decode_audio(file_path):
    """
    Decode audio from the provided file.

    Uses Whisper's lower-level API: the audio is loaded, padded or trimmed to
    a 30-second window, converted to a log-Mel spectrogram and decoded with
    the "base" model.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        str: Transcribed text from the decoded audio.
    """

    # Load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(file_path)
    audio = whisper.pad_or_trim(audio)

    # Create a log-Mel spectrogram and move to the same device as the model
    base_model = whisper.load_model("base")
    mel_spectrogram = whisper.log_mel_spectrogram(audio).to(base_model.device)

    # Decode the audio (timestamps are disabled, only the text is of interest)
    options = whisper.DecodingOptions(without_timestamps=True)
    result = whisper.decode(base_model, mel_spectrogram, options)

    # Hand the decoded text back to the caller, as the docstring promises;
    # without this return the function would yield None.
    return result.text

In [1]:
# Load an example audio file
!wget -O audio.mp3 http://www.moviesoundclips.net/movies1/darkknightrises/darkness.mp3

In [1]:
# Display the audio: build an IPython.display.Audio object for audio.mp3.
# As the last expression of the cell, it is what the notebook renders as output.
Audio("audio.mp3")

In [None]:
# Transcribe the example file with transcribe_audio (decode_audio is the other
# helper defined in this notebook). The result is kept in `text_transcribed`,
# which the summarization prompt interpolates later on.
text_transcribed = transcribe_audio("audio.mp3")

100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 59.6MiB/s]


# 3. Summarize using the ChatGPT API

In [None]:
# Set up the OpenAI API client
# NOTE(review): the string below is a placeholder; avoid committing a real key with the notebook.
openai.api_key = "PUT YOUR API KEY"  # Replace with your actual API key between the quotes

# Define a prompt for text summarization: the transcript stored in
# `text_transcribed` is interpolated directly after the instruction
prompt = f"Summarize the following text {text_transcribed}"

In [None]:
# Create a message for the model: a single "user" turn carrying the prompt
messages = [{"role": "user", "content": prompt}]

# Request a text summary from the OpenAI GPT-3.5-turbo model
# NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the openai
# package; openai>=1.0 removed it, so this cell needs an openai<1.0 install
# (or a port to the newer client API).
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=messages,
    temperature=0.7,  # Adjust the temperature to control randomness
)

# Get the summarized text from the first choice in the response
text_summarized = response.choices[0].message["content"]
