In [1]:
pip install openai-whisper torch numpy --no-cache-dir

Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install sounddevice scipy

Collecting sounddevice
  Downloading sounddevice-0.5.1-py3-none-win_amd64.whl.metadata (1.4 kB)
Downloading sounddevice-0.5.1-py3-none-win_amd64.whl (363 kB)
   ---------------------------------------- 0.0/363.6 kB ? eta -:--:--
   - -------------------------------------- 10.2/363.6 kB ? eta -:--:--
   ------- -------------------------------- 71.7/363.6 kB 1.3 MB/s eta 0:00:01
   -------------------------------- ------- 297.0/363.6 kB 3.1 MB/s eta 0:00:01
   ---------------------------------------- 363.6/363.6 kB 2.8 MB/s eta 0:00:00
Installing collected packages: sounddevice
Successfully installed sounddevice-0.5.1


In [3]:
# Import required libraries
import os
import queue
import tempfile
import wave

import numpy as np
import sounddevice as sd
import torch
import whisper



In [4]:
# Run on the GPU when PyTorch reports a usable CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cpu


In [5]:
# Load the "base" Whisper checkpoint straight onto the selected device;
# load_model accepts the target device itself, so no extra .to() is needed.
model = whisper.load_model("base", device=device)

In [6]:
# Unbounded FIFO that hands microphone chunks from the audio callback
# to the recording loop.
audio_queue = queue.Queue(maxsize=0)

In [7]:
# Recording configuration
SAMPLE_RATE = 16_000      # Hz; Whisper requires 16 kHz audio
THRESHOLD = 1_000         # mean absolute amplitude below which a chunk counts as silent (tune per microphone)
SILENCE_DURATION = 1.5    # seconds of continuous silence that end the recording

In [11]:
# Module-level holder for the most recent transcription; starts out empty.
transcribed_text: str = ""

# Audio-stream callback: forwards every captured block to the shared queue
def callback(indata, frames, time, status):
    """Queue a copy of the captured block for the recording loop.

    `status` is printed when it is truthy. `indata` is copied before it is
    enqueued, so the queued block is independent of the caller's buffer.
    `frames` and `time` are accepted but not used.
    """
    if status:
        print(status)
    captured_block = indata.copy()
    audio_queue.put(captured_block)

# Helpers and entry point for recording live audio and transcribing it
def _record_until_silence():
    """Capture microphone audio until SILENCE_DURATION seconds of continuous silence.

    Returns:
        A 1-D int16 numpy array holding every sample captured, including the
        trailing silence.
    """
    # Drop blocks left over from an earlier recording: the callback can still
    # enqueue a few blocks between the end of the loop and the stream closing,
    # and they would otherwise be prepended to this recording.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break

    chunks = []
    silent_samples = 0
    silence_limit = SILENCE_DURATION * SAMPLE_RATE  # expressed in samples

    # Open a stream for real-time audio capture
    with sd.InputStream(callback=callback, samplerate=SAMPLE_RATE, channels=1, dtype="int16"):
        while True:
            chunk = audio_queue.get()
            if len(chunk) == 0:
                continue
            chunks.append(chunk)

            # Widen before abs(): abs(-32768) overflows in int16.
            level = np.abs(chunk.astype(np.int32)).mean()
            if level < THRESHOLD:
                # Count samples rather than blocks so the stop condition does
                # not depend on the block size chosen by the audio backend.
                silent_samples += len(chunk)
            else:
                silent_samples = 0  # Reset if sound is detected

            if silent_samples >= silence_limit:
                break

    # One concatenation instead of growing a Python list sample by sample.
    return np.concatenate(chunks).reshape(-1).astype(np.int16, copy=False)


def _write_wav(path, samples):
    """Write 1-D int16 `samples` to `path` as a mono, 16-bit WAV at SAMPLE_RATE."""
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample == int16
        wav_file.setframerate(SAMPLE_RATE)
        wav_file.writeframes(samples.tobytes())


def live_transcribe():
    """Record speech from the microphone and transcribe it with Whisper.

    Recording stops after SILENCE_DURATION seconds of continuous silence.
    The audio is written to a temporary WAV file, passed to
    `model.transcribe`, and the file is removed afterwards even if
    transcription fails.

    Side effects:
        Prints progress messages and the result, and stores the text in the
        module-level `transcribed_text`.

    Returns:
        None.
    """
    global transcribed_text  # Use the global variable

    print("🎤 Speak now... (Stops when silent)")
    audio_data = _record_until_silence()

    # Reserve a unique path, then release the handle so the file can be
    # reopened by name for writing and reading.
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_audio_path = temp_audio.name
    temp_audio.close()

    try:
        _write_wav(temp_audio_path, audio_data)

        # Transcribe using Whisper
        print("📝 Transcribing...")
        result = model.transcribe(temp_audio_path)
    finally:
        # delete=False means nothing else cleans this file up.
        os.remove(temp_audio_path)

    # Store transcribed text in the global variable
    transcribed_text = result["text"]

    # Print the transcribed text
    print("Transcribed Text:", transcribed_text)


In [18]:
# Record from the microphone until the speaker goes quiet, then print the
# Whisper transcription (also stored in `transcribed_text`).
live_transcribe()

🎤 Speak now... (Stops when silent)
📝 Transcribing...
Transcribed Text:  What is the height of high-filtower in France?


In [1]:
# Hard-coded sample text assigned to `transcribed_text` -- presumably a stand-in
# for a live recording so the later cells can run without the microphone
# (TODO confirm).
transcribed_text = "Hi how are you?"

In [2]:
import os
from getpass import getpass

# Keep a key that is already configured in the environment; otherwise ask for
# it without echoing, so the secret is never written into the notebook and a
# valid key is never overwritten by a placeholder.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [7]:
from openai import OpenAI

# No key is passed explicitly; an earlier cell sets OPENAI_API_KEY in the environment.
client = OpenAI()

# Conversation sent to the model: a system instruction followed by one user request.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write a haiku about recursion in programming."},
]

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
)

# Show the message object of the first returned choice.
first_choice = completion.choices[0]
print(first_choice.message)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [13]:
# pip install elevenlabs

import os
from getpass import getpass

from elevenlabs import stream
from elevenlabs.client import ElevenLabs
from IPython.display import Audio

true_text = transcribed_text

# Take the key from the environment, or prompt for it without echoing when it
# is not set, so the secret is never stored in the notebook source.
elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY") or getpass("ElevenLabs API key: ")

# Initialize ElevenLabs client with the API key
client = ElevenLabs(api_key=elevenlabs_api_key)

# Convert text to speech and get the audio stream
audio_stream = client.text_to_speech.convert_as_stream(
    text=true_text,
    voice_id="9BWtsMINqrJLrRacOk9x",
    model_id="eleven_multilingual_v2"
)

# Collect the streamed audio in memory; joining once avoids re-copying the
# accumulated bytes for every chunk. Non-bytes items are skipped.
audio_data = b"".join(chunk for chunk in audio_stream if isinstance(chunk, bytes))

# Play the audio directly from the memory
Audio(audio_data, autoplay=True)