# Working with Audio

## Universal Code Used for the Entire Notebook

Let's set up our libraries and client

In [None]:
# Install the audio packages this notebook relies on for playback:
# `soundfile` (used below to decode the downloaded audio) and
# `sounddevice` (used below to play it through the speakers).
# Uncomment the following line if you need to install the packages;
# `%pip` installs into the environment of the running kernel.
# %pip install soundfile sounddevice


In [None]:
import os  # os module for interacting with the operating system
import requests  # requests module for making HTTP requests
from io import BytesIO  # BytesIO module for handling byte streams

import soundfile as sf  # soundfile module for reading and writing sound files
import sounddevice as sd  # sounddevice module for playing and recording sound

from IPython.display import Audio, display  # IPython display modules for displaying audio

from openai import OpenAI  # OpenAI module for interacting with OpenAI API


In [None]:
# Shared SDK client used by the library-based cells below.
# No key is passed explicitly; presumably the SDK picks it up from the
# OPENAI_API_KEY environment variable (the raw-HTTP cells read that same
# variable via os.getenv) — verify against your environment.
client = OpenAI()  # Initialize the OpenAI client


### Getting Audio without Streaming

Using the openai api library approach

In [None]:
# Destination for the synthesized audio
speech_file_path = "./fight_on_the_beaches.mp3"

# Passage to be spoken (sent to the API exactly as written, whitespace included)
speech_text = """
    Even though large tracts of Europe and many old and famous States have fallen or may fall into the grip of the Gestapo and all the odious apparatus of Nazi rule, we shall not flag or fail. We shall go on to the end, we shall fight in France, we shall fight on the seas and oceans, we shall fight with growing confidence and growing strength in the air, we shall defend our Island, whatever the cost may be, we shall fight on the beaches, we shall fight on the landing grounds, we shall fight in the fields and in the streets, we shall fight in the hills; we shall never surrender, and even if, which I do not for a moment believe, this Island or a large part of it were subjugated and starving, then our Empire beyond the seas, armed and guarded by the British Fleet, would carry on the struggle, until, in God’s good time, the New World, with all its power and might, steps forth to the rescue and the liberation of the old.
    """

# Request the speech in a single (non-streaming) call through the SDK client
response = client.audio.speech.create(
    input=speech_text,
    voice="fable",      # voice preset
    model="tts-1-hd",   # TTS model
)

# The whole audio payload is available as bytes; write it out in one go
with open(speech_file_path, 'wb') as audio_file:
    audio_file.write(response.content)

# Tell the reader where the result landed
print(f"Audio saved to {speech_file_path}")


Using the API endpoint approach

In [None]:

# Text-to-speech through the raw REST endpoint (no SDK), saving the result
# to an MP3 file. The response body is downloaded in full before writing.

# Define the speech file path
speech_file_path = "./old_soldiers_never_die.mp3"
# The key is read from the environment; if it is unset the header below
# carries "Bearer None" and the error branch at the bottom reports the failure.
api_key = os.getenv("OPENAI_API_KEY")

# API endpoint and headers
url = "https://api.openai.com/v1/audio/speech"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Data payload for the request
data = {
    "model": "tts-1",
    "voice": "shimmer",
    "input": """
    I still remember the refrain of one of the most popular barracks ballads of that day which proclaimed most proudly that old soldiers never die; they just fade away. And like the old soldier of that ballad, I now close my military career and just fade away, an old soldier who tried to do his duty as God gave him the light to see that duty.
    """
}

# Make the synchronous request.
# requests applies no timeout by default, so a stalled connection would block
# the cell indefinitely; bound it with (connect, read) timeouts in seconds.
response = requests.post(url, headers=headers, json=data, timeout=(10, 120))

# Check if the request was successful
if response.status_code == 200:
    with open(speech_file_path, 'wb') as file:
        file.write(response.content)
    print(f"Audio saved to {speech_file_path}")
else:
    # Surface the HTTP status and the server's error body
    print(f"Error: {response.status_code} - {response.text}")


Chat Completion to Audio without End-to-End Streaming

In [None]:
from openai import OpenAI  # Import OpenAI module for interacting with the OpenAI API
import os  # os module for interacting with the operating system
import requests  # requests module for making HTTP requests
import soundfile as sf  # soundfile module for reading and writing sound files
import sounddevice as sd  # sounddevice module for playing and recording sound
from io import BytesIO  # BytesIO module for handling byte streams

# Pipeline for this cell:
#   1. stream a chat completion and collect its text,
#   2. send that text to the TTS REST endpoint,
#   3. save the returned audio to disk and play it.

# API key for the raw HTTP call to the TTS endpoint
api_key = os.getenv("OPENAI_API_KEY")

# Initialize the OpenAI client (keeps this cell runnable on its own)
client = OpenAI()

# Define the speech file path
speech_file_path = "./war_message_to_congress.mp3"

# Create the chat completion request
chat_completion = client.chat.completions.create(
    model="gpt-4o",  # Specify the model to use
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},  # System message to set the assistant's behavior
        {"role": "user", "content": "Give me one paragraph on penguins"}  # User message to initiate the conversation
    ],
    stream=True  # Enable streaming responses
)

# Capture the chat output in real-time.
# Fragments are gathered in a list and joined once, instead of growing a
# string with repeated `+=`.
chat_fragments = []
for chunk in chat_completion:
    # A streamed chunk may carry no choices; skip it rather than
    # raising IndexError on `choices[0]`.
    if not chunk.choices:
        continue
    content = getattr(chunk.choices[0].delta, 'content', None)  # Safely get the content attribute
    if content:
        print(content, end='')  # Print without newline to maintain the flow
        chat_fragments.append(content)
chat_output = "".join(chat_fragments)

# API endpoint and headers for TTS
url = "https://api.openai.com/v1/audio/speech"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Data payload for the TTS request
data = {
    "model": "tts-1-hd",
    "voice": "onyx",
    "input": chat_output,  # Use chat output as input for TTS
    "response_format": "mp3"  # Use MP3 format for response
}

# Make the TTS request with streaming enabled.
# The context manager guarantees the connection is released even if writing
# fails midway; the timeout is (connect, read) in seconds so a stalled
# connection cannot hang the cell forever.
buffer = None
with requests.post(url, headers=headers, json=data, stream=True, timeout=(10, 120)) as response:
    if response.status_code == 200:
        buffer = BytesIO()

        # Save audio chunks to file and buffer as they arrive
        with open(speech_file_path, 'wb') as file:
            for audio_chunk in response.iter_content(chunk_size=1024):
                if audio_chunk:
                    file.write(audio_chunk)
                    buffer.write(audio_chunk)
    else:
        print(f"Error: {response.status_code} - {response.text}")

if buffer is not None:
    buffer.seek(0)

    # Read and play the audio from the buffer.
    # The decoded samples get their own name so the request payload in `data`
    # is not overwritten by an array.
    # NOTE(review): decoding MP3 here presumably requires a libsndfile build
    # with MP3 support — confirm for your platform.
    audio_samples, samplerate = sf.read(buffer)
    sd.play(audio_samples, samplerate)
    sd.wait()  # Wait until the audio playback is done

    print(f"Audio saved to {speech_file_path}")


### Streaming Audio

Using the openai api library approach

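(Note: calling `stream_to_file` on the result of a plain `client.audio.speech.create(...)` call does not actually stream — the full audio is downloaded first — and the SDK marks that method as deprecated in favor of `client.audio.speech.with_streaming_response.create(...)`.)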

In [None]:
# Stream synthesized speech to disk through the SDK.
#
# `stream_to_file` on the object returned by a plain `create()` call is
# deprecated in the SDK and does not stream (the body is already fully
# downloaded). The `with_streaming_response` variant returns a context-managed
# response whose body is written out as it arrives.

# Define the speech file path
speech_file_path = "./wonderfulday.mp3"

# Create the TTS (Text-to-Speech) request and stream the body to the file
with client.audio.speech.with_streaming_response.create(
    model="tts-1",  # Specify the TTS model to use
    voice="alloy",  # Specify the voice to use for the TTS
    input="Today is a wonderful day to build something people love!",  # Input text to be converted to speech
) as response:
    # Write the audio to disk chunk by chunk; the connection is closed on exit
    response.stream_to_file(speech_file_path)

# Print a message indicating where the audio was saved
print(f"Audio saved to {speech_file_path}")


Using the API endpoint approach

In [None]:
# Stream synthesized speech from the raw REST endpoint, writing it to an MP3
# file and to an in-memory buffer, then play the buffered audio.

# Set up OpenAI API key
api_key = os.getenv("OPENAI_API_KEY")

# Define the speech file path
speech_file_path = "./war_message_to_congress.mp3"

# API endpoint and headers
url = "https://api.openai.com/v1/audio/speech"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Data payload for the request
data = {
    "model": "tts-1-hd",
    "voice": "onyx",
    "input": """
    The present German submarine warfare against commerce is a warfare against mankind.

It is war against all nations.

American ships have been sunk, American lives taken, in ways which it has stirred us very deeply to learn of, but the ships and people of other neutral and friendly nations have been sunk and overwhelmed in the waters in the same way. There has been no discrimination. The challenge is to all mankind.

Each nation must decide for itself how it will meet it. The choice we make for ourselves must be made with a moderation of counsel and temperateness of judgment befitting our character and our motives as a nation. We must put excited feeling away. Our motive will not be revenge or the victorious assertion of the physical might of the nation, but only the vindication of right, of human right, of which we are only a single champion.
    """,
    "response_format": "mp3"  # Use MP3 format for response
}

# Make the request with streaming enabled.
# The context manager releases the connection even if writing fails midway;
# the timeout is (connect, read) in seconds so a stalled connection cannot
# block the cell indefinitely.
buffer = None
with requests.post(url, headers=headers, json=data, stream=True, timeout=(10, 120)) as response:
    # Check if the request was successful
    if response.status_code == 200:
        buffer = BytesIO()

        # Save audio chunks to file and buffer as they arrive
        with open(speech_file_path, 'wb') as file:
            for audio_chunk in response.iter_content(chunk_size=1024):
                if audio_chunk:
                    file.write(audio_chunk)
                    buffer.write(audio_chunk)
    else:
        print(f"Error: {response.status_code} - {response.text}")

if buffer is not None:
    buffer.seek(0)

    # Read and play the audio from the buffer.
    # The decoded samples get their own name so the request payload in `data`
    # is not replaced by an array.
    # NOTE(review): decoding MP3 here presumably requires a libsndfile build
    # with MP3 support — confirm for your platform.
    audio_samples, samplerate = sf.read(buffer)
    sd.play(audio_samples, samplerate)
    sd.wait()  # Wait until the audio playback is done

    print(f"Audio saved to {speech_file_path}")


## Streaming Chat Completions to TTS for Input

In [None]:

# Ask the chat model for a short paragraph and request an incremental reply
completion = client.chat.completions.create(
    stream=True,  # deliver the answer piece by piece
    model="gpt-4o",
    messages=[
        # Sets the assistant's behavior
        {"role": "system", "content": "You are a helpful assistant."},
        # The actual question
        {"role": "user", "content": "Give me one paragraph on penguins"},
    ],
)

# Echo every non-empty text fragment as soon as it arrives
for chunk in completion:
    # Some chunks carry no text; getattr keeps the lookup safe
    content = getattr(chunk.choices[0].delta, 'content', None)
    if not content:
        continue
    print(content, end='')  # no newline, so fragments read as one paragraph


In [None]:

# Pipeline for this cell:
#   1. stream a chat completion and collect its text,
#   2. send that text to the TTS REST endpoint,
#   3. save the returned audio to disk and play it.

# Set up OpenAI API key (used for the raw HTTP call to the TTS endpoint)
api_key = os.getenv("OPENAI_API_KEY")

# Initialize the OpenAI client
client = OpenAI()

# Define the speech file path
speech_file_path = "./war_message_to_congress.mp3"

# Create the chat completion request
chat_completion = client.chat.completions.create(
    model="gpt-4o",  # Specify the model to use
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},  # System message to set the assistant's behavior
        {"role": "user", "content": "Give me one paragraph on penguins"}  # User message to initiate the conversation
    ],
    stream=True  # Enable streaming responses
)

# Capture the chat output in real-time.
# Fragments are gathered in a list and joined once, instead of growing a
# string with repeated `+=`.
chat_fragments = []
for chunk in chat_completion:
    # A streamed chunk may carry no choices; skip it rather than
    # raising IndexError on `choices[0]`.
    if not chunk.choices:
        continue
    content = getattr(chunk.choices[0].delta, 'content', None)  # Safely get the content attribute
    if content:
        print(content, end='')  # Print without newline to maintain the flow
        chat_fragments.append(content)
chat_output = "".join(chat_fragments)

# API endpoint and headers for TTS
url = "https://api.openai.com/v1/audio/speech"
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Data payload for the TTS request
data = {
    "model": "tts-1-hd",
    "voice": "onyx",
    "input": chat_output,  # Use chat output as input for TTS
    "response_format": "mp3"  # Use MP3 format for response
}

# Make the TTS request with streaming enabled.
# The context manager guarantees the connection is released even if writing
# fails midway; the timeout is (connect, read) in seconds so a stalled
# connection cannot hang the cell forever.
buffer = None
with requests.post(url, headers=headers, json=data, stream=True, timeout=(10, 120)) as response:
    # Check if the TTS request was successful
    if response.status_code == 200:
        buffer = BytesIO()

        # Save audio chunks to file and buffer as they arrive
        with open(speech_file_path, 'wb') as file:
            for audio_chunk in response.iter_content(chunk_size=1024):
                if audio_chunk:
                    file.write(audio_chunk)
                    buffer.write(audio_chunk)
    else:
        print(f"Error: {response.status_code} - {response.text}")

if buffer is not None:
    buffer.seek(0)

    # Read and play the audio from the buffer.
    # The decoded samples get their own name so the request payload in `data`
    # is not overwritten by an array.
    # NOTE(review): decoding MP3 here presumably requires a libsndfile build
    # with MP3 support — confirm for your platform.
    audio_samples, samplerate = sf.read(buffer)
    sd.play(audio_samples, samplerate)
    sd.wait()  # Wait until the audio playback is done

    print(f"Audio saved to {speech_file_path}")
