In [None]:
import os
from dotenv import load_dotenv, dotenv_values 
import IPython.display as ipd
from glob import glob
from pydub import AudioSegment
import wave
import contextlib

# Read a local .env file (if one exists) into the process environment first;
# load_dotenv was imported above but never invoked, so a key stored only in
# .env was not visible to os.getenv. Variables that are already set in the
# environment are left untouched by the default (non-override) call.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
import openai
from openai import OpenAI




import gradio as gr

# data handling 

In [None]:
# Collect every WAV file in the audio_resources folder. glob() returns the
# paths in arbitrary (filesystem-dependent) order.
dataset = glob('../audio_AI/audio_resources/*.wav')
# Pick one file to preview; raises IndexError if fewer than six files matched.
sample = dataset[5]
# Last expression of the cell: renders an inline audio player for the sample.
ipd.Audio(sample)

In [19]:
def get_audio_length(file_path):
    """Return the duration of an audio file in minutes.

    Args:
        file_path: Path of the audio file to measure.

    Returns:
        float: Duration in minutes. ``len()`` of a pydub segment is its
        length in milliseconds, which is divided by 60 000 here.
    """
    # from_file() does not force the MP3 format on the decoder, so the .wav
    # files this notebook works with (and other ffmpeg-readable formats) are
    # handled according to their real format.
    audio = AudioSegment.from_file(file_path)
    return len(audio) / (60 * 1000)  # length in minutes

In [20]:
def split_audio_into_chunks(file_path, chunk_length=10):
    """Split an audio file into consecutive MP3 chunks of ``chunk_length`` minutes.

    Chunks are written to ``audio_resources/chunk/audio_chunk_<n>.mp3``
    (relative to the current working directory), numbered from 1. The last
    chunk holds whatever audio remains and may be shorter than the others.

    Args:
        file_path: Path of the audio file to split.
        chunk_length: Maximum duration of each chunk, in minutes.

    Returns:
        ``file_path`` unchanged when the audio is not longer than
        ``chunk_length`` minutes (nothing is written in that case);
        otherwise the list of chunk file paths, in playback order.

    Raises:
        ValueError: If ``chunk_length`` does not amount to at least one
            millisecond.
    """
    chunk_ms = int(chunk_length * 60 * 1000)  # chunk length in milliseconds
    if chunk_ms <= 0:
        raise ValueError("chunk_length must be a positive number of minutes")

    # Decode the file once; from_file() does not force the MP3 format, so
    # WAV input is accepted as well.
    song = AudioSegment.from_file(file_path)
    total_ms = len(song)

    # Compare against the requested chunk length rather than a fixed 10 minutes.
    if total_ms <= chunk_ms:
        return file_path

    # export() does not create missing folders, so make sure the target exists.
    output_dir = 'audio_resources/chunk'
    os.makedirs(output_dir, exist_ok=True)

    chunk_paths = []
    for index, start_ms in enumerate(range(0, total_ms, chunk_ms), start=1):
        end_ms = min(start_ms + chunk_ms, total_ms)
        chunk_path = f'{output_dir}/audio_chunk_{index}.mp3'
        song[start_ms:end_ms].export(chunk_path, format="mp3")
        chunk_paths.append(chunk_path)
    return chunk_paths



In [None]:
# NOTE(review): absolute, machine-specific path, so this cell only runs where
# that exact folder exists. Presumably it is the same audio_resources folder
# the relative globs in this notebook point at (confirm before changing).
split_audio_into_chunks('/Users/floky/Desktop/summer traning/projects/audio_AI/audio_resources/videoplayback.wav')

In [None]:
def _chunk_sort_key(path):
    """Sort key that orders chunk files by the number after the last underscore."""
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rsplit('_', 1)[-1]
    # Numbered chunks come first in numeric order (audio_chunk_10 after
    # audio_chunk_9); any other .mp3 in the folder is placed last, by name.
    return (0, int(suffix), stem) if suffix.isdecimal() else (1, 0, stem)


# glob() gives no ordering guarantee; sort so the chunks (and the transcripts
# later built from this list) follow the original playback order.
dataset_chunk = sorted(glob('../audio_AI/audio_resources/chunk/*.mp3'), key=_chunk_sort_key)

sample = dataset_chunk[0]
ipd.Audio(sample)

In [None]:

def get_transcription_audio(file_path, model="whisper-1"):
    """Transcribe an audio file through the OpenAI transcription endpoint.

    Args:
        file_path: Path of the audio file to upload.
        model: Name of the transcription model; defaults to ``"whisper-1"``.

    Returns:
        Whatever ``client.audio.transcriptions.create`` returns for
        ``response_format="text"``.
    """
    client = OpenAI(api_key=OPENAI_API_KEY)

    # The context manager closes the file handle even if the request raises;
    # previously the handle was left open after every call.
    with open(file_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model=model,
            file=audio_file,
            response_format="text",
        )

    return transcription


In [58]:
# Shared list of transcripts: filled by the loop below and interpolated into
# the prompt text further down.
related_context = []

In [None]:
# Transcribe every chunk and store the results in the existing list object.
# Assigning through the slice replaces the previous contents, so running this
# cell again does not append a second copy of each transcript.
related_context[:] = [get_transcription_audio(chunk_path) for chunk_path in dataset_chunk]

In [None]:
# Show the collected transcripts as the cell output.
related_context

# Prompt engineering (PE)

In [None]:
def get_answer(user_prompt, query):
    """Send one question to gpt-4o and return the text of its reply.

    Args:
        user_prompt: Text sent as the ``system`` message of the chat.
        query: Text sent as the ``user`` message of the chat.

    Returns:
        The message content of the first choice in the completion.
    """
    chat_messages = [
        {"role": "system", "content": user_prompt},
        {"role": "user", "content": query},
    ]
    response = OpenAI(api_key=OPENAI_API_KEY).chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [None]:
# System prompt for the chat model. Because this is an f-string, the current
# contents of related_context are interpolated (as the list's repr) once, when
# this cell runs; later changes to the list are not reflected in this string.
# NOTE(review): AudioTranscriberBot further down builds its own copy of this
# prompt, and this module-level value does not appear to be passed to
# get_answer anywhere in the visible notebook.
user_prompt = f"""
You are AudioTranscriberBot, an advanced AI language model designed to assist users by transcribing audio files and providing accurate, helpful, and engaging responses about the transcribed content. Your primary role is to:

1. Accurately transcribe the uploaded MP3 file.
2. Allow users to ask questions about the transcribed content and provide precise and relevant answers.
3. Maintain a polite, respectful, and professional tone at all times.
4. Provide clear, concise, and accurate information.
5. Offer detailed explanations and context when necessary to ensure understanding.
6. Show to the user only the answer.

NOTE: related_context will be delimited by triple backticks 
related_context: ```{related_context}```




---
Your task is to answer in a consistent style.

<user>: Can you summarize the key points from the audio?

<AudioTranscriberBot>: The key points from the audio are as follows:

1. [First key point]
2. [Second key point]
3. [Third key point]

If you need more detailed information on a specific section, please let me know.
"""


# User interface (UI)

## version 1
Can only answer questions about the audio that was transcribed in the cells above (no user upload).

In [None]:
# NOTE(review): neither list is referenced anywhere else in the visible notebook.
message_list = []
response_list = []

def AudioTranscriberBot(message, history):
    """Chat callback: answer a question about the transcripts in related_context.

    Args:
        message: The user's input. Either a plain string or, when the chat
            interface is created with ``multimodal=True``, a dict with
            ``"text"`` and ``"files"`` keys. Attached files are ignored;
            answers are based on ``related_context`` only.
        history: Earlier turns supplied by Gradio; not used.

    Returns:
        The reply text produced by ``get_answer``.
    """
    # A multimodal chat interface hands over a dict instead of a str; only its
    # text part is a valid plain-text chat message, so extract it here.
    if isinstance(message, dict):
        message = message.get("text") or ""

    # related_context is read from module scope each time the callback runs.
    user_prompt = f"""
    You are AudioTranscriberBot, an advanced AI language model designed to assist users by transcribing audio files and providing accurate, helpful, and engaging responses about the transcribed content. Your primary role is to:

1. Accurately transcribe the uploaded file.
2. Allow users to ask questions about the transcribed content and provide precise and relevant answers.
3. Maintain a polite, respectful, and professional tone at all times.
4. Provide clear, concise, and accurate information.
5. Offer detailed explanations and context when necessary to ensure understanding.
6. Show to the user only the answer.

NOTE: related_context will be delimited by triple backticks 
related_context: ```{related_context}```




---
Your task is to answer in a consistent style.

<user>: Can you summarize the key points from the audio?

<AudioTranscriberBot>: The key points from the audio are as follows:

1. [First key point]
2. [Second key point]
3. [Third key point]

If you need more detailed information on a specific section, please let me know.
"""

    conversation = get_answer(user_prompt,message)
    return conversation

# Build the chat UI around the AudioTranscriberBot callback, then start it.
demo_chatbot = gr.ChatInterface(
    AudioTranscriberBot,
    multimodal=True,
    description=" Talk to your audio file",
    title="Audio Transcriber Bot",
)
demo_chatbot.launch()

In [91]:
# Reset the shared transcript list; the version 2 callback below only
# transcribes an uploaded file while this list is empty.
related_context = []

In [93]:
# Display the list to confirm it is empty after the reset.
related_context

[]

## version 2

Can handle user-uploaded audio.

In [89]:
def get_transcription_audio(messages,file_path):
    """Gradio callback: transcribe an uploaded file once, then answer a question.

    While the module-level ``related_context`` list is empty, the uploaded
    file is transcribed with ``whisper-1`` and the transcript is appended to
    that list. Once the list is non-empty, ``file_path`` is not read again
    and the stored transcript is reused, so a different upload is ignored
    until ``related_context`` is cleared.

    NOTE: running this cell replaces the earlier
    ``get_transcription_audio(file_path, model)`` defined above, because both
    functions share the same name.

    Args:
        messages: The user's question, sent as the ``user`` chat message.
        file_path: Path of the uploaded audio file; may be empty/None.

    Returns:
        The model's reply text, or a prompt asking for an upload when there
        is neither a stored transcript nor a file.
    """
    model="whisper-1"
    client = OpenAI(api_key =OPENAI_API_KEY)

    if not related_context:
        if not file_path:
            return "Please upload an audio file to get started."

        # Close the upload as soon as the request finishes (or fails);
        # the handle used to stay open after every transcription.
        with open(file_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model=model,
                file=audio_file,
                response_format="text"
            )

        related_context.append(transcription)

    # Both original branches built this same prompt and request; they are
    # merged so the text exists only once.
    user_prompt = f"""
You are AudioTranscriberBot, an advanced AI language model designed to assist users by transcribing audio files and providing accurate, helpful, and engaging responses about the transcribed content. Your primary role is to:


1. Accurately transcribe the uploaded file.
2. Allow users to ask questions about the transcribed content and provide precise and relevant answers.
3. Maintain a polite, respectful, and professional tone at all times.
4. Provide clear, concise, and accurate information.
5. Offer detailed explanations and context when necessary to ensure understanding.
6. Show to the user only the answer.

NOTE: related_context will be delimited by triple backticks 
related_context: ```{related_context}```




---
Your task is to answer in a consistent style.

<user>: Can you summarize the key points from the audio?

<AudioTranscriberBot>: The key points from the audio are as follows:

1. [First key point]
2. [Second key point]
3. [Third key point]

If you need more detailed information on a specific section, please let me know.
"""

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
        {"role": "system", "content": user_prompt},
        {"role": "user", "content": messages},
        ],
    )
    return completion.choices[0].message.content

In [96]:
# Form-style UI: a text box for the question and a file upload for the audio,
# both passed to get_transcription_audio; the reply is shown as text.
interface_test = gr.Interface(
    title="Audio Transcriber chatbot",
    description="Talk to your audio file",
    fn=get_transcription_audio,
    inputs=['text', 'file'],
    outputs="text",
)

# Start the local server only when this code runs as the main module.
if __name__ == "__main__":
    interface_test.launch()


Running on local URL:  http://127.0.0.1:7897

To create a public link, set `share=True` in `launch()`.
