In [None]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate httpx==0.28.1 gradio

In [None]:
import os
import requests
from IPython.display import Markdown, display, update_display
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, TextIteratorStreamer
import torch
import gradio as gr
import threading

In [None]:
# Hugging Face model identifiers used by the rest of the notebook.
MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"  # causal LM that writes the minutes
AUDIO_MODEL = "openai/whisper-medium"  # speech-to-text model used for transcription

In [None]:
# Path to an example meeting recording under the Drive mount point.
audio_filename = "/content/drive/MyDrive/llms/Weekly_Meeting_Example.mp3"

# Mount Google Drive into the Colab filesystem.
drive.mount("/content/drive")

In [None]:
# Fetch the Hugging Face token from Colab's user secrets and log in with it.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Quantization settings for the LLM: 4-bit NF4 weights with double
# quantization, using bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [None]:
# Load the speech-to-text checkpoint in float16 and move it onto the GPU.
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True,
).to("cuda")

# The processor provides both the tokenizer and the audio feature extractor.
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)

# Speech-recognition pipeline assembled from the components loaded above.
pipe = pipeline(
    "automatic-speech-recognition",
    model=speech_model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    device="cuda",
    torch_dtype=torch.float16,
)

In [None]:
# System prompt asking the LLM to turn a meeting transcript into markdown minutes.
system_message = (
    "You are an assistant that produces minutes of meetings from transcripts, "
    "with summary, key discussion points, takeaways and action items with owners, "
    "in markdown."
)

In [None]:
# Tokenizer for the chat model; the end-of-sequence token doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
tokenizer.pad_token = tokenizer.eos_token

# Load the causal LM with the quantization config; device placement is left to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    quantization_config=quant_config,
    device_map="auto",
)

In [None]:
# Streamer that exposes generated text as an iterator, leaving out the prompt
# and special tokens.
streamer = TextIteratorStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

In [None]:
def summary_audio(audio_filepath):
    """Transcribe an audio file from Google Drive and stream meeting minutes.

    This is a generator meant to be used as a Gradio event handler: every
    yielded value is the complete text produced so far, so the output
    component is repeatedly replaced with a longer string.

    Args:
        audio_filepath: Path of the audio file relative to
            ``/content/drive/MyDrive``. Leading/trailing whitespace and a
            leading ``/`` are ignored.

    Yields:
        str: The accumulated minutes text, or a single human-readable error
        message when the path is missing, points outside the Drive folder,
        the file does not exist, or transcription/generation fails.
    """
    drive_root = os.path.normpath("/content/drive/MyDrive")

    requested = (audio_filepath or "").strip()
    if not requested:
        yield "Please provide an audio file path."
        return

    # The path is typed into a web UI, so normalise it and refuse anything
    # that leaves the Drive folder (e.g. through ".." segments).
    full_path = os.path.normpath(os.path.join(drive_root, requested.lstrip("/")))
    if full_path == drive_root or os.path.commonpath([drive_root, full_path]) != drive_root:
        yield "Error: The audio path must point to a file inside your Google Drive folder."
        return

    try:
        result = pipe(full_path, return_timestamps=True)
        transcription = result["text"]

        # Instructions go in the system turn and the transcript in a user turn;
        # add_generation_prompt=True appends the assistant header so the model
        # answers instead of continuing the prompt.
        llm_messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": f"Meeting Transcript:\n{transcription}"},
        ]
        # return_dict=True also yields the attention mask, which generate()
        # cannot infer reliably when the pad token equals the EOS token.
        model_inputs = tokenizer.apply_chat_template(
            llm_messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to("cuda")

        # One streamer per request so overlapping requests never read each
        # other's tokens from a shared queue.
        request_streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
        generation_args = {
            **model_inputs,
            "max_new_tokens": 2000,
            "streamer": request_streamer,
        }
        generation_errors = []

        def _run_generation():
            # Runs in the worker thread; records a failure and closes the
            # stream so the consuming loop below cannot block forever.
            try:
                model.generate(**generation_args)
            except Exception as exc:
                generation_errors.append(exc)
                request_streamer.end()

        generation_thread = threading.Thread(target=_run_generation)
        generation_thread.start()

        buffer = ""
        for new_text in request_streamer:
            buffer += new_text
            yield buffer

        generation_thread.join()
        if generation_errors:
            # Surface worker-thread failures through the handler below.
            raise generation_errors[0]

    except FileNotFoundError:
        yield f"Error: File not found at {full_path}"
    except Exception as e:
        yield f"Error processing audio or generating response: {e}"

In [None]:
# Gradio UI: a path input, a trigger button and a text area that receives the
# streamed output of `summary_audio`.
with gr.Blocks() as ui:
    gr.Markdown("# Meeting Summary Chatbot")
    # The handler prepends the MyDrive folder to the entered path, so ask for a
    # path relative to it.
    gr.Markdown(
        "Enter the path of an audio file, relative to your Google Drive `MyDrive` folder, "
        "and click **Summary** to generate the meeting minutes."
    )

    with gr.Row():
        audio_path = gr.Textbox(
            label="Audio File Path:",
            lines=1,
            placeholder="llms/Weekly_Meeting_Example.mp3",
        )

    with gr.Row():
        summary = gr.Button("Summary")

    with gr.Row():
        text_summary_out = gr.Textbox(label="Result:", lines=20)

    # `summary_audio` is a generator, so the result box is updated as text is yielded.
    summary.click(fn=summary_audio, inputs=[audio_path], outputs=text_summary_out)


In [None]:
# Start the Gradio app. share=True asks Gradio for a public share link in
# addition to the local server, so the UI is reachable from outside Colab.
ui.launch(share=True)