In [None]:
import requests
import torch
import bitsandbytes as bnb
import transformers
import sentencepiece
import accelerate
import openai
import httpx

In [None]:
# imports

import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch

In [None]:
# Constants

# Model identifiers.
# AUDIO_MODEL holds the same model name that transcribe_audio sends to the
# OpenAI transcription endpoint.
AUDIO_MODEL = "whisper-1"
# LLAMA is the checkpoint id passed to AutoTokenizer / AutoModelForCausalLM
# from_pretrained further down.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of an .mp3 file under the mounted Drive.
audio_filename = "/content/drive/MyDrive/denver_extract.mp3"

In [None]:
# Sign in to HuggingFace Hub

# Fetch the Hugging Face token via google.colab's userdata and log in with it,
# also storing it as a git credential.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Build the OpenAI client from the API key fetched via google.colab's userdata.
openai_api_key = userdata.get('OPENAI_API_KEY')
# NOTE(review): this rebinds the name `openai`, shadowing the `openai` module
# imported in the first cell. transcribe_audio calls methods on this client
# instance, so renaming it requires updating that function too.
openai = OpenAI(api_key=openai_api_key)

In [None]:
# Quantization settings: 4-bit NF4 weights with double quantization,
# computing in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer for the LLAMA checkpoint; the end-of-sequence token doubles as padding.
tokenizer = AutoTokenizer.from_pretrained(LLAMA)
tokenizer.pad_token = tokenizer.eos_token

# Load the causal LM with the quantization config above; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    LLAMA,
    quantization_config=quant_config,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
import time

In [None]:
def transcribe_audio(audio_file):
    """Transcribe an audio file through the OpenAI transcription endpoint.

    Args:
        audio_file: Path of the audio file to open and upload. May be None or
            empty, e.g. when the handler is triggered before a file is chosen.

    Returns:
        The transcript text on success. On any failure, a human-readable
        message starting with "❌ Error in transcription:" is returned instead
        of raising, so the message can be shown directly in the output box.
    """
    # Explicit guard: gives a clear message instead of the TypeError text that
    # open(None) would otherwise produce.
    if not audio_file:
        return "❌ Error in transcription: no audio file provided."

    try:
        with open(audio_file, "rb") as f:
            # Use the notebook-level constant rather than repeating the model name.
            transcript_response = openai.audio.transcriptions.create(model=AUDIO_MODEL, file=f)
        return transcript_response.text
    except Exception as e:
        # Deliberate best-effort: report the failure as text rather than crash the UI.
        return f"❌ Error in transcription: {str(e)}"

# ------------------------------
# Streaming generator function
def stream_minutes(transcript):
    """Generate meeting minutes from a transcript, yielding the text as it grows.

    This is a generator. Every yielded value is the complete markdown produced
    so far (not just the newest fragment), so a consumer can simply re-render
    the latest value.

    Args:
        transcript: Meeting transcript text.

    Yields:
        The accumulated minutes after each newly decoded chunk of text, or a
        single message starting with "❌ Error while generating minutes:" when
        the transcript is empty or generation fails.
    """
    # Nothing to summarise: report it instead of prompting the model with no content.
    if transcript is None or not str(transcript).strip():
        yield "❌ Error while generating minutes: transcript is empty."
        return

    # Local imports keep this block self-contained: the notebook's import cell
    # brings in TextStreamer but not TextIteratorStreamer.
    from threading import Thread
    from transformers import TextIteratorStreamer

    try:
        messages = [
            {
                "role": "system",
                "content": "You are an assistant that produces detailed meeting minutes in markdown format including summary, attendees, key discussion points, takeaways, and action items with owners."
            },
            {
                "role": "user",
                "content": f"Below is a transcript of a meeting. Please write the minutes in markdown format:\n\n{transcript}"
            }
        ]

        # add_generation_prompt=True ends the prompt with the assistant header so
        # the model writes a reply instead of continuing the user turn.
        # return_dict=True also yields the attention mask that generate() expects.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)  # follow the model's device rather than assuming "cuda"

        # The streamer decodes incrementally and only emits text once it forms
        # complete characters/words, which per-token decoding cannot guarantee.
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        failures = []

        def _generate():
            # Runs on a worker thread so the consumer loop below can yield text
            # while tokens are still being produced.
            try:
                model.generate(
                    **inputs,
                    streamer=streamer,
                    max_new_tokens=2000,
                    do_sample=False,
                    pad_token_id=tokenizer.pad_token_id,
                )
            except Exception as exc:
                failures.append(exc)
                # Push the stop signal so the consumer loop does not block forever.
                streamer.end()

        worker = Thread(target=_generate, daemon=True)
        worker.start()

        output = ""
        for chunk in streamer:
            if not chunk:
                continue
            output += chunk
            yield output
        worker.join()

        # Surface a worker-thread failure through the same error path as before.
        if failures:
            raise failures[0]

    except Exception as e:
        yield f"❌ Error while generating minutes: {str(e)}"

In [None]:
# NOTE(review): identical settings to the quant_config built before the model
# was loaded. Nothing visible after this cell reads it, so this only rebinds
# the name; consider removing the cell.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
!pip install gradio
import gradio as gr

Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [None]:
# Gradio UI: upload audio -> transcribe -> generate minutes.
# Components are created in the order below inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Meeting Minutes Generator with Streaming")

    # type="filepath": transcribe_audio passes the received value to open(),
    # so it expects a path rather than raw audio data.
    audio_input = gr.Audio(label="Upload Meeting Audio", type="filepath")
    transcript_box = gr.Textbox(label="Transcript", lines=10)
    minutes_markdown = gr.Markdown(label="📋 Minutes (Markdown Streaming)")

    transcribe_btn = gr.Button("Transcribe")
    stream_btn = gr.Button("Generate Meeting Minutes")

    # Step 1: audio input -> transcribe_audio -> transcript textbox.
    transcribe_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=transcript_box)
    # Step 2: transcript textbox -> stream_minutes (a generator) -> markdown pane.
    stream_btn.click(fn=stream_minutes, inputs=transcript_box, outputs=minutes_markdown)

# Start the app.
# NOTE(review): the launch output recommends debug=True to show errors in Colab.
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://af24aa57a88b783fbd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


