<a href="https://colab.research.google.com/github/mightyoctopus/business-meeting-minutes-generator/blob/main/w3_d5_meeting_minutes_generation_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai

In [None]:
import os, threading
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive, userdata
from huggingface_hub import login, snapshot_download
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
    BitsAndBytesConfig
)
import torch

In [None]:
### Directory handed to snapshot_download as its cache_dir.
### NOTE(review): Drive is mounted at /content/drive1 in a later cell, so this
### /content/drive/... path does not appear to be on the mounted Drive -- confirm.
cache_path = "/content/drive/MyDrive/Colab Notebooks/huggingface_cache"

In [None]:
### Model identifiers used by the notebook:
### AUDIO_MODEL is the OpenAI transcription model, LLAMA is the Hugging Face repo id of the LLM.
AUDIO_MODEL = "whisper-1"
LLAMA = "meta-llama/Llama-3.1-8B-Instruct"

In [None]:
### Enables faster downloads using the Rust-based accelerated file transfer (hf_transfer).
### NOTE(review): huggingface_hub is already imported before this cell runs; the flag is
### presumably read at import time and also needs the `hf_transfer` package, which the
### install cell does not list -- confirm it actually takes effect.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

In [None]:
### Download the whole model repository (or reuse what is already in cache_path)
### and keep the local snapshot directory for the tokenizer/model loaders.
### `local_dir_use_symlinks` was dropped: it only applies together with `local_dir`,
### which is not used here, so it had no effect on this call.
model_path = snapshot_download(
    repo_id=LLAMA,
    cache_dir=cache_path,
)

In [None]:
### Connect this Colab to Google Drive.
### The mount point is /content/drive1; force_remount=True re-mounts if it is already mounted.
drive.mount("/content/drive1", force_remount=True)
# audio_filename = "/content/drive1/MyDrive/Colab Notebooks/audio_data/Meeting_Record/Special Meeting Audio File - April 29, 2025.mp3"

Mounted at /content/drive1


In [None]:
### Authenticate with the Hugging Face Hub using the HF_TOKEN Colab secret.
### NOTE(review): this cell comes after the snapshot_download cell; if the model repo
### requires authentication, login presumably has to run first -- confirm cell order.
hf_token = userdata.get("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

In [None]:
### Build the OpenAI client from the OPENAI_API_KEY Colab secret.
### transcribe_audio() uses this module-level `openai` client object.
openai_api_key = userdata.get("OPENAI_API_KEY")
openai = OpenAI(api_key=openai_api_key)

In [None]:
### Speech-to-text step: send the audio file to the OpenAI transcription endpoint
def transcribe_audio(audio_file):
    """Transcribe an audio file with the OpenAI transcription API.

    Args:
        audio_file: Path of the audio file; it is opened in binary mode.

    Returns:
        Whatever the API call returns for ``response_format="text"``
        (presumably the transcript as plain text).
    """
    request_options = {
        "model": AUDIO_MODEL,
        "temperature": 0.1,
        "response_format": "text",
    }
    # The file handle only needs to stay open for the duration of the request.
    with open(audio_file, "rb") as audio_stream:
        return openai.audio.transcriptions.create(file=audio_stream, **request_options)

In [None]:
def use_messages(transcript, meeting_description="a Denver council meeting"):
    """Build the chat messages that ask the LLM to write meeting minutes.

    Args:
        transcript: Transcript text to embed in the user prompt.
        meeting_description: Short phrase describing the meeting, inserted into
            the user prompt. The default reproduces the original hard-coded wording.

    Returns:
        list[dict]: Two messages, ``system`` then ``user``, each with
        ``role`` and ``content`` keys.
    """
    # Fixed wording: the prompt previously said "meetings of minutes".
    system_message = """
      You're an assistant that produces minutes of meetings from transcripts
      with a summary, key discussion points, takeaways and action items
      with owners in markdown.
      """
    user_prompt = f"""
      Below is an extracted transcript of {meeting_description}.
      Please write minutes in markdown, including a summary with attendees,
      location and date, discussion points, takeaways, and action items with owners.

      transcript: {transcript}
    """

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]

    return messages

In [None]:
### Load the model and tokenizer ONCE at module level (faster processing on click).
### Doing this inside stream_minutes would repeat the load on every call.
### Quantization settings: 4-bit NF4 weights with double quantization, float16 compute dtype.
bnb_quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

### Tokenizer comes from the downloaded snapshot; the EOS token is reused as the pad token.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token


### Load the causal LM from the same snapshot with the quantization config above;
### device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        quantization_config=bnb_quant_config
    )


In [None]:
def stream_minutes(messages):
    """Generate minutes with the local LLM and yield the text as it streams in.

    Args:
        messages: Chat messages (list of ``{"role", "content"}`` dicts) to
            render through the tokenizer's chat template.

    Yields:
        str: The full text generated so far (each value extends the previous
        one), so a UI can simply replace its content on every update.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the worker
        thread, after the stream has been closed.
    """
    # add_generation_prompt=True appends the assistant header so the model writes
    # a reply instead of continuing the user's turn. return_dict=True also
    # returns the attention mask, so generate() does not have to guess it.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True
    )

    gen_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=2000
    )

    errors = []

    def _generate():
        # Runs in the worker thread. On failure, close the streamer so the
        # consumer loop below terminates instead of blocking forever.
        try:
            model.generate(**gen_kwargs)
        except Exception as exc:
            errors.append(exc)
            streamer.end()

    ### Run generation in background so it can iterate the streamer
    t = threading.Thread(target=_generate)
    t.start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  ### Gradio will live-update in the Textbox

    # The stream is exhausted, so generation is over; reap the thread and
    # surface any error it recorded.
    t.join()
    if errors:
        raise errors[0]

In [None]:
!pip install -q gradio
import gradio as gr
import traceback

In [None]:
### Gradio click handler (generator): each yielded string replaces the output shown in the UI
def process_audio_to_text(audio):
    """Transcribe the uploaded audio and stream the generated minutes.

    Args:
        audio: File path of the uploaded audio, or a falsy value when nothing
            was uploaded.

    Yields:
        str: Markdown status messages first, then the minutes generated so far
        (prefixed with a "transcription complete" header). If transcription or
        generation fails, the last value is an error message instead of the
        exception propagating to the UI.
    """
    if not audio:
        yield "No audio file uploaded."
        return

    yield "**Processing...**\n_Transcribing the mp3 file and generating minutes, please wait._\n"

    latest = ""
    try:
        transcript = transcribe_audio(audio)

        # Nothing to summarise: avoid asking the LLM to invent minutes.
        if not str(transcript).strip():
            yield "No speech could be transcribed from the uploaded audio."
            return

        messages = use_messages(transcript)

        # stream straight through / LLM streaming
        header = "**Transcription complete!**\n\n"
        for partial in stream_minutes(messages):
            latest = header + partial
            yield latest
    except Exception as exc:
        # Keep whatever was already produced and append a readable error.
        error_note = f"**Error:** {type(exc).__name__}: {exc}"
        yield f"{latest}\n\n{error_note}" if latest else error_note


### --- Gradio UI --- ###
### Layout, top to bottom: centered title, audio upload, trigger button, Markdown output.
### The css argument hides the page footer.
with gr.Blocks(title="Minutes of Meeting Generator", css="footer {visibility: hidden}") as ui:
    gr.HTML("<h1 style='text-align:center'>Minutes of Meeting Generator</h1>")
    # type="filepath" means the handler receives the uploaded file's path
    audio = gr.Audio(sources=["upload"], type="filepath", label="Upload an MP3 file")
    btn = gr.Button("Transcribe")
    out = gr.Markdown(label="Result might take a couple of minutes to appear, processing a large mp3 file and transcribing.")

    # If fn yields strings, Gradio streams them:
    # process_audio_to_text is a generator, so each yielded string updates `out`
    btn.click(process_audio_to_text, inputs=audio, outputs=out)
    ui.queue()

# Start the app only when this code runs as the main module
if __name__ == "__main__":
    ui.launch()

In [None]:
!pip freeze | grep -E 'torch|transformers|bitsandbytes|gradio|huggingface_hub|sentencepiece|accelerate|openai' > requirements.txt

In [None]:
%%writefile .gitignore
# Ignore Python cache and virtual environment stuff
__pycache__/
*.py[cod]
*.pyo
*.pyd
*.so

# Ignore notebook checkpoints
.ipynb_checkpoints/

# Ignore system & OS files
.DS_Store
Thumbs.db

# Ignore virtual env folders
env/
venv/

# Ignore Google Colab and Google Drive mounts
/content/drive/
drive/
drive1/

# Ignore token and secret files
*.env
*.secret
*.key

# Ignore HuggingFace and OpenAI cache
~/.cache/
huggingface/


**To view the HF Spaces deployed version, check this code:**
https://github.com/mightyoctopus/business-meeting-minutes-generator/blob/main/app.py

**or check this file:**
https://gist.github.com/mightyoctopus/cab59260d75bc85dc6133092edbf1d36
