# Create meeting minutes from an Audio file

I downloaded some Denver City Council meeting minutes from this dataset:  
https://huggingface.co/datasets/huuuyeah/meetingbank

I've put them in my Google Drive. The goal of this product is to use the audio to generate meeting minutes, including action items.

In [None]:
# Install the notebook's dependencies; -q keeps pip's output quiet.
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate openai

In [None]:
# imports

import os
import requests
from IPython.display import Markdown, display, update_display
from openai import OpenAI
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch

In [None]:
# Constants

# Model name passed to openai.audio.transcriptions.create for speech-to-text.
AUDIO_MODEL = "whisper-1"
# Model ids for the text-generation models. PHI3 is the one handed to
# from_pretrained in the generation cell; LLAMA and GEMMA2 are presumably
# alternatives to swap in (not referenced in the visible cells).
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"
PHI3 = "microsoft/Phi-3-mini-4k-instruct"
GEMMA2 = "google/gemma-2-2b-it"

In [None]:
# New capability - connect this Colab to my Google Drive

# Mount Google Drive under /content/drive so its files are reachable as local paths.
drive.mount("/content/drive")
# Path of the meeting audio extract inside the mounted Drive.
audio_filename = "/content/drive/MyDrive/llms/denver_extract.mp3"

# Audio File

Google Drive:  
https://drive.google.com/file/d/1N_kpSojRR5RYzupz6nqM8hMSoEF_R7pU/view?usp=sharing


In [None]:
# Sign in to HuggingFace Hub

# Read the Hugging Face access token from the Colab secret named HF_TOKEN.
hf_token = userdata.get('HF_TOKEN')
# Log in to the Hub with that token; add_to_git_credential=True presumably also
# saves it as a git credential (confirm against the huggingface_hub docs).
login(hf_token, add_to_git_credential=True)

In [None]:
# Sign in to OpenAI using Secrets in Colab

# Read the API key from the Colab secret named OPENAI_API_KEY.
openai_api_key = userdata.get('OPENAI_API_KEY')
# Client object used by the transcription cell below.
openai = OpenAI(api_key=openai_api_key)

In [None]:
# Use the Whisper OpenAI model to convert the Audio to Text
# If you'd prefer to use an Open Source model, class student Youssef has contributed an open source version
# which I've added to the bottom of this colab.
# The transcript used by the following cells comes from this OpenAI Whisper call.

# Open the audio inside a context manager so the file handle is closed as soon
# as the request has finished, instead of staying open for the whole session.
with open(audio_filename, "rb") as audio_file:
    # response_format="text" asks for the transcript as plain text.
    transcription = openai.audio.transcriptions.create(
        model=AUDIO_MODEL,
        file=audio_file,
        response_format="text",
    )
print(transcription)

In [None]:
# System prompt: sets the assistant's role and the sections the minutes must contain.
system_message = (
    "You are an assistant that produces minutes of meetings from transcripts, "
    "with summary, key discussion points, takeaways and action items with owners, "
    "in markdown."
)

# User prompt: the instructions followed by the full transcript on a new line.
user_prompt = (
    "Below is an extract transcript of a Denver council meeting. "
    "Please write minutes in markdown, including a summary with attendees, "
    "location and date; discussion points; takeaways; and action items with owners.\n"
    f"{transcription}"
)

# Chat-format conversation handed to the tokenizer's chat template.
messages = [
    dict(role="system", content=system_message),
    dict(role="user", content=user_prompt),
]


In [None]:
# Quantization settings for loading a model in 4-bit via bitsandbytes:
# NF4 quantization type, double quantization enabled, bfloat16 compute dtype.
# NOTE: the generation cell below currently loads the model without this config.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

In [1]:
import torch

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# (The earlier stand-alone torch.cuda.is_available() call discarded its result,
# so the check is done once, here.)
device = "cuda" if torch.cuda.is_available() else "cpu"



In [2]:
# Generate the minutes with Phi-3, streaming the text to the cell output as it is produced.
# Requires the imports, constants and `messages` cells above to have been run first
# (otherwise names such as AutoTokenizer are undefined).
tokenizer = AutoTokenizer.from_pretrained(PHI3)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load the model before preparing the inputs so they can follow the model's device.
# quantization_config=quant_config is disabled because no GPU was available.
model = AutoModelForCausalLM.from_pretrained(PHI3, device_map="auto")

# add_generation_prompt=True ends the prompt with the assistant-turn marker so the
# model starts writing its reply rather than continuing the user message.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)  # device_map="auto" decides placement, so ask the model where it is

streamer = TextStreamer(tokenizer)
outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)

NameError: name 'AutoTokenizer' is not defined

In [None]:
# outputs[0] holds the prompt tokens followed by the generated tokens. Decode only
# the generated part, and drop special tokens, so the rendered minutes do not
# start with the system/user prompt and the full transcript.
prompt_length = inputs.shape[-1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

In [None]:
# Render the generated minutes as Markdown in the notebook output.
display(Markdown(response))

## Alternative implementation

In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Open-source Whisper checkpoint used for local transcription.
# NOTE: this rebinds AUDIO_MODEL, which the OpenAI transcription cell expects to be
# "whisper-1"; re-run the Constants cell before going back to the OpenAI API path.
AUDIO_MODEL = "openai/whisper-medium"

# Pick the device first so the weight dtype can follow it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Half precision halves the memory footprint on GPU; on CPU stay in float32,
# where float16 is slow or unsupported for some operations.
torch_dtype = torch.float16 if device == 'cuda' else torch.float32

# Load the speech recognition model
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,     # Minimize CPU memory usage while loading
    use_safetensors=True,       # Load the weights from safetensors files
)

# Move the model to GPU if CUDA is available
speech_model.to(device)

# Load the processor for feature extraction and tokenization
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)

# Set up the ASR pipeline
pipe = pipeline(
    task="automatic-speech-recognition",  # The task type
    model=speech_model,                  # The loaded speech model
    tokenizer=processor.tokenizer,       # Tokenizer for processing the text
    feature_extractor=processor.feature_extractor,  # Extractor for processing the audio features
    torch_dtype=torch_dtype,             # Same dtype as the loaded weights
    device=0 if device == 'cuda' else -1,  # GPU 0 when CUDA is available, otherwise CPU (-1)
)

In [None]:
# Transcribe the audio with the local open-source Whisper pipeline (`pipe`).
# return_timestamps=True enables long-form transcription for recordings longer
# than Whisper's 30-second window; the full transcript is still in result["text"].
result = pipe(audio_filename, return_timestamps=True)

In [None]:
# Take the transcript text out of the pipeline result and print it.
# This rebinds `transcription`, the name the prompt-building cell interpolates.
transcription = result["text"]
print(transcription)