AI MEETING MINUTES GENERATOR


CELL 1 — INSTALL ALL REQUIRED LIBRARIES


In [None]:
# Install the libraries used by the rest of the notebook.
# torch, torchvision, torchaudio, transformers, pyannote.audio, numpy, pandas
# and protobuf are pinned to exact versions to avoid compatibility issues.
# The remaining packages (openai-whisper, pydub, nltk, huggingface_hub,
# accelerate, sentencepiece, bitsandbytes) carry no version pin, so pip picks
# whichever release it resolves at install time.
# --force-reinstall makes pip reinstall the listed packages even when they are
# already present in the environment.
!pip install --force-reinstall \
  torch==2.5.1 \
  torchvision==0.20.1 \
  torchaudio==2.5.1 \
  transformers==4.41.2 \
  pyannote.audio==3.3.2 \
  openai-whisper \
  pydub \
  nltk \
  huggingface_hub \
  numpy==1.26.4 \
  pandas==2.2.2 \
  accelerate sentencepiece bitsandbytes protobuf==3.20.3


CELL 2 — VERIFY PYTORCH + GPU

In [None]:
import torch
import torchvision

# Environment report: one "label value" line per entry, in a fixed order
_environment_report = (
    ("Torch version:", torch.__version__),
    ("Torchvision version:", torchvision.__version__),
    ("CUDA available:", torch.cuda.is_available()),
)
for _label, _value in _environment_report:
    print(_label, _value)

# The device name is only queried when a CUDA device is actually usable
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

CELL 3 — HUGGING FACE LOGIN (FOR PYANNOTE)

In [None]:
import os

from huggingface_hub import login

# Authenticate with Hugging Face so the pyannote models can be downloaded.
# The access token must never be written into the notebook itself: anyone who
# receives a copy of the file would receive the credential too. It is read
# from the HF_TOKEN environment variable instead; when that variable is not
# set, token=None makes login() ask for the token interactively.
login(token=os.environ.get("HF_TOKEN"))


CELL 4 — SPEAKER DIARIZATION (WHO SPOKE WHEN)

In [None]:
import torch
from pyannote.audio import Pipeline

# Load the pretrained speaker diarization pipeline.
# "pyannote/speaker-diarization-3.1" is the checkpoint published for
# pyannote.audio 3.x, which is the version pinned in the install cell
# (pyannote.audio==3.3.2); the unversioned "pyannote/speaker-diarization"
# repository was built for the 2.x releases.
# NOTE(review): the 3.1 pipeline is gated — its user conditions must be
# accepted on the Hugging Face Hub with the logged-in account.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=True  # reuse the token saved by huggingface_hub.login()
)

# from_pretrained() returns None (instead of raising) when the gated model
# cannot be downloaded; fail here with a clear message rather than with an
# opaque "'NoneType' object is not callable" on the next line.
if diarization_pipeline is None:
    raise RuntimeError(
        "Could not load the diarization pipeline. Check that you are logged "
        "in to Hugging Face and have accepted the model's user conditions."
    )

# pyannote pipelines stay on CPU unless moved explicitly
if torch.cuda.is_available():
    diarization_pipeline.to(torch.device("cuda"))

# Run diarization on the meeting audio file
diarization = diarization_pipeline("audio.wav")

# Print speaker segments with start and end times
for turn, _, speaker in diarization.itertracks(yield_label=True):
    print(f"{speaker}: {turn.start:.2f}s → {turn.end:.2f}s")

CELL 5 — SPLIT AUDIO BY SPEAKER (Pydub)

In [None]:
import os

from pydub import AudioSegment

# Load the full meeting audio
audio = AudioSegment.from_wav("audio.wav")

# Create directory to store speaker-wise audio chunks
os.makedirs("speaker_chunks", exist_ok=True)

chunks = []  # List of (speaker, audio_file) tuples, in diarization order

# Cut one audio file per diarized speaker turn
for i, (turn, _, speaker) in enumerate(diarization.itertracks(yield_label=True)):
    # pydub slices by milliseconds, diarization reports seconds
    start_ms = int(turn.start * 1000)
    end_ms = int(turn.end * 1000)

    # Extract speaker-specific audio chunk
    chunk = audio[start_ms:end_ms]

    # A turn can collapse to nothing after the millisecond truncation (or lie
    # past the end of the audio). Exporting it would create an empty WAV with
    # nothing to transcribe, so such turns are skipped. The index `i` is kept
    # from the diarization order, so file names stay stable.
    if len(chunk) == 0:
        continue

    # Save chunk as separate audio file
    filename = f"speaker_chunks/{speaker}_{i}.wav"
    chunk.export(filename, format="wav")

    # Store speaker and file path
    chunks.append((speaker, filename))

print("Audio chunks created:")
for c in chunks:
    print(c)

CELL 6 — SPEECH TO TEXT USING WHISPER

In [None]:
import whisper

# Speech-to-text model (the "base" Whisper checkpoint)
whisper_model = whisper.load_model("base")

# One {"speaker", "text"} record per audio chunk, in the order of `chunks`;
# the recognised text is stripped of surrounding whitespace
speaker_transcripts = [
    {
        "speaker": speaker,
        "text": whisper_model.transcribe(audio_file)["text"].strip(),
    }
    for speaker, audio_file in chunks
]

# Show what each speaker said
for entry in speaker_transcripts:
    print(f"{entry['speaker']}: {entry['text']}")

WORD ERROR RATE

In [None]:
# Install jiwer, the package that provides the wer() function imported in the next cell
!pip install jiwer


In [None]:
from jiwer import wer


In [None]:
# Step 3: Create REFERENCE TEXT
# Ground-truth transcript for the WER computation, one labelled line per
# speaker turn. The assembled string starts with a newline and ends with a
# blank line.
_reference_turns = [
    (
        "SPEAKER_01: Good morning everyone. Today we have our weekly team meeting. "
        "We will discuss the logging module, backend API progress, and frontend updates."
    ),
    (
        "SPEAKER_02: The backend API for authentication is almost ready, "
        "but validation and error handling still need to be added by Friday."
    ),
    (
        "SPEAKER_00: I will take care of integrating the frontend with the backend by Monday, "
        "and the testing team will start functional testing next week."
    ),
]
reference_text = "\n" + "\n".join(_reference_turns) + "\n\n"


In [None]:
# Step 4: Create WHISPER PREDICTED TEXT
# Hypothesis string for WER: every transcript in order, each followed by a
# single space (so the result ends with a trailing space when non-empty)
predicted_text = "".join(entry["text"] + " " for entry in speaker_transcripts)


In [None]:
# Step 5: Calculate WER
import re


def _normalize_for_wer(text):
    """Return *text* as lowercase words separated by single spaces.

    Speaker labels of the form ``SPEAKER_<digits>:`` and all punctuation
    except apostrophes are removed. The reference text carries speaker
    labels and punctuation that the predicted text is built without (or may
    punctuate differently); comparing the raw strings would count every such
    difference as a word error even when the spoken words match.
    """
    # Labels are matched before lowercasing because the pattern is upper-case
    without_labels = re.sub(r"\bSPEAKER_\d+\s*:", " ", text)
    without_punctuation = re.sub(r"[^\w\s']", " ", without_labels.lower())
    return " ".join(without_punctuation.split())


# Both sides go through the same normalization so only word differences count
error_rate = wer(
    _normalize_for_wer(reference_text),
    _normalize_for_wer(predicted_text)
)

print("Word Error Rate (WER):", error_rate)
print("WER Percentage:", error_rate * 100, "%")


CELL 7 — MERGE SPEAKER TRANSCRIPTS

In [None]:
# Full meeting text: one "<speaker>: <text>" line per transcript entry,
# every line terminated by a newline
full_meeting_text = "".join(
    f"{entry['speaker']}: {entry['text']}\n" for entry in speaker_transcripts
)

print(full_meeting_text)

CELL 8 — LIGHT NLP CLEANING

In [None]:
import re

# Collapse every run of whitespace (including the newlines between speaker
# lines) into one space, then trim both ends
_single_spaced = re.sub(r'\s+', ' ', full_meeting_text)
clean_text = _single_spaced.strip()
print(clean_text)

CELL 9 — MEETING SUMMARIZATION (BART)

In [None]:
import torch
from transformers import pipeline

# Load summarization pipeline using BART.
# The device is passed explicitly so the model runs on the first GPU when one
# is available (device=0) and on CPU otherwise (device=-1).
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1
)

# Generate meeting summary
summary = summarizer(
    clean_text,
    max_length=60,
    min_length=40,
    do_sample=False,  # Deterministic summary
    # bart-large-cnn accepts at most 1024 input tokens; without truncation a
    # longer meeting transcript makes the model fail instead of summarizing
    # the part that fits.
    truncation=True
)

summary_text = summary[0]["summary_text"]
print("SUMMARY:\n", summary_text)


CELL 10 — LOAD MISTRAL AGENT (GPU + FP16)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub id of the instruction-tuned checkpoint used as the agent
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are loaded in half precision; device_map="auto" leaves the
# placement of the weights to the loader
_load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(model_id, **_load_options)

# Use the end-of-sequence token for padding, on the model config and on the
# tokenizer alike
model.config.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

print("✅ Mistral loaded in FP16 on GPU")


CELL 11 — AGENTIC AI PROMPT (MOST IMPORTANT)

In [None]:
# Prompt sent to the language model. It asks for two outputs in a fixed
# format: bullet points derived from the summary, and action items derived
# from the full transcript.
# This is an f-string, so the current values of `summary_text` and
# `full_meeting_text` are substituted at the moment this cell runs; re-run the
# cell whenever either of them changes.
agent_prompt = f"""
You are an AI meeting assistant designed for structured meeting analysis.

You must perform TWO SEPARATE TASKS using DIFFERENT INPUTS.

=================================================
TASK 1: BULLET POINT GENERATION (FROM SUMMARY)
=================================================
Generate clear bullet points from the MEETING SUMMARY.

RULES FOR BULLET POINTS:
- Capture key discussion topics and decisions.
- Do NOT include implementation details.
- Do NOT include deadlines or task ownership.
- Each bullet point must be one concise sentence.
- Maximum 5 bullet points.

Meeting Summary:
{summary_text}

=================================================
TASK 2: ACTION ITEM EXTRACTION (FROM FULL TEXT)
=================================================
Extract ONLY explicit action items from the FULL MEETING TRANSCRIPT.

DEFINITION RULES:
1. A TASK is a clearly assigned activity.
2. An OWNER is the person explicitly named as responsible.
   - Use the exact name as written.
   - Do NOT infer from speaker labels.
3. A DEADLINE can be:
   - A date (e.g., 12 June)
   - A day name (e.g., Friday)
   - A relative time (e.g., next week)

STRICT RULES:
- Extract ONLY explicit action items (not discussions).
- Do NOT guess or infer missing details.
- If OWNER is missing, write "Not mentioned".
- If DEADLINE is missing, write "Not mentioned".
- Number each action item starting from 1.
- Use EXACT format:
  1. Task | Owner | Deadline

Full Meeting Transcript:
{full_meeting_text}

=================================================
FINAL OUTPUT FORMAT (DO NOT CHANGE)
=================================================

Bullet Points:
- Bullet point 1
- Bullet point 2
- Bullet point 3

Action Items:
1. Task | Owner | Deadline
2. Task | Owner | Deadline
"""


In [None]:
# ---------- TOKENIZE PROMPT (CREATES inputs) ----------
# truncation=True together with max_length=1024 caps the prompt at 1024
# tokens; anything beyond that limit is dropped before generation.
# The resulting tensors are moved to the model's device: the model was loaded
# with device_map="auto", so it may live on the GPU, while the tokenizer
# always returns CPU tensors.
inputs = tokenizer(
    agent_prompt,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=1024
).to(model.device)


In [None]:
# Generate the answer: greedy decoding (do_sample=False), at most 300 new tokens
outputs = model.generate(
    **inputs,
    max_new_tokens=300,
    do_sample=False
)

# generate() returns the prompt tokens followed by the newly generated ones.
# Only the new tokens are decoded: the post-processing cell removes the prompt
# with an exact string match on agent_prompt, which silently fails whenever the
# decoded prompt differs from the original text (for example after the prompt
# was truncated to max_length during tokenization).
_prompt_token_count = inputs["input_ids"].shape[1]
full_output = tokenizer.decode(
    outputs[0][_prompt_token_count:],
    skip_special_tokens=True
)

CELL 12 — CLEAN UP MISTRAL AGENT OUTPUT (FINAL OUTPUT)

In [None]:


import re

# NOTE: this cell rebinds `clean_text`, the same name the light NLP cleaning
# cell assigns and the summarizer reads. Re-run the cleaning cell before
# re-running the summarizer.

# Remove prompt text (no-op when the output does not contain the prompt)
clean_text = full_output.replace(agent_prompt, "").strip()

# Remove stray header lines: echoes of the "N. Task | Owner | Deadline"
# template line from the prompt
clean_text = re.sub(
    r'^\d+\.\s*Task\s*\|\s*Owner\s*\|\s*Deadline\s*$',
    '',
    clean_text,
    flags=re.MULTILINE
)

# Remove separators. Only runs of three or more "=" count as a separator, so
# a single "=" or "==" inside a bullet point or task description is kept.
clean_text = re.sub(
    r'={3,}\n?',
    '',
    clean_text
).strip()

# Insert header correctly: the "Action Items:" label is replaced by the table
# header. partition() splits at the first occurrence only, so everything after
# it stays in the action section even if the label appears again.
bullet_section, marker, action_section = clean_text.partition("Action Items:")
if marker:
    final_output = (
        bullet_section.strip()
        + "\n\nTask | Owner | Deadline\n"
        + action_section.strip()
    )
else:
    # No action-item section in the output: keep the cleaned text as it is
    final_output = clean_text

# OUTPUT tokens
output_token_count = len(tokenizer.tokenize(final_output))
print("Output tokens:", output_token_count)

print("\nFINAL OUTPUT:\n")
print(final_output)
