# 🎙️ AI Meeting Summarizer + Diarizer (Colab Notebook)

In [None]:
# Install OpenAI Whisper directly from its GitHub repository (-q keeps pip output quiet).
!pip install -q git+https://github.com/openai/whisper.git
# Install or upgrade the remaining packages: pyannote.audio, transformers and librosa.
!pip install -q pyannote.audio transformers librosa --upgrade

import whisper
import torch
from transformers import pipeline
import os

# ⚠️ REQUIRED: Hugging Face token for diarization.
# Prefer supplying it through the HUGGINGFACE_TOKEN environment variable so the
# secret is never stored in (or shared along with) the notebook itself.
# The placeholder is only a fallback: replace it if the variable is not set.
# `or` (rather than a .get() default) also covers a variable set to "".
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN") or "hf_XXXXXXXXXXXXXXXXXXXX"


In [None]:
# ⬆️ Upload Audio
from google.colab import files

# Opens the browser file picker; the result is a mapping keyed by file name.
uploaded = files.upload()

# A cancelled dialog yields an empty mapping. Fail here with a clear message
# instead of an opaque IndexError further down.
if not uploaded:
    raise RuntimeError(
        "No audio file was uploaded; re-run this cell and choose a file."
    )

# Only the first uploaded file is processed by the rest of the notebook.
audio_path = next(iter(uploaded))


In [None]:
# 🔊 Whisper Transcription
print("Loading Whisper large model...")
model = whisper.load_model("large")

print("Transcribing...")
result = model.transcribe(audio_path)
# Full transcript as a single string; reused later for summarization.
transcript = result["text"]

# Write as UTF-8 explicitly: transcripts can contain non-ASCII characters and
# the platform default encoding is not guaranteed to be able to encode them.
with open("transcript.txt", "w", encoding="utf-8") as f:
    f.write(transcript)


In [None]:
# 👥 Speaker Diarization
from pyannote.audio import Pipeline

print("Loading diarization pipeline...")
# Bound to its own name so the `pipeline` factory imported from transformers
# at the top of the notebook is not overwritten and stays usable afterwards.
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization", use_auth_token=HUGGINGFACE_TOKEN
)

# NOTE(review): some pyannote.audio releases return None instead of raising
# when the model cannot be fetched (invalid token, or the model's user
# conditions were not accepted on Hugging Face). Surface that explicitly
# rather than failing later with "'NoneType' object is not callable".
if diarization_pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization'. Check HUGGINGFACE_TOKEN "
        "and that the model's access conditions have been accepted."
    )

print("Running diarization...")
diarization = diarization_pipeline(audio_path)

# ⬇️ Format diarization
# One line per speaker turn: "[SPEAKER] <start>s - <end>s", times to 0.1 s.
segments = [
    f"[{speaker}] {turn.start:.1f}s - {turn.end:.1f}s"
    for turn, _, speaker in diarization.itertracks(yield_label=True)
]

with open("diarization.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(segments))


In [None]:
# ✂️ Summarization (Pegasus)
# Import the factory under a distinct alias so this cell does not depend on the
# global name `pipeline`, which other cells may rebind to a different object.
from transformers import pipeline as hf_pipeline

# A meeting transcript is usually far longer than the model's input window
# (NOTE(review): google/pegasus-xsum is published with a 512-token limit —
# confirm). Summarize in word-based chunks that should stay under that limit
# instead of passing everything in one oversized call.
MAX_WORDS_PER_CHUNK = 300

print("Summarizing with Pegasus...")
summarizer = hf_pipeline("summarization", model="google/pegasus-xsum")

words = transcript.split()
chunks = [
    " ".join(words[start:start + MAX_WORDS_PER_CHUNK])
    for start in range(0, len(words), MAX_WORDS_PER_CHUNK)
]

# truncation=True is a safety net in case a chunk still tokenizes to more than
# the model accepts. An empty transcript produces an empty summary rather than
# a model call on empty input.
summary = " ".join(
    summarizer(
        chunk, max_length=120, min_length=30, do_sample=False, truncation=True
    )[0]["summary_text"]
    for chunk in chunks
)

with open("summary.txt", "w", encoding="utf-8") as f:
    f.write(summary)


In [None]:
# 📁 Download files
# Send each generated text file to the browser: transcript first, then the
# diarization listing, then the summary.
for output_name in ("transcript.txt", "diarization.txt", "summary.txt"):
    files.download(output_name)
