In [None]:
!pip install --quiet pyannote.audio==3.4.0
!pip install --quiet git+https://github.com/openai/whisper.git
!pip install --quiet torch torchaudio soundfile pydub huggingface_hub transformers jiwer streamlit sounddevice vosk

In [None]:
from huggingface_hub import notebook_login
# Log this session in to the Hugging Face Hub. Later cells call
# `Pipeline.from_pretrained(..., use_auth_token=True)`, which presumably relies on
# the token stored by this login, so run this cell first.
notebook_login()

In [None]:
from pyannote.audio import Pipeline

# Fetch the pretrained speaker-diarization pipeline.
# use_auth_token=True requests authenticated access with the stored login token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=True,
)

print("✅ Diarization pipeline loaded successfully")

In [None]:
from google.colab import files
from pydub import AudioSegment

# Ask the user to upload the recording (expected file name: ES2004a.wav).
uploaded = files.upload()

# Keep only the first 60 seconds; pydub slices are expressed in milliseconds.
audio = AudioSegment.from_wav("ES2004a.wav")
short_audio = audio[:60 * 1000]
short_audio.export("short_ES2004a.wav", format="wav")

print("✅ Audio trimmed successfully")

In [None]:
import whisper

# "base" is the default choice here; switch to "small" if the GPU is strong.
model = whisper.load_model("base")

# Transcribe the trimmed clip; the result carries per-segment timestamps and text.
result = model.transcribe("short_ES2004a.wav")

# Show each segment as "<start>s - <end>s: <text>".
for seg in result["segments"]:
    print("{:.1f}s - {:.1f}s: {}".format(seg["start"], seg["end"], seg["text"]))


In [None]:
# 🧪 Evaluate baseline accuracy using WER and CER
from jiwer import wer, cer

# Hand-written reference/hypothesis pair used to demonstrate the two metrics.
reference = "Hello everyone welcome to the meeting today we will discuss project updates"
hypothesis = "Hello every one welcome meeting today we discuss project update"

# Report both metrics with two decimals, word-level first and then character-level.
for label, metric in (
    ("WER (Word Error Rate)", wer),
    ("CER (Character Error Rate)", cer),
):
    print(f"{label}: {metric(reference, hypothesis):.2f}")


In [None]:
# 5️⃣ Load Diarization pipeline
from pyannote.audio import Pipeline

# Same checkpoint as before; authenticated access via the stored login token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=True,
)
print("✅ Diarization pipeline loaded successfully")


In [None]:
import torch

# Move the pipeline to the GPU when one is available and fall back to the CPU
# otherwise. A hard-coded "cuda" device fails on a CPU-only runtime.
pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Run speaker diarization on the trimmed clip.
diarization = pipeline("short_ES2004a.wav")

# List every speaker turn as "<start>s - <end>s: <speaker label>".
for turn, _, speaker in diarization.itertracks(yield_label=True):
    print(f"{turn.start:.1f}s - {turn.end:.1f}s: {speaker}")

In [None]:
# Attach a speaker label to every Whisper segment.
#
# Each segment is assigned to the diarization turn it overlaps the most in time.
# Testing only whether a segment *endpoint* lies inside a turn misses segments that
# fully contain a shorter turn, and lets the first touching turn win even when
# another turn covers far more of the segment.
# Segments that overlap no turn at all are kept and labelled "UNKNOWN" so that no
# transcribed text silently disappears from the output.

# Materialise the turns once instead of re-iterating the annotation per segment.
speaker_turns = [
    (turn.start, turn.end, speaker)
    for turn, _, speaker in diarization.itertracks(yield_label=True)
]

final_output = []
for seg in result["segments"]:
    whisper_start = seg["start"]
    whisper_end = seg["end"]
    text = seg["text"]

    best_speaker = "UNKNOWN"
    best_overlap = -1.0
    for turn_start, turn_end, speaker in speaker_turns:
        # Length of the intersection of the two intervals; negative means disjoint,
        # zero means the intervals merely touch (still accepted as a last resort).
        overlap = min(whisper_end, turn_end) - max(whisper_start, turn_start)
        if overlap >= 0 and overlap > best_overlap:
            best_overlap = overlap
            best_speaker = speaker

    final_output.append({
        "speaker": best_speaker,
        "start": whisper_start,
        "end": whisper_end,
        "text": text
    })


In [None]:
# Write the speaker-labelled transcript, one line per segment.
# The encoding is explicit because the lines contain the non-ASCII "→" character.
with open("meeting_transcript.txt", "w", encoding="utf-8") as f:
    for entry in final_output:
        f.write(f"{entry['start']:.1f}s - {entry['end']:.1f}s: {entry['speaker']} → {entry['text']}\n")

# Report success once, after the file has been closed (not once per entry).
print("✅ Transcript saved to meeting_transcript.txt")


In [None]:
import whisper

# Load the model ("tiny" is an option if the GPU is weak) and transcribe the clip.
model = whisper.load_model("base")
result = model.transcribe("short_ES2004a.wav")

# Report whether any segments were produced, and list them if so.
if not result["segments"]:
    print("❌ Whisper could not detect any speech in the audio")
else:
    print("✅ Whisper transcription successful")
    for seg in result["segments"]:
        print("{:.1f}s - {:.1f}s: {}".format(seg["start"], seg["end"], seg["text"]))


In [None]:
from pyannote.audio import Pipeline
import torch

# Load the pretrained diarization pipeline with the stored login token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=True,
)

# Prefer the GPU, otherwise stay on the CPU.
if torch.cuda.is_available():
    pipeline.to(torch.device("cuda"))
else:
    pipeline.to(torch.device("cpu"))

# Diarize the trimmed clip and list each speaker turn.
diarization = pipeline("short_ES2004a.wav")

for turn, _, speaker in diarization.itertracks(yield_label=True):
    print("{:.1f}s - {:.1f}s: {}".format(turn.start, turn.end, speaker))


In [None]:
# Attach a speaker label to every Whisper segment.
#
# Each segment is assigned to the diarization turn it overlaps the most in time.
# Testing only whether a segment *endpoint* lies inside a turn misses segments that
# fully contain a shorter turn, and lets the first touching turn win even when
# another turn covers far more of the segment.
# Segments that overlap no turn at all are kept and labelled "UNKNOWN" so that no
# transcribed text silently disappears from the output.

# Materialise the turns once instead of re-iterating the annotation per segment.
speaker_turns = [
    (turn.start, turn.end, speaker)
    for turn, _, speaker in diarization.itertracks(yield_label=True)
]

final_output = []

for seg in result["segments"]:
    start = seg["start"]
    end = seg["end"]
    text = seg["text"]

    best_speaker = "UNKNOWN"
    best_overlap = -1.0
    for turn_start, turn_end, speaker in speaker_turns:
        # Length of the intersection of the two intervals; negative means disjoint,
        # zero means the intervals merely touch (still accepted as a last resort).
        overlap = min(end, turn_end) - max(start, turn_start)
        if overlap >= 0 and overlap > best_overlap:
            best_overlap = overlap
            best_speaker = speaker

    final_output.append({
        "speaker": best_speaker,
        "start": start,
        "end": end,
        "text": text
    })

# Report the total once, after every segment has been processed.
print(f"✅ final_output has {len(final_output)} entries")


In [None]:
# Sanity check: confirm that the alignment step produced entries and show them.
if final_output:
    print(f"✅ final_output has {len(final_output)} segments.")
    for item in final_output:
        print(item)
else:
    print("❌ No transcription data available. Check your Whisper/diarization steps.")


In [None]:
import textwrap

# Imported under an alias so the pyannote diarization object named `pipeline`
# is not overwritten; re-running the diarization cell keeps working afterwards.
from transformers import pipeline as hf_pipeline
import torch

# Combine all transcript texts
all_text = " ".join(entry["text"] for entry in final_output).strip()

# Defined up front so later cells that read `final_summary` do not hit a NameError
# (or a stale value from a previous run) when there is nothing to summarize.
final_summary = ""

if all_text:
    # Split into chunks of at most `max_chunk` characters. textwrap breaks on
    # whitespace, so words are not cut in half at chunk boundaries.
    max_chunk = 1000
    summary_chunks = textwrap.wrap(all_text, width=max_chunk)

    # Set device (GPU if available)
    device = 0 if torch.cuda.is_available() else -1
    summarizer = hf_pipeline("summarization", model="facebook/bart-large-cnn", device=device)

    # Summarize each chunk independently, then stitch the partial summaries together.
    summaries = []
    for chunk in summary_chunks:
        summary = summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
        summaries.append(summary.strip())

    final_summary = " ".join(summaries)
    print("📌 Generated Summary:\n", final_summary)

    # Save summary (explicit encoding: the model output may contain non-ASCII text)
    with open("meeting_summary.txt", "w", encoding="utf-8") as f:
        f.write(final_summary)
else:
    print("❌ No text available to summarize.")


In [None]:
!pip install --quiet evaluate rouge_score

from evaluate import load

rouge = load("rouge")

# Reference and Hypothesis Summaries
reference_summary = (
    "The meeting discussed project updates, including timelines, tasks, "
    "and responsibilities. Team agreed to complete module 1 and 2 by next week. "
    "Action items were assigned to each speaker."
)

hypothesis_summary = final_summary  # From your summarization step

results = rouge.compute(predictions=[hypothesis_summary], references=[reference_summary])
print("📊 ROUGE Evaluation Results:", results)


In [None]:
import textwrap

# Imported under an alias so the pyannote diarization object named `pipeline`
# is not overwritten by the transformers factory function.
from transformers import pipeline as hf_pipeline
import torch

# Combine all texts
all_text = " ".join(entry["text"] for entry in final_output)

# Summarize
# Use the first GPU when available and the CPU (-1) otherwise; a hard-coded
# device=0 fails on a runtime without CUDA.
summarizer = hf_pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1,
)

# Chunks of at most 1000 characters, split on whitespace so words stay intact.
# An empty transcript yields no chunks and therefore an empty summary.
summary_chunks = textwrap.wrap(all_text, width=1000)
summaries = [
    summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
    for chunk in summary_chunks
]
final_summary = " ".join(summaries)

print("📌 Generated Summary:\n", final_summary)


In [None]:
from collections import defaultdict

# Group the transcribed text by speaker label.
speaker_notes = defaultdict(list)

for entry in final_output:
    speaker_notes[entry['speaker']].append(entry['text'])

# Print each speaker's notes once, after all entries have been grouped.
# (Printing inside the grouping loop would repeat the whole listing per entry.)
for speaker, texts in speaker_notes.items():
    print(f"\n🔹 Notes for {speaker}:")
    print(" ".join(texts))

In [None]:
from jiwer import wer, cer

# Hand-written reference/hypothesis pair used to demonstrate the two metrics.
reference = "Hello everyone welcome to the meeting today we will discuss project updates"
hypothesis = "Hello every one welcome meeting today we discuss project update"

# Word-level error rate first, then character-level, both with two decimals.
for label, metric in (("WER", wer), ("CER", cer)):
    print(f"{label}: {metric(reference, hypothesis):.2f}")


In [None]:
from pydub import AudioSegment
import os

# Load the full recording.
audio = AudioSegment.from_wav("ES2004a.wav")

# Cut it into consecutive 5-minute pieces (pydub lengths and slices are in ms);
# the last piece holds whatever remains.
chunk_length_ms = 5 * 60 * 1000  # 5 minutes
chunks = []
for offset in range(0, len(audio), chunk_length_ms):
    chunks.append(audio[offset:offset + chunk_length_ms])

# Write every piece to chunks/chunk_<index>.wav.
os.makedirs("chunks", exist_ok=True)
for idx, piece in enumerate(chunks):
    piece.export(f"chunks/chunk_{idx}.wav", format="wav")
    print(f"✅ Saved chunk_{idx}.wav")


In [None]:
import whisper

model = whisper.load_model("base")  # choose 'tiny' if GPU is slow

# Transcribe every exported chunk in order.
# The per-chunk result is held in `chunk_result` so the global `result` (the
# segment-level transcription used by the speaker-alignment cells) is not
# overwritten with the last chunk's output.
chunk_texts = []
for i in range(len(chunks)):
    chunk_result = model.transcribe(f"chunks/chunk_{i}.wav")
    print(f"--- Chunk {i} ---")
    print(chunk_result["text"])
    chunk_texts.append(chunk_result["text"])

# Each chunk's text is followed by a single space, as in a running transcript.
full_text = "".join(chunk_text + " " for chunk_text in chunk_texts)

print("\nFinal Transcript:\n", full_text)


In [None]:
# Combine everything neatly: speaker-wise transcript, summary and ROUGE scores.
# The encoding is explicit because the report contains emoji, an en dash and "→",
# which a non-UTF-8 default locale encoding cannot represent.
with open("full_meeting_report.txt", "w", encoding="utf-8") as f:
    f.write("🎙 FINAL TRANSCRIPT (Speaker-wise)\n")
    for entry in final_output:
        f.write(f"{entry['start']:.1f}s–{entry['end']:.1f}s: "
                f"{entry['speaker']} → {entry['text']}\n")
    f.write("\n📌 MEETING SUMMARY\n")
    f.write(final_summary + "\n\n")
    f.write("📊 ROUGE RESULTS\n")
    f.write(str(results) + "\n")
print("✅ full_meeting_report.txt saved successfully!")


In [None]:
from google.colab import files
# Offer the generated report as a browser download. This expects
# "full_meeting_report.txt" to have been written by an earlier cell.
files.download("full_meeting_report.txt")
