In [None]:
# Environment setup for the meeting-minutes script below.
# Speech-to-text model (installed once; the original cell installed it twice).
!pip install -U openai-whisper
# ffmpeg is installed alongside whisper for reading the audio file;
# -y keeps the install from stalling on a confirmation prompt.
!apt-get install -y ffmpeg
# English spaCy model loaded by the script as "en_core_web_sm".
!python -m spacy download en_core_web_sm
# Noto fonts (system package); -y for the same non-interactive reason.
!apt-get install -y fonts-noto
# PDF generation and the Hugging Face summarization stack.
!pip install fpdf
!pip install transformers torch

In [None]:
# LangChain packages used to wrap the Hugging Face pipeline in chains.
# NOTE(review): -U installs the newest release, while the script imports
# LLMChain/PromptTemplate from the `langchain` package root and
# HuggingFacePipeline from `langchain.llms` -- confirm these import paths
# exist in the installed version, or pin a known-good version here.
!pip install -U langchain langchain-community transformers

In [None]:
# meeting_minutes_langchain.py
# Converts audio meeting transcript into structured minutes using LangChain

import os
from datetime import datetime
import re

import whisper
import spacy
from transformers import pipeline
from fpdf import FPDF

from langchain import LLMChain, PromptTemplate
from langchain.chains import SimpleSequentialChain
from langchain.llms import HuggingFacePipeline

# 1. Load Whisper for transcription
audio_file_path = "/content/2025-04-01 Council Meeting.mp3"

# Fail fast with a clear message if the recording is missing, instead of
# finding out only after the Whisper model has already been loaded.
if not os.path.isfile(audio_file_path):
    raise FileNotFoundError(f"Audio file not found: {audio_file_path}")

whisper_model = whisper.load_model("base")
print("🔹 Transcribing audio...")
transcription_result = whisper_model.transcribe(audio_file_path)
# Only the plain-text transcript is used downstream; surrounding whitespace
# is stripped so later word slicing starts on real content.
transcript_text = transcription_result["text"].strip()
print("🔹 Transcription complete.")

# 2. Prepare summarization pipeline via HuggingFace and LangChain
print("🔹 Loading summarization model...")
summarization_pipeline = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1,  # -1 selects CPU
    min_length=100,
    # bart-large-cnn has 1024 position embeddings, so a generation limit above
    # that (the previous value was 2000) can index past the embedding table.
    max_length=1024,
    do_sample=False,
    # Prompt + transcript excerpt can exceed the model's input limit; truncate
    # the input rather than letting the forward pass fail on an over-long
    # sequence.
    truncation=True,
)
# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
summarization_llm = HuggingFacePipeline(pipeline=summarization_pipeline)
summary_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
You are an AI assistant tasked with summarizing meeting transcripts.
Summarize the following meeting transcript into a concise overview:

{text}
"""
)
# Chain that fills `summary_prompt` with a transcript and runs it through
# the summarization model.
summarization_chain = LLMChain(llm=summarization_llm, prompt=summary_prompt)

# 3. Prepare extraction chains for action items, next steps, closing remarks
def _build_extraction_chain(instruction):
    """Return ``(prompt, chain)`` for a one-line instruction over ``{text}``.

    The template layout is: blank line, the instruction, blank line, the
    ``{text}`` placeholder, trailing newline. The chain runs the prompt
    through the shared ``summarization_llm``.
    """
    prompt = PromptTemplate(
        input_variables=["text"],
        template="\n" + instruction + "\n\n{text}\n",
    )
    return prompt, LLMChain(llm=summarization_llm, prompt=prompt)


# NOTE(review): all three chains share the summarization model, which is a
# summarizer rather than an instruction-following model -- confirm that the
# differing instructions actually produce distinct outputs.
action_prompt, action_chain = _build_extraction_chain(
    "Extract the action items and responsibilities from this meeting transcript:"
)

next_prompt, next_chain = _build_extraction_chain(
    "List the next steps and follow-up actions from this meeting transcript:"
)

closing_prompt, closing_chain = _build_extraction_chain(
    "Summarize the closing remarks of this meeting transcript:"
)

# 4. Run chains on transcript excerpts (700 whitespace-separated words each,
#    for speed -- note these are words, not model tokens)
print("🔹 Running summarization and extraction chains...")
excerpt_word_count = 700
transcript_words = transcript_text.split()

# Opening excerpt: used for the overview, action items and next steps.
short_transcript = " ".join(transcript_words[:excerpt_word_count])
# Closing remarks are spoken at the end of a meeting, so that chain must read
# the tail of the transcript; the opening excerpt cannot contain them unless
# the whole transcript is shorter than the excerpt size.
closing_excerpt = " ".join(transcript_words[-excerpt_word_count:])

meeting_overview = summarization_chain.run(text=short_transcript)
action_items = action_chain.run(text=short_transcript)
next_steps = next_chain.run(text=short_transcript)
closing_remarks = closing_chain.run(text=closing_excerpt)
print("🔹 Chains complete.")

# 5. Extract date (regex) and attendees (spaCy PERSON entities)
_month_names = (
    r"(?:January|February|March|April|May|June|July|August|September|October"
    r"|November|December)"
)
_day_number = r"\d{1,2}(?:st|nd|rd|th)?"
# Accept both "1 April 2025" (the only form matched previously) and the
# month-first spoken form "April 1st, 2025"; the first date found in the
# transcript wins.
date_match = re.search(
    rf"\b(?:{_day_number}(?: of)? {_month_names},? \d{{4}}"
    rf"|{_month_names} {_day_number},? \d{{4}})\b",
    transcript_text,
)
# Fall back to today's date when the transcript never states one.
meeting_date = date_match.group(0) if date_match else datetime.now().strftime("%d %B %Y")

nlp = spacy.load("en_core_web_sm")
doc = nlp(transcript_text)
# Every PERSON entity mentioned in the transcript is treated as an attendee.
# Names are whitespace-normalised before de-duplication, and sorted so the
# PDF lists them in the same order on every run (set order is arbitrary).
attendees = sorted(
    {
        " ".join(ent.text.split())
        for ent in doc.ents
        if ent.label_ == "PERSON" and ent.text.strip()
    }
)
attendee_names = attendees if attendees else ["Not Specified"]

# 6. Generate PDF of minutes
class MeetingMinutesPDF(FPDF):
    """FPDF document with a fixed title header, page-number footer and
    helpers for titled text sections and bullet lists.

    All text is drawn with the built-in "Arial" font, which only covers
    Latin-1. Text passed to the section helpers is therefore sanitised first,
    so characters such as curly quotes or long dashes cannot cause an
    encoding error when the document is rendered.
    """

    # Common typographic characters mapped to plain Latin-1 equivalents.
    _TYPOGRAPHIC_MAP = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2022": "-",
        "\u2026": "...",
    })

    @classmethod
    def _latin1_safe(cls, text):
        """Return *text* as a string containing only Latin-1 characters.

        Known typographic characters are replaced by ASCII look-alikes; any
        other character outside Latin-1 becomes ``?``.
        """
        cleaned = str(text).translate(cls._TYPOGRAPHIC_MAP)
        return cleaned.encode("latin-1", "replace").decode("latin-1")

    def header(self):
        """Draw the centred document title and a border rectangle on each page."""
        self.set_font("Arial", "B", 16)
        self.cell(0, 10, "MEETING MINUTES", ln=True, align="C")
        self.ln(5)
        # Page frame, inset 5 units from the top-left corner.
        self.rect(5.0, 5.0, 200.0, 287.0)

    def footer(self):
        """Draw the centred page number 15 units above the bottom edge."""
        self.set_y(-15)
        self.set_font("Arial", "I", 10)
        self.cell(0, 10, f"Page {self.page_no()}", align="C")

    def _add_heading(self, title):
        """Draw an underlined bold section heading and switch to body font."""
        self.set_font("Arial", "B", 12)
        self.cell(0, 8, self._latin1_safe(title), ln=True, border="B")
        self.ln(3)
        self.set_font("Arial", size=11)

    def add_section(self, title, content):
        """Add a heading followed by *content* as wrapped paragraph text."""
        self._add_heading(title)
        self.multi_cell(0, 7, self._latin1_safe(content))
        self.ln(5)

    def add_bullet_points(self, title, items):
        """Add a heading followed by *items* as a dash-bulleted list.

        At most ten items are listed; when there are more, an extra line
        states how many were left out.
        """
        self._add_heading(title)
        displayed = items if len(items) <= 10 else items[:10] + [f"+{len(items)-10} more attendees"]
        for itm in displayed:
            self.cell(5)  # left indent before the bullet text
            self.cell(0, 7, self._latin1_safe(f"- {itm}"), ln=True)
        self.ln(5)

# Assemble the minutes document: metadata first, then the generated sections.
pdf = MeetingMinutesPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()

pdf.add_section("Date", meeting_date)
# NOTE(review): this records the time the script runs, not a time taken from
# the recording -- confirm that is the intended value.
pdf.add_section("Time", datetime.now().strftime("%I:%M %p"))
pdf.add_bullet_points("Attendees", attendee_names)

# Model-generated sections, in the order they appear in the PDF.
for section_title, section_body in (
    ("Meeting Overview", meeting_overview),
    ("Action Items & Responsibilities", action_items),
    ("Next Steps & Follow-ups", next_steps),
    ("Closing Remarks", closing_remarks),
):
    pdf.add_section(section_title, section_body)

# Written relative to the current working directory.
output_path = "Meeting_Minutes_LangChain.pdf"
pdf.output(output_path)
print(f"🔹 PDF saved as {output_path}")
