Dependencies


In [None]:
!apt-get install -y ffmpeg
!pip install transformers torchaudio bitsandbytes


In [None]:
# Mount Google Drive at /content/drive; the recordings folder used by this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [3]:
# Drive folder whose entries are listed to pick the recording to process.
RECORDINGS_FOLDER = '/content/drive/MyDrive/Meet Recordings'


In [None]:
import os

# NOTE(review): os.listdir returns names in arbitrary order, so the fixed
# index 6 is not guaranteed to select the same file on every run, and it raises
# IndexError when the folder holds fewer than seven entries. Confirm that the
# printed name is the intended recording before continuing.
entries = os.listdir(RECORDINGS_FOLDER)

# Show the entry that is used as the input video.
print(entries[6])

In [21]:
# Derive both paths from RECORDINGS_FOLDER so they follow the configured folder
# instead of repeating the literal Drive path.
video_filepath = os.path.join(RECORDINGS_FOLDER, entries[6])
# The extracted audio is written into the same folder as the recordings.
audio_filepath = os.path.join(RECORDINGS_FOLDER, 'meeting_audio.mp3')


In [None]:
import subprocess

# Extract the audio stream(s) of the recording (-map a) into `audio_filepath`,
# overwriting any previous output (-y). The arguments are passed as a list so
# that spaces, quotes, or `$` in the Drive file name are never interpreted by
# a shell.
ffmpeg_cmd = [
    "ffmpeg",
    "-i", video_filepath,
    "-q:a", "0",
    "-map", "a",
    audio_filepath,
    "-y",
]
ffmpeg_result = subprocess.run(
    ffmpeg_cmd, capture_output=True, text=True, errors="replace"
)
# ffmpeg writes its log to stderr; show it so the cell output stays informative.
print(ffmpeg_result.stderr)
# Fail loudly: otherwise a stale audio file from an earlier run could be
# transcribed without any indication that extraction failed.
ffmpeg_result.check_returncode()


In [24]:
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor

In [None]:
AUDIO_MODEL = "openai/whisper-medium"

print("Loading ASR model...")
# Load the checkpoint in float16 and move it to the GPU in a single expression;
# `.to()` hands back the same module object.
speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    AUDIO_MODEL, torch_dtype=torch.float16, low_cpu_mem_usage=True
).to("cuda")
# The processor bundles the tokenizer and the feature extractor for this checkpoint.
processor = AutoProcessor.from_pretrained(AUDIO_MODEL)
print("Model loaded!")





In [None]:
# Wrap the already-loaded speech model and the processor's tokenizer /
# feature extractor in a speech-recognition pipeline running on device 0.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=speech_model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    device=0,
    torch_dtype=torch.float16,
    return_timestamps=True,
)

In [None]:
def transcribe_audio(audio_path):
    """Run the ASR pipeline on an audio file and return the recognised text.

    Args:
        audio_path: Path of the audio file handed to `asr_pipeline`.

    Returns:
        The value stored under the "text" key of the pipeline result.
        The text is also printed.
    """
    print(f"Transcribing: {audio_path}")
    recognised_text = asr_pipeline(audio_path)["text"]
    print("Transcription completed:", recognised_text, sep="\n")
    return recognised_text

# Transcribe the extracted meeting audio; the text is reused for the minutes prompt.
transcript = transcribe_audio(audio_filepath)

In [28]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


In [29]:
# Hugging Face model id of the LLM that writes the meeting minutes.
LLAMA_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct"


In [30]:
# bitsandbytes settings for loading the LLM: 4-bit weights of type "nf4" with
# double quantization enabled, and bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)


In [None]:
# Load the tokenizer for the LLM; when it defines no padding token, reuse the
# end-of-sequence token for padding.
tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the LLM with the quantization settings above; device placement is left
# to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    LLAMA_MODEL,
    device_map="auto",
    quantization_config=quant_config
)


In [None]:
def generate_minutes(transcript_text):
    """
    Generate meeting minutes in markdown format from a transcript.

    The prompt asks for a summary, key discussion points, takeaways, and action
    items, and ends with a "### MEETING MINUTES START" marker line.

    Args:
        transcript_text: Meeting transcript (str) appended to the instruction prompt.

    Returns:
        str: The newly generated text only, with surrounding whitespace removed.
        The prompt and transcript fed to the model are not echoed back.
        Sampling is enabled, so the text varies between calls.
    """
    prompt = (
        "You are an assistant that produces meeting minutes from transcripts. "
        "Please generate meeting minutes in markdown format that strictly follow the template below:\n\n"
        "### Meeting Minutes\n\n"
        "**Summary:**\n"
        "- [Provide a concise summary of the meeting]\n\n"
        "**Discussion Points:**\n"
        "- [List each discussion point with details]\n\n"
        "**Takeaways:**\n"
        "- [List key takeaways from the meeting]\n\n"
        "**Action Items:**\n"
        "- [List action items along with the designated owners]\n\n"
        "Now, generate the meeting minutes for the following transcript:\n\n" + transcript_text +
        "\n\n### MEETING MINUTES START\n"
    )

    # The model is placed by `device_map="auto"`, so send the inputs to the
    # device the model reports instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    output_ids = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        # Pass the padding id explicitly so generate() does not have to guess it.
        pad_token_id=tokenizer.pad_token_id,
    )

    # generate() returns the prompt tokens followed by the continuation; decode
    # only the continuation so the transcript is not repeated in the minutes.
    new_token_ids = output_ids[0][prompt_length:]
    minutes = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
    return minutes

# Produce the minutes for the transcript and print them under a heading line.
minutes_markdown = generate_minutes(transcript)
print("Generated Meeting Minutes:", minutes_markdown, sep="\n")

In [39]:
import smtplib
from email.mime.text import MIMEText


In [None]:
def send_email(subject, body, recipient, sender_email, sender_password):
    """Send a plain-text email through smtp.gmail.com (port 587, STARTTLS).

    Args:
        subject: Value of the Subject header.
        body: Message text, sent as text/plain.
        recipient: Single destination address (To header and envelope recipient).
        sender_email: Address used for the From header, the envelope sender,
            and the SMTP login.
        sender_password: Password used for the SMTP login.

    Raises:
        smtplib.SMTPException: Propagated from the SMTP session (e.g. failed login).
        OSError: Propagated on connection, timeout, or TLS failures.
    """
    import ssl  # local import keeps this definition self-contained

    msg = MIMEText(body, "plain")
    msg["Subject"] = subject
    msg["From"] = sender_email
    msg["To"] = recipient

    # Use an explicit default context so the server certificate and host name
    # are verified before credentials are sent; starttls() without a context
    # falls back to one that skips those checks.
    tls_context = ssl.create_default_context()

    # A timeout keeps a blocked connection from hanging the notebook cell forever.
    with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
        server.starttls(context=tls_context)
        server.login(sender_email, sender_password)
        server.sendmail(sender_email, [recipient], msg.as_string())
    print(f"Email sent to {recipient}.")

# When the marker line is present keep only the (stripped) text after its first
# occurrence; otherwise pass the text through unchanged.
separator = "### MEETING MINUTES START"
cleaned_minutes = (
    minutes_markdown.partition(separator)[2].strip()
    if minutes_markdown.find(separator) != -1
    else minutes_markdown
)

import os
from getpass import getpass

SENDER_EMAIL = "sarangshibu98@gmail.com"
# Never store the password in the notebook: take it from the environment when
# available, otherwise prompt for it without echoing (e.g. an app password for
# the sender account).
SENDER_PASSWORD = os.environ.get("GMAIL_APP_PASSWORD") or getpass(
    f"SMTP password for {SENDER_EMAIL}: "
)
RECIPIENT_EMAIL = "sarangshibu98@gmail.com"

# Mail the cleaned minutes to the recipient.
send_email("Meeting Minutes", cleaned_minutes, RECIPIENT_EMAIL, SENDER_EMAIL, SENDER_PASSWORD)



