In [None]:
# --- 1. SETUP AND INSTALLATIONS ---
# Run this once to install the necessary packages.
# !pip install transformers torch accelerate SQLAlchemy

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import os
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import sessionmaker, declarative_base

# --- 2. CONFIGURATION ---
# Path of the transcript JSON file handed to load_transcript_from_json() below.
# NOTE(review): absolute, machine-specific Windows path — adjust per environment.
JSON_TRANSCRIPT_PATH = r"C:\Users\apran\Videos\Cin\LIBRARY\Meeting Agent\transcript.json"
# Model identifier passed to AutoTokenizer / AutoModelForCausalLM.from_pretrained().
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"
# SQLAlchemy connection URL of the SQLite database that stores extracted tasks.
DATABASE_URL = "sqlite:///tasks.db"

# --- 3. DATABASE DEFINITION ---
# Engine bound to DATABASE_URL; used by the session factory and by create_all().
engine = create_engine(DATABASE_URL)
# Session factory bound to the engine. Autocommit and autoflush are disabled,
# so callers must commit() explicitly to persist their changes.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base class that the ORM models in this file (Task) inherit from.
Base = declarative_base()

class Task(Base):
    """ORM model for a single action item, stored in the ``tasks`` table."""
    __tablename__ = "tasks"
    # Integer primary key (also indexed).
    id = Column(Integer, primary_key=True, index=True)
    # Text of the action item (indexed).
    description = Column(String, index=True)
    # Person the action item is assigned to.
    assignee = Column(String)
    # Due date kept as a plain string; it is not parsed into a date type here.
    due_date_str = Column(String)
    # Workflow state; rows created without an explicit status get "To Do".
    status = Column(String, default="To Do")

# Create the database and table.
# Runs at import / cell-execution time: emits the schema for every model
# registered on Base (here: Task) against the engine configured above.
Base.metadata.create_all(bind=engine)
print("✅ Database is ready.")

# --- 4. HELPER FUNCTIONS ---
def load_transcript_from_json(file_path):
    """Load a meeting transcript from a JSON file and render it as plain text.

    The JSON document is expected to be an object with the keys
    ``meeting_title``, ``participants`` (a list of strings) and
    ``transcript`` (a list of objects, each with ``speaker``, ``timestamp``
    and ``dialogue``).

    Args:
        file_path: Path of the JSON transcript file.

    Returns:
        The formatted transcript: a title line, a participants line, a blank
        line, then one ``speaker (timestamp): dialogue`` line per entry.
        ``None`` if the file cannot be read, is not valid JSON, or does not
        have the expected structure; the error is printed in that case.
    """
    try:
        # Decode explicitly instead of relying on the platform default
        # (cp1252 on Windows). "utf-8-sig" reads plain UTF-8 and also strips
        # the BOM some Windows editors prepend, which json.load() rejects.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            data = json.load(f)
        header = (
            f"Meeting Title: {data['meeting_title']}\n"
            f"Participants: {', '.join(data['participants'])}\n\n"
        )
        lines = [
            f"{entry['speaker']} ({entry['timestamp']}): {entry['dialogue']}\n"
            for entry in data["transcript"]
        ]
        return header + "".join(lines)
    except (OSError, ValueError, KeyError, TypeError) as e:
        # OSError: missing/unreadable file; ValueError: invalid JSON or bad
        # encoding; KeyError/TypeError: document lacks the expected shape.
        print(f"❌ Error loading transcript: {e}")
        return None

def process_transcript(transcript, model, tokenizer):
    """Ask the LLM for meeting minutes and action items for a transcript.

    Args:
        transcript: Formatted transcript text that is embedded in the prompt.
        model: Causal language model exposing ``device`` and ``generate()``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template()``,
            ``__call__`` and ``batch_decode()``.

    Returns:
        The JSON object parsed from the model's reply (the prompt asks for
        the keys ``"minutes"`` and ``"tasks"``), or ``None`` when the reply
        contains no parseable JSON object; an error is printed in that case.
    """
    prompt = f"""
    Analyze the following transcript. Your task is to:
    1. Generate a concise "Minutes of Meeting" summary.
    2. Extract all action items into a structured JSON format.

    Respond with a single JSON object containing "minutes" (a string) and "tasks" (a list of objects).
    Each task object must have keys: "task_description", "assignee", and "due_date".

    Transcript:\n---\n{transcript}\n---
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant that processes meeting transcripts."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Forward the whole encoding, not just input_ids, so the attention mask
    # produced by the tokenizer reaches generate().
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # generate() returns prompt + completion; keep only the new tokens.
    generated_ids = [out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # The model may wrap the JSON in extra text, so parse only the span from
    # the first "{" to the last "}". No such span means nothing to parse.
    start = response.find("{")
    end = response.rfind("}")
    candidate = response[start:end + 1] if 0 <= start < end else ""
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        print("❌ Error: Could not decode JSON from the model's response.")
        return None

# --- 5. EXECUTION ---
# Loads the transcript, runs the model on it, prints the minutes and action
# items, and leaves the extracted tasks in `tasks_to_save` for the database cell.

# Start from "nothing to save" so the name is always defined, whichever step
# below fails.
tasks_to_save = []

print("Loading transcript...")
meeting_transcript = load_transcript_from_json(JSON_TRANSCRIPT_PATH)

if meeting_transcript:
    print("Loading AI model... (This can take a moment)")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Use half precision only when a CUDA GPU is present; on CPU-only machines
    # float16 is slow and some PyTorch builds lack half-precision CPU kernels.
    model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=model_dtype, device_map="auto")

    print("Processing with AI...")
    ai_results = process_transcript(meeting_transcript, model, tokenizer)

    if ai_results:
        print("\n--- 📝 Generated Minutes of Meeting ---")
        print(ai_results.get("minutes", "No summary generated."))
        print("\n--- 📌 Extracted Action Items ---")
        extracted_tasks = ai_results.get("tasks")
        # The model may return null or a non-list for "tasks"; only a
        # non-empty list is handed on to the database cell.
        if isinstance(extracted_tasks, list) and extracted_tasks:
            tasks_to_save = extracted_tasks
            for task in tasks_to_save:
                print(f"- {task}")
        else:
            print("No action items were extracted.")

In [None]:
def _task_field_as_text(value):
    """Return *value* as text for a String column, keeping ``None`` as ``None``."""
    if value is None or isinstance(value, str):
        return value
    return str(value)


def update_db_and_print_tasks(tasks):
    """Replace the stored tasks with *tasks* and print the table contents.

    All existing rows of the ``tasks`` table are deleted first (even when
    *tasks* is empty), then one row is inserted per task dictionary, and
    finally every row is read back and printed.

    Args:
        tasks: Iterable of dictionaries with the keys ``"task_description"``,
            ``"assignee"`` and ``"due_date"`` (missing keys are stored as
            NULL). ``None`` is treated as an empty list. Entries that are
            not dictionaries are skipped with a warning, and non-string
            field values are converted with ``str()``, because the data
            comes from model output.

    Raises:
        Exception: Any database error is re-raised after the transaction has
            been rolled back, so the previous rows are kept.
    """
    db = SessionLocal()
    try:
        # Clear existing tasks to avoid duplicates on re-runs
        num_deleted = db.query(Task).delete()
        if num_deleted > 0:
            print(f"\n🗑️ Cleared {num_deleted} old tasks from the database.")

        # Add new tasks
        saved_count = 0
        for task_item in tasks or []:
            if not isinstance(task_item, dict):
                print(f"⚠️ Skipping malformed task entry: {task_item!r}")
                continue
            db.add(
                Task(
                    description=_task_field_as_text(task_item.get("task_description")),
                    assignee=_task_field_as_text(task_item.get("assignee")),
                    due_date_str=_task_field_as_text(task_item.get("due_date")),
                )
            )
            saved_count += 1
        db.commit()
        if saved_count:
            print(f"✅ Successfully saved {saved_count} new tasks to the database.")

        # Read all tasks back from the database to confirm
        print("\n--- 📖 Current Tasks in Database ---")
        all_tasks_in_db = db.query(Task).all()
        if not all_tasks_in_db:
            print("No tasks are currently in the database.")
        else:
            for i, task in enumerate(all_tasks_in_db, 1):
                print(f"  {i}. Task:      {task.description}")
                print(f"     Assignee:  {task.assignee}")
                print(f"     Due Date:  {task.due_date_str}")
                print(f"     Status:    {task.status}\n")
    except Exception:
        # Undo the delete and any partial inserts before surfacing the error.
        db.rollback()
        raise
    finally:
        db.close()

# Persist the tasks gathered by the previous cell; if that cell has not been
# run yet, `tasks_to_save` does not exist and there is nothing to store.
if 'tasks_to_save' not in locals():
    print("No tasks to save. Please run the first cell to process the transcript.")
else:
    update_db_and_print_tasks(tasks_to_save)