<a href="https://colab.research.google.com/github/sokkerstar123/Capstone/blob/main/notebook/legacy_rag_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Phase I: Retrieval-Augmented Generation (RAG) Prototype

**Note:** This notebook documents the initial experimental approach using RAG. While this method was eventually superseded by Fine-Tuning (Knowledge Distillation) in the final product, the logic below demonstrates the vector retrieval architecture designed during Week 2.

**Dependencies:**
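This notebook requires `faiss-cpu`, `sentence-transformers`, `transformers`, `nltk`, `numpy`, and `pandas`. The first code cell installs them (together with `datasets`) when the notebook is run on its own.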

In [None]:
# Install legacy dependencies if running independently
!pip install -q faiss-cpu sentence-transformers transformers pandas nltk datasets

import pandas as pd
import numpy as np
import faiss
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer

# Setup models (Simulated setup for RAG)
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(model_name)

# nltk.sent_tokenize (used by get_first_n_sentences) needs the Punkt sentence
# tokenizer data, which is not bundled with the nltk package. Newer NLTK
# releases load "punkt_tab" while older ones load "punkt", so request both;
# an unknown resource name only makes download() return False.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)

# Placeholder for tokenizer/generator (using baseline T5 or BART as per original experiment).
# The checkpoint actually loaded here is BART (facebook/bart-large-cnn).
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
generator = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

In [None]:
def chunk_text(text, chunk_size=100, overlap=20):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. The final
    chunk may be shorter than ``chunk_size``.

    Args:
        text: Input string; it is split on any whitespace.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared between consecutive chunks. Must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces); an
        empty list when ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``.
    """
    stride = chunk_size - overlap
    # A non-positive stride would either crash range() with an opaque
    # "arg 3 must not be zero" or silently yield no chunks at all.
    if chunk_size <= 0 or stride <= 0:
        raise ValueError(
            f"chunk_size must be positive and greater than overlap "
            f"(got chunk_size={chunk_size}, overlap={overlap})"
        )

    words = text.split()
    # Every start index is < len(words) and chunk_size > 0, so no slice is empty.
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), stride)
    ]

# Mock retrieval function for demonstration context
def retrieve_context(query_text, top_k=3, exclude_video_id=None, chunk_video_ids=None):
    """Return the texts of the indexed chunks most similar to ``query_text``.

    Relies on module-level objects: ``embedding_model`` (the
    SentenceTransformer created in the setup cell), ``index`` (a FAISS index)
    and ``all_chunks`` (the chunk texts).
    NOTE(review): assumes ``all_chunks[i]`` is the text whose vector was added
    to ``index`` at position ``i`` -- confirm where the index is built.

    Args:
        query_text: Text to embed and search with.
        top_k: Maximum number of chunks to return.
        exclude_video_id: Optional id of a video whose own chunks must not be
            returned (e.g. the video currently being summarised).
        chunk_video_ids: Optional sequence parallel to ``all_chunks`` giving
            the video id of each chunk. Needed for ``exclude_video_id`` to
            have any effect; without it a warning is issued and results are
            returned unfiltered.

    Returns:
        A list of at most ``top_k`` chunk texts, in the order returned by the
        index search.
    """
    if top_k <= 0:
        return []

    can_filter = exclude_video_id is not None and chunk_video_ids is not None
    if exclude_video_id is not None and chunk_video_ids is None:
        import warnings

        warnings.warn(
            "exclude_video_id was given without chunk_video_ids; "
            "retrieved chunks are not filtered by video.",
            stacklevel=2,
        )

    # Over-fetch by the number of chunks that will be dropped so that top_k
    # results can still be returned after filtering.
    n_search = top_k
    if can_filter:
        n_search += sum(1 for vid in chunk_video_ids if vid == exclude_video_id)

    # Encode query (FAISS expects float32 vectors).
    query_emb = np.asarray(embedding_model.encode([query_text]), dtype="float32")
    # Search for the n_search most similar chunks.
    _, neighbor_ids = index.search(query_emb, n_search)

    retrieved = []
    for raw_idx in neighbor_ids[0]:
        idx = int(raw_idx)
        # FAISS pads with -1 when fewer than k vectors are available; indexing
        # all_chunks with -1 would wrongly return the last chunk.
        if idx < 0:
            continue
        if can_filter and chunk_video_ids[idx] == exclude_video_id:
            continue
        retrieved.append(all_chunks[idx])
        if len(retrieved) == top_k:
            break
    return retrieved

def get_first_n_sentences(text, n):
    """Return the first ``n`` sentences of ``text`` joined by single spaces.

    Sentence boundaries are determined by ``nltk.sent_tokenize``. If ``text``
    has fewer than ``n`` sentences, all of them are returned.
    """
    leading = nltk.sent_tokenize(text)[:n]
    return ' '.join(leading)

def construct_rag_prompt(main_text, retrieved_chunks, max_sentences=3):
    """Build the summarisation prompt from a transcript and retrieved chunks.

    Args:
        main_text: Transcript text placed under the "Transcript:" heading.
        retrieved_chunks: Iterable of chunk strings; they are newline-joined
            and placed under the "Retrieved context:" heading.
        max_sentences: Sentence limit stated in the final instruction line.

    Returns:
        The prompt string. Every line after the first starts with four spaces
        and the string ends with a newline followed by four spaces.
    """
    sections = (
        "You are an expert summarizer. Given the following transcript and relevant context, generate a concise summary.",
        "Transcript:",
        f"{main_text}",
        "Retrieved context:",
        "\n".join(retrieved_chunks),
        f"Limit your summary to {max_sentences} sentences.",
        # Empty last section yields the trailing newline + indent.
        "",
    )
    # The separator carries the four-space indent that precedes each section.
    return "\n    ".join(sections)

# Core Function
def getRagSummary(transcript, exclude_video_id, n=11, top_k=5, max_sentences=2):
    """Generate a retrieval-augmented summary for ``transcript``.

    Steps: take the first ``n`` sentences of the transcript, retrieve
    ``top_k`` context chunks for them, build the prompt, then run the
    module-level ``tokenizer`` / ``generator`` pair on it.

    Args:
        transcript: Full transcript text.
        exclude_video_id: Passed to ``retrieve_context`` as the
            ``exclude_video_id`` keyword.
            NOTE: ``retrieve_context`` must accept that keyword for this call
            to succeed.
        n: Number of leading sentences used as both query and prompt text.
        top_k: Number of chunks requested from ``retrieve_context``.
        max_sentences: Sentence limit written into the prompt.

    Returns:
        The decoded summary string produced by the generator.
    """
    lead_text = get_first_n_sentences(transcript, n=n)
    rag_prompt = construct_rag_prompt(
        lead_text,
        retrieve_context(lead_text, top_k=top_k, exclude_video_id=exclude_video_id),
        max_sentences=max_sentences,
    )

    # Prompts longer than 1024 tokens are truncated before generation.
    encoded = tokenizer(rag_prompt, return_tensors="pt", max_length=1024, truncation=True)
    # Uses generator.generate directly, as in the original design.
    generated = generator.generate(**encoded, max_length=150)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)