In [1]:
!pip install -q memvid PyPDF2

In [2]:
from PyPDF2 import PdfReader

from memvid import MemvidEncoder, MemvidChat
from google.colab import files, userdata


In [3]:
# Open the Colab upload dialog; the result is keyed by uploaded file name.
uploaded = files.upload()
if not uploaded:
    # A cancelled/empty upload would otherwise surface as a bare IndexError below.
    raise ValueError("No file was uploaded. Re-run this cell and choose a PDF.")
# Only the first uploaded file is used by the rest of the notebook.
pdf_path = next(iter(uploaded))

Saving idfc-mitc.pdf to idfc-mitc.pdf


In [4]:
def extract_text_from_pdf(pdf_path, password=None):
    """
    Extracts text content from a PDF file, including password-protected (encrypted) PDFs.

    The PDF is read page by page (after decryption, if needed). Pages that yield
    no text, or only whitespace (e.g., scanned images), are reported and skipped.

    Args:
        pdf_path (str): Path to the input PDF file.
        password (str, optional): Password to decrypt the PDF, if it's encrypted. Defaults to None.

    Returns:
        List[str]: A list of extracted text strings, one per page that contained text.

    Raises:
        ValueError: If the PDF is encrypted and no password is provided, or if
            decryption fails (either by raising or by reporting a rejected password).
    """
    reader = PdfReader(pdf_path)

    # Handle encrypted PDFs
    if reader.is_encrypted:
        if password is None:
            raise ValueError("PDF is encrypted. Please provide the password.")
        try:
            decrypt_result = reader.decrypt(password)
        except Exception as e:
            raise ValueError("Failed to decrypt PDF. Is the password correct?") from e
        # decrypt() signals a rejected password through a falsy return value
        # rather than an exception, so the result must be checked explicitly.
        if not decrypt_result:
            raise ValueError("Failed to decrypt PDF. Is the password correct?")

    # Extract text from each page (page numbers are reported 1-based)
    text_chunks = []
    for page_num, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        # Whitespace-only pages carry no usable content; treat them as empty.
        if text and text.strip():
            text_chunks.append(text)
        else:
            print(f"No text extracted from page {page_num} (may be scanned or image-based)")
    return text_chunks

In [5]:
from PyPDF2 import PdfReader
from memvid import MemvidEncoder

def split_text_into_chunks(text, max_length=400):
    """
    Splits a long string of text into smaller chunks of fixed maximum length.

    The split is purely positional: every chunk except possibly the last has
    exactly ``max_length`` characters, and words may be cut at chunk boundaries.

    Args:
        text (str): The input text to split.
        max_length (int, optional): Maximum number of characters per chunk.
            Must be a positive integer. Defaults to 400.

    Returns:
        List[str]: A list of text chunks (empty if ``text`` is empty).

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    # A negative step would make range() yield nothing and silently drop all
    # text; zero would fail with an unrelated-looking range() error.
    if max_length <= 0:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")
    return [text[start:start + max_length] for start in range(0, len(text), max_length)]


def extract_text_from_pdf(pdf_path, password=None):
    """
    Extracts text content from each page of a PDF file.

    Note: this redefines the function of the same name from the earlier cell, so
    it is the version in effect when the notebook runs top to bottom. It therefore
    keeps the optional ``password`` argument instead of silently dropping it.

    Args:
        pdf_path (str): Path to the input PDF file.
        password (str, optional): Password used to decrypt an encrypted PDF. Defaults to None.

    Returns:
        List[str]: A list of text content per page. Pages with no text
        (or only whitespace) are skipped.

    Raises:
        ValueError: If the PDF is encrypted and no password is given, or if decryption fails.
    """
    reader = PdfReader(pdf_path)

    if reader.is_encrypted:
        if password is None:
            raise ValueError("PDF is encrypted. Please provide the password.")
        try:
            decrypt_result = reader.decrypt(password)
        except Exception as e:
            raise ValueError("Failed to decrypt PDF. Is the password correct?") from e
        # A rejected password is reported via a falsy return value, not an exception.
        if not decrypt_result:
            raise ValueError("Failed to decrypt PDF. Is the password correct?")

    text_chunks = []
    for page_num, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        if text and text.strip():
            text_chunks.append(text)
        else:
            print(f"⚠️ No text found on page {page_num}. It may be scanned or image-based.")
    return text_chunks


def build_memvid_from_pdf(pdf_path, video_file="/content/memory.mp4", index_file="/content/memory_index.json", password=None, chunk_size=400):
    """
    Converts a PDF document into a searchable AI memory stored in a video file using Memvid.

    This function performs the following steps:
        1. Extracts text from the PDF (decrypting it with ``password`` if needed).
        2. Splits each page's text into chunks of at most ``chunk_size`` characters.
        3. Adds the chunks to a MemvidEncoder and builds the video.
        4. Reports the video (.mp4) and index (.json) output paths.

    Args:
        pdf_path (str): Path to the input PDF file.
        video_file (str, optional): Output path for the generated MP4 memory file. Defaults to '/content/memory.mp4'.
        index_file (str, optional): Output path for the memory index JSON file. Defaults to '/content/memory_index.json'.
        password (str, optional): Password to decrypt the PDF if it is encrypted. Defaults to None.
        chunk_size (int, optional): Maximum characters per chunk. Defaults to 400.

    Returns:
        None

    Raises:
        ValueError: If the PDF cannot be decrypted or contains no extractable text.
    """
    raw_chunks = extract_text_from_pdf(pdf_path, password=password)

    # Further split page-wise text into smaller chunks for QR encoding compatibility
    chunks = []
    for page_text in raw_chunks:
        chunks.extend(split_text_into_chunks(page_text, max_length=chunk_size))  # Keep chunks small for video encoding

    print(f"Total chunks prepared: {len(chunks)}")

    # Fail early with a clear message rather than encoding an empty memory.
    if not chunks:
        raise ValueError("No extractable text found in the PDF; nothing to encode.")

    # Initialize encoder and build the memory video
    encoder = MemvidEncoder()
    encoder.add_chunks(chunks)
    encoder.build_video(video_file, index_file)

    print(f"Memory video saved as: {video_file}")
    print(f"Index file saved as: {index_file}")


In [6]:
def chat_with_memvid(
        video_file="/content/memory.mp4",
        index_file="/content/memory_index.json",
        llm_model="gpt-3.5-turbo"):
    """
    Launches an interactive chat session with a Memvid video memory using an OpenAI-powered LLM.

    This function creates a MemvidChat instance from a pre-built memory (MP4 + index),
    starts the session, and enters a REPL-style loop where the user can ask questions
    about the contents stored in the video memory.

    Args:
        video_file (str): Path to the memory video (.mp4) file. Defaults to '/content/memory.mp4'.
        index_file (str): Path to the memory index (.json) file. Defaults to '/content/memory_index.json'.
        llm_model (str): Name of the OpenAI model to use. Defaults to 'gpt-3.5-turbo'.

    Requirements:
        - Colab `userdata` must contain a valid 'OPENAI_API_KEY' secret.
        - The video and index files must have been previously created using MemvidEncoder.
        - `MemvidChat` should be imported from the memvid library.

    Behavior:
        - Continues until the user types 'exit' or 'quit' (case-insensitive,
          surrounding whitespace ignored), or input ends (EOF / interrupt).
        - Blank input is ignored rather than sent to the LLM.

    Returns:
        None
    """
    chat = MemvidChat(
        video_file,
        index_file,
        llm_provider="openai",
        llm_model=llm_model,
        llm_api_key=userdata.get('OPENAI_API_KEY')  # Read the key from Colab secrets, never hardcode it
    )

    chat.start_session()

    # Interactive question-answer loop
    while True:
        try:
            query = input("Ask something (or type 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the cell or closing stdin ends the chat without a traceback.
            print()
            break
        if not query:
            continue  # Don't spend an LLM call on an empty question
        if query.lower() in ("exit", "quit"):
            break
        response = chat.chat(query)
        print("Bot:", response)

In [7]:
# Build the video memory from the uploaded PDF, then start the interactive chat.
# Both calls rely on the same default paths (/content/memory.mp4 and
# /content/memory_index.json), so the chat reads what the build step just wrote.
build_memvid_from_pdf(pdf_path)
chat_with_memvid()

⚠️ No text found on page 26. It may be scanned or image-based.
Total chunks prepared: 189


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Generating QR frames: 100%|██████████| 189/189 [00:45<00:00,  4.17it/s]


🐛 FRAMES: 189 files in /tmp/tmpmonnhcq0/frames
🐛 FFMPEG: frames=/tmp/tmpmonnhcq0/frames → docker_mount=/tmp/tmpmonnhcq0


Writing video frames: 100%|██████████| 189/189 [00:00<00:00, 246.22it/s]


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Memory video saved as: /content/memory.mp4
Index file saved as: /content/memory_index.json
Using openai for responses.
--------------------------------------------------
Ask something (or type 'exit'): what is this document about ?
Bot: The document mentioned in the contexts provided is about the Most Important Terms and Conditions (MITC) related to a card offered by IDFC FIRST Bank and its partner. It covers various aspects such as card usage guidelines, consequences of certain actions (like delay in payments or misuse of the card), necessary information sharing with co-branded partners, and grievance redressal procedures. It also includes details on rewards programs, portfolio statistical analysis, and compliance with relevant laws and regulations.
Ask something (or type 'exit'): whats the best card for people with annual income less than 5lakhs
Bot: Based on the provided contexts from the knowledge base, the best card for people with an annual income less than 5 lakhs could be the F

| Feature / Approach          | **Memvid (Video-as-DB)**        | **FAISS / Qdrant**          | **Cloud RAG (LangChain, Pinecone)** | **Local RAG + Ollama / Llama.cpp**   |
| --------------------------- | ------------------------------- | --------------------------- | ----------------------------------- | ------------------------------------ |
| **Storage Format**          | `.mp4` + `.json`                | Vector index                | Cloud vector store                  | Local vector index                   |
| **Database Needed**         | ❌ None                          | ✅ Yes (in-memory/on-disk)   | ✅ Yes (e.g., Pinecone, Weaviate)    | ✅ Yes (e.g., Chroma, FAISS)          |
| **Internet Required**       | 🟡 Only for hosted-LLM answers (search is local) | ❌ No                        | ✅ Yes                               | ❌ No                                 |
| **LLM Integration**         | OpenAI, Claude, Gemini          | Pluggable                   | API-based (OpenAI, Cohere, etc.)    | Local models (Mistral, LLaMA2, etc.) |
| **Offline Compatibility**   | 🟡 Search offline; chat needs the hosted LLM | ✅ Full                      | ❌ No                                | ✅ Full                               |
| **File Format Portability** | ✅ `.mp4` shareable              | ❌ Model-dependent           | ❌ Cloud-locked                      | ✅ Local disk                         |
| **Setup Complexity**        | 🟢 Super simple                 | 🟡 Moderate                 | 🔴 High                             | 🟡 Moderate                          |
| **Use Case Fit**            | Portable AI memory, quick demos | Production RAG, fast search | Scalable enterprise-grade RAG       | Privacy-first apps, dev workflows    |
| **Scalability**             | 🚫 Limited by video encoding    | ✅ Highly scalable           | ✅ Cloud autoscale                   | 🟡 Scales with local resources       |
| **Security & Privacy**      | 🟡 Memory files stay local; retrieved context is sent to the hosted LLM | ✅ Strong (local option)     | ❌ Data leaves system                | ✅ Strong (no external access)        |
| **Ideal For**               | Students, demos, offline tools  | Internal RAG systems        | Customer-facing apps, chatbots      | Hackers, tinkerers, local assistants |
