In [9]:
import getpass
import os

# Prefer a key already exported as GROQ_API_KEY so the notebook can be re-run
# unattended; otherwise prompt without echoing the key into the notebook.
api_key = os.environ.get("GROQ_API_KEY", "").strip()
if not api_key:
    api_key = getpass.getpass("Enter your GROQ API key: ").strip()

# Fail here, next to the prompt, rather than later inside the first LM request.
if not api_key:
    raise ValueError("A GROQ API key is required (set GROQ_API_KEY or enter it at the prompt).")

In [10]:
import dspy
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_groq import ChatGroq

class GroqLM(dspy.LM):
    """DSPy language-model adapter that sends requests to Groq via LangChain's ``ChatGroq``.

    The Groq key is read from the notebook-level ``api_key`` variable when an
    instance is created, so the credentials cell must have been run first.
    """

    # Chat roles this adapter knows how to translate into LangChain messages.
    # "assistant" must be included: dropping assistant turns would silently
    # remove prior model answers from multi-turn / few-shot conversations.
    _ROLE_TO_MESSAGE = {
        "system": SystemMessage,
        "user": HumanMessage,
        "assistant": AIMessage,
    }

    def __init__(self, model="llama-3.3-70b-versatile", **kwargs):
        """Create the adapter.

        Args:
            model: Model identifier passed to both ``dspy.LM`` and ``ChatGroq``.
            **kwargs: Forwarded unchanged to ``dspy.LM.__init__``.
        """
        super().__init__(model=model, **kwargs)
        self.model_name = model
        self.client = ChatGroq(model=model, api_key=api_key)

    def __call__(self, prompt=None, messages=None, **kwargs):
        """Run one completion and return it as a single-element list.

        Args:
            prompt: Plain-text prompt; used only when ``messages`` is empty or None.
            messages: Chat turns as dicts with ``"role"`` and ``"content"`` keys.
                Turns whose role is not system/user/assistant are skipped.
            **kwargs: Accepted so callers may pass extra options; they are not
                forwarded to ``ChatGroq``.

        Returns:
            ``[response.content]`` for the response returned by ``ChatGroq.invoke``.

        Raises:
            ValueError: If neither ``messages`` nor ``prompt`` is supplied.
        """
        if messages:
            # Convert role/content dicts to LangChain message objects.
            lc_messages = []
            for msg in messages:
                message_cls = self._ROLE_TO_MESSAGE.get(msg.get("role"))
                if message_cls is None:
                    # Unrecognized role: skipped, as before.
                    continue
                lc_messages.append(message_cls(content=msg["content"]))
        elif prompt is not None:
            # Simple prompt format: a single user turn.
            lc_messages = [HumanMessage(content=prompt)]
        else:
            raise ValueError("GroqLM requires either `prompt` or a non-empty `messages` list.")

        response = self.client.invoke(lc_messages)
        return [response.content]


In [11]:
# Instantiate the Groq-backed LM (default model) and pass it to dspy.configure.
# GroqLM() reads the notebook-level `api_key`, so the credentials cell must run first.
lm = GroqLM()
dspy.configure(lm=lm)



Summarizer

In [1]:
import PyPDF2

def extract_pdf_text(path):
    """Return the concatenated text of every page of the PDF at ``path``.

    Args:
        path: Filesystem path of the PDF; the file is opened in binary mode.

    Returns:
        A string in which each page's extracted text is followed by a newline.
        A page that yields no text contributes just the newline.
    """
    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # `or ""` guards against a page whose extract_text() gives None
        # (possibly image-only pages on some PyPDF2 versions; unconfirmed),
        # which would otherwise raise TypeError on the concatenation.
        page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    # Single join instead of repeated `+=` on a growing string.
    return "".join(page_texts)


In [13]:
import dspy

class SummarizePDF(dspy.Signature):
    """Summarize a long document extracted from a PDF with quality metadata."""

    # Input: the document text to summarize.
    document: str = dspy.InputField()

    # Outputs, declared in the same order as before.
    summary: str = dspy.OutputField(desc="Concise, high-level summary of the document.")
    key_points: list[str] = dspy.OutputField(desc="A bullet list of the most important points.")

    # Self-reported quality metadata produced by the model alongside the summary.
    confidence: float = dspy.OutputField(desc="Model-estimated confidence score between 0 and 1.")
    length_tokens: int = dspy.OutputField(desc="Approximate token length of the summary.")
    coverage_score: float = dspy.OutputField(
        desc="Model’s self-rating of how well the summary covers the document (0–1)."
    )


In [15]:
class PDFSummarizer(dspy.Module):
    """DSPy module that runs the ``SummarizePDF`` signature through ``dspy.ChainOfThought``."""

    def __init__(self):
        super().__init__()
        self.summarize = dspy.ChainOfThought(SummarizePDF)

    def forward(self, document: str):
        """Summarize ``document`` and return a Prediction holding the five signature outputs."""
        result = self.summarize(document=document)
        # Copy exactly these fields, in this order, into a fresh Prediction.
        exported = ("summary", "key_points", "confidence", "length_tokens", "coverage_score")
        return dspy.Prediction(**{field: getattr(result, field) for field in exported})


In [19]:
# 1. Extract PDF text (relative path: resolved against the current working directory)
pdf_text = extract_pdf_text("pdfs/Book-Summary-Rich-Dad.pdf")

# 2. Initialize summarizer (uses whichever LM was passed to dspy.configure earlier)
summarizer = PDFSummarizer()

# 3. Run it. `summary` is the whole dspy.Prediction returned by forward(),
#    so the summary text itself is at `summary.summary`.
summary = summarizer(document=pdf_text)
print(summary)


Prediction(
    summary='The book "Rich Dad Poor Dad" teaches readers how to make their money work for them by following six key rules. The rules emphasize the importance of financial literacy, minding one\'s own business, and using the power of corporations to reduce taxes. The book also stresses the need to continually learn and improve one\'s financial intelligence in order to achieve financial freedom. By following these rules and principles, individuals can break free from the cycle of working for money and create a life of financial independence.',
    key_points="* The rich do not work for money, they have money work for them\n* Financial literacy is necessary to achieve financial freedom\n* Minding one's own business is crucial for financial success\n* Using the cover of a corporation can help reduce taxes\n* The rich invent money by creating new opportunities and taking calculated risks\n* Working to learn and continually improving one's financial intelligence is key to achiev

RAG (Retrieval-Augmented Generation)

In [22]:
%pip install sentence-transformers faiss-cpu

Collecting sentence-transformers
  Downloading sentence_transformers-5.1.2-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.9.1-cp314-cp314-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Downloading scipy-1.16.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (62 kB)
Collecting safetensors>=0.4.3 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Downloading safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.met

In [21]:
import os
import math
import json
import numpy as np
import faiss
from typing import List, Dict, Any

import dspy
from sentence_transformers import SentenceTransformer
import PyPDF2


ModuleNotFoundError: No module named 'faiss'