In [None]:
!pip install -U dspy-ai

In [1]:
import dspy
import chromadb
from dspy.retrieve.chromadb_rm import ChromadbRM
import json

In [2]:
from langchain_community.document_loaders import WikipediaLoader

query = "Elon Musk"
raw_documents = WikipediaLoader(query=query).load()



  lis = BeautifulSoup(html).find_all('li')


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=100
)
all_splits = text_splitter.split_documents(raw_documents)

In [4]:
from langchain_community.embeddings import GPT4AllEmbeddings
nomic_embedding = GPT4AllEmbeddings()

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [5]:
from langchain_community.vectorstores import Chroma
from dspy.retrieve.chromadb_rm import ChromadbRM
import chromadb.utils.embedding_functions as embedding_functions
NOMIC_CHROMA_COLLECTION_NAME = "dspy-rag-chroma-nomic"
NOMIC_CHROMADB_DIR = "dspy_rag_chroma_nomic/"

# Index
nomic_vectorstore = Chroma.from_documents(
    documents=all_splits,
    collection_name=NOMIC_CHROMA_COLLECTION_NAME,
    embedding=nomic_embedding,
    persist_directory=NOMIC_CHROMADB_DIR
)

nomic_retriever = nomic_vectorstore.as_retriever()

nomic_rm = ChromadbRM(NOMIC_CHROMA_COLLECTION_NAME, NOMIC_CHROMADB_DIR, k=3)

In [17]:
import os
os.environ["OPENAI_API_KEY"] = 'openai-api-key'

# Vanilla DSPy RAG Pipeline

In [6]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

class vanilla_dspy_rag(dspy.Module):
    
    # we set num_passages=1 to avoid the same passage being repeatedly retrieved for multiple times
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [19]:
# nomic embedding model + gpt4 language model
lm_gpt4 = dspy.OpenAI(model='gpt-4-0125-preview')
dspy.settings.configure(lm=lm_gpt4, rm=nomic_rm)

question_org_elon_cofounded = "List out the companies that Elon Musk co-founded with other founders."
vanilla_dspy_rag_nomic_gpt4 = vanilla_dspy_rag()
vanilla_dspy_rag_q1 = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded)
vanilla_dspy_rag_q1

Prediction(
    context=["Elon Reeve Musk ( EE-lon; born June 28, 1971) is a businessman and investor. He is the founder, chairman, CEO, and CTO of SpaceX; angel investor, CEO, product architect, and former chairman of Tesla, Inc.; owner, executive chairman, and CTO of X Corp.; founder of the Boring Company and xAI; co-founder of Neuralink and OpenAI; and president of the Musk Foundation. He is one of the wealthiest people in the world; as of April 2024, Forbes estimates his net worth to be $178 billion. \nA member of the wealthy South African Musk family, Musk was born in Pretoria and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen's University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor's degrees in economics and physics. He moved to California in 1995 to attend Stanford University, but dropped 

In [20]:
# nomic embedding model + gpt4 language model
question_org_elon_cofounded_1a = "Did Elon Musk co-found SpaceX with other founders?"
vanilla_dspy_rag_q1a = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded_1a)
vanilla_dspy_rag_q1a

Prediction(
    context=["== Notable members ==\nMaye Musk (née Haldeman; born 1948), model and dietitian. She has appeared on the cover of several magazines, including a Time magazine health edition, Women's Day, and Vogue; Elon's mother.\nElon Musk (born 1971), entrepreneur and business magnate. Variously CEO, CTO, and/or Chairman of SpaceX, Tesla, X Corp., and Neuralink. He was Time Magazine's 2021 Person of the Year.\nKimbal Musk (born 1972), entrepreneur, philanthropist, and restaurateur. He founded Zip2 in 1998 with his brother Elon and later sold it to Compaq for $307 million. He is the co-founder and chairman of Big Green.\nTosca Musk (born 1974), filmmaker and sister of Elon; she is the co-founder of Passionflix, an OTT entertainment streaming platform and production company.\nLyndon Rive (born 1977), businessman who co-founded SolarCity and served as its CEO until 2017. He is Elon's cousin through his mother Kaye Rive, Maye Musk's twin.\n\n\n== References ==", "== Notable mem

In [21]:
# nomic embedding model + gpt4 language model
question_org_elon_cofounded_1b = "What are the other founders Elon Musk co-found SpaceX with?"
vanilla_dspy_rag_q1b = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded_1b)
vanilla_dspy_rag_q1b

Prediction(
    context=["== Notable members ==\nMaye Musk (née Haldeman; born 1948), model and dietitian. She has appeared on the cover of several magazines, including a Time magazine health edition, Women's Day, and Vogue; Elon's mother.\nElon Musk (born 1971), entrepreneur and business magnate. Variously CEO, CTO, and/or Chairman of SpaceX, Tesla, X Corp., and Neuralink. He was Time Magazine's 2021 Person of the Year.\nKimbal Musk (born 1972), entrepreneur, philanthropist, and restaurateur. He founded Zip2 in 1998 with his brother Elon and later sold it to Compaq for $307 million. He is the co-founder and chairman of Big Green.\nTosca Musk (born 1974), filmmaker and sister of Elon; she is the co-founder of Passionflix, an OTT entertainment streaming platform and production company.\nLyndon Rive (born 1977), businessman who co-founded SolarCity and served as its CEO until 2017. He is Elon's cousin through his mother Kaye Rive, Maye Musk's twin.\n\n\n== References ==", "== Notable mem

In [22]:
# nomic embedding model + gpt4 language model
question_org_elon_cofounded_1c = "Did Elon co-found The Boring Company with other founders?"
vanilla_dspy_rag_q1c = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded_1c)
vanilla_dspy_rag_q1c

Prediction(
    context=["== Notable members ==\nMaye Musk (née Haldeman; born 1948), model and dietitian. She has appeared on the cover of several magazines, including a Time magazine health edition, Women's Day, and Vogue; Elon's mother.\nElon Musk (born 1971), entrepreneur and business magnate. Variously CEO, CTO, and/or Chairman of SpaceX, Tesla, X Corp., and Neuralink. He was Time Magazine's 2021 Person of the Year.\nKimbal Musk (born 1972), entrepreneur, philanthropist, and restaurateur. He founded Zip2 in 1998 with his brother Elon and later sold it to Compaq for $307 million. He is the co-founder and chairman of Big Green.\nTosca Musk (born 1974), filmmaker and sister of Elon; she is the co-founder of Passionflix, an OTT entertainment streaming platform and production company.\nLyndon Rive (born 1977), businessman who co-founded SolarCity and served as its CEO until 2017. He is Elon's cousin through his mother Kaye Rive, Maye Musk's twin.\n\n\n== References ==", "== Notable mem

In [23]:
# nomic embedding model + gpt4 language model
question_org_elon_cofounded_1d = "Is The Boring Company one of the companies Elon co-founded with other founders?"
vanilla_dspy_rag_q1d = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded_1d)
vanilla_dspy_rag_q1d

Prediction(
    context=["== Notable members ==\nMaye Musk (née Haldeman; born 1948), model and dietitian. She has appeared on the cover of several magazines, including a Time magazine health edition, Women's Day, and Vogue; Elon's mother.\nElon Musk (born 1971), entrepreneur and business magnate. Variously CEO, CTO, and/or Chairman of SpaceX, Tesla, X Corp., and Neuralink. He was Time Magazine's 2021 Person of the Year.\nKimbal Musk (born 1972), entrepreneur, philanthropist, and restaurateur. He founded Zip2 in 1998 with his brother Elon and later sold it to Compaq for $307 million. He is the co-founder and chairman of Big Green.\nTosca Musk (born 1974), filmmaker and sister of Elon; she is the co-founder of Passionflix, an OTT entertainment streaming platform and production company.\nLyndon Rive (born 1977), businessman who co-founded SolarCity and served as its CEO until 2017. He is Elon's cousin through his mother Kaye Rive, Maye Musk's twin.\n\n\n== References ==", "== Notable mem

In [24]:
# nomic embedding model + gpt4 language model
question_org_elon_cofounded_1e = "List out the companies that Elon Musk co-founded with other founders."
vanilla_dspy_rag_q1e = vanilla_dspy_rag_nomic_gpt4.forward(question=question_org_elon_cofounded_1e)
vanilla_dspy_rag_q1e

Prediction(
    context=["Elon Reeve Musk ( EE-lon; born June 28, 1971) is a businessman and investor. He is the founder, chairman, CEO, and CTO of SpaceX; angel investor, CEO, product architect, and former chairman of Tesla, Inc.; owner, executive chairman, and CTO of X Corp.; founder of the Boring Company and xAI; co-founder of Neuralink and OpenAI; and president of the Musk Foundation. He is one of the wealthiest people in the world; as of April 2024, Forbes estimates his net worth to be $178 billion. \nA member of the wealthy South African Musk family, Musk was born in Pretoria and briefly attended the University of Pretoria before immigrating to Canada at age 18, acquiring citizenship through his Canadian-born mother. Two years later, he matriculated at Queen's University at Kingston in Canada. Musk later transferred to the University of Pennsylvania and received bachelor's degrees in economics and physics. He moved to California in 1995 to attend Stanford University, but dropped 