In [1]:
from pinecone import Pinecone
from dotenv import load_dotenv
import os
import voyageai

load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("rag-chat")
VOYAGE_API_KEY = os.getenv('VOYAGE_API_KEY')
vo = voyageai.Client()

  from tqdm.autonotebook import tqdm


In [2]:
query_text = "how does sam altman feel about building as a solo founder?"
result = vo.embed(query_text, model="voyage-large-2", input_type="query")

In [3]:
vector = result.embeddings[0]

In [4]:
result = index.query(
    vector=vector,
    filter={
        "pdf_id": "default"
    },
    top_k=3,
    include_metadata=True
)

In [5]:
result

{'matches': [{'id': 'default52',
              'metadata': {'pdf_id': 'default',
                           'text': 'Being a CEO is\n'
                                   'lonely. It’s important to have '
                                   'relationships with other CEOs you can call '
                                   'when everything is\n'
                                   'melting down (one of the important '
                                   'accidental discoveries of YC was a way for '
                                   'founders to\n'
                                   'have peers.)\n'
                                   'A successful startup takes a very long '
                                   'time—certainly much longer than most '
                                   'founders think\n'
                                   'at the outset. You cannot treat it as an '
                                   'all-nighter.'},
              'score': 0.791903436,
              'values': []}

In [64]:
text_strings = [i["metadata"]["text"] for i in result["matches"]]
print(text_strings)

['Consider these criteria when you’re choosing a cofounder -- it’s one of the most important\ndecisions you’ll make, and it’s often done fairly randomly. You want someone you know well,\nnot someone you just met at a cofounder dating thing. You can evaluate anyone you might\nwork with better with more data, and you really don’t want to get this one wrong. Also, at\nsome point, the expected value of the startup is likely to dip below the X axis. If you have a\npre-existing relationship with your cofounders, none of you will want to let the other down and\nyou’ll keep going. Cofounder breakups are one of the leading causes of death for early\nstartups, and we see them happen very, very frequently in cases where the founders met for\nthe express purpose of starting the company. The best case, by far, is to have a good cofounder. The next best is to be a solo founder. The\nworse case, by far, is to have a bad cofounder. If things are not working out, you should part\nways quickly. A quick 

In [6]:
import dspy
from openai import OpenAI

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
gpt4 = dspy.OpenAI(model="gpt-4", api_key=OPENAI_API_KEY)
dspy.configure(lm=gpt4)
client = OpenAI()

class QueryReformatter(dspy.Signature):
    """Given a query, make it more detailed by asking implied subquestions for a vector search."""

    query = dspy.InputField()
    new_query = dspy.OutputField(desc="The more detailed version of the query, assuming it is known the information is contained in the writing. ONLY GIVE THE QUERY, no additional text.")
queryref = dspy.Predict(QueryReformatter)

In [39]:
from typing import List, Union, Optional

class PineconeRM(dspy.Retrieve):
    def __init__(self, id:str = "", k:int = 3):
        super().__init__(k=k)
        self.id = id

    def forward(self, query:str) -> dspy.Prediction:
        queryref = dspy.Predict(QueryReformatter)
        query_redone = queryref(query=query).new_query
        voyage_call = vo.embed(query_redone, model="voyage-large-2", input_type="query")
        query_vector = voyage_call.embeddings[0]
        if not self.id:
            result = index.query(
                vector=query_vector,
                top_k=self.k,
                include_metadata=True
            )
        else:
            result = index.query(
                vector=query_vector,
                filter={
                    "pdf_id": self.id
                },
                top_k=self.k,
                include_metadata=True
            )
        text_strings = [i["metadata"]["text"] for i in result["matches"]]
        return dspy.Prediction(
            passages=text_strings
        )

In [40]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with as ground-truth information as possible."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField()

class RAG(dspy.Module):
    def __init__(self, num_passages=3, id:str = ""):
        super().__init__()

        self.retrieve = PineconeRM(id=id, k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [41]:
dspy.configure(lm=gpt4, rm=PineconeRM)

In [42]:
rag = RAG(id="default")

In [43]:
response = rag(question="How does Sam Altman perceive the division of equity among co-founders in startup companies?")

In [44]:
response

Prediction(
    context=['Consider these criteria when you’re choosing a cofounder -- it’s one of the most important\ndecisions you’ll make, and it’s often done fairly randomly. You want someone you know well,\nnot someone you just met at a cofounder dating thing. You can evaluate anyone you might\nwork with better with more data, and you really don’t want to get this one wrong. Also, at\nsome point, the expected value of the startup is likely to dip below the X axis. If you have a\npre-existing relationship with your cofounders, none of you will want to let the other down and\nyou’ll keep going. Cofounder breakups are one of the leading causes of death for early\nstartups, and we see them happen very, very frequently in cases where the founders met for\nthe express purpose of starting the company. The best case, by far, is to have a good cofounder. The next best is to be a solo founder. The\nworse case, by far, is to have a bad cofounder. If things are not working out, you should part

In [9]:
gpt4_turbo = dspy.OpenAI(model="gpt-4-turbo", api_key=OPENAI_API_KEY)

In [31]:
class QueryReformatter(dspy.Signature):
    """Given a query, make it more detailed by asking implied subquestions for a vector search."""

    query = dspy.InputField()
    new_query = dspy.OutputField(desc="The more detailed version of the query, assuming it is known the information is contained in the writing. ONLY GIVE THE QUERY, no additional text.")

class PineconeRM(dspy.Retrieve):
    """
    Retrieval model used in DSPy, reformats the query and retrieves the top k passages from Pinecone.
    """
    def __init__(self, id:str = "", k:int = 3):
        super().__init__(k=k)
        self.id = id

    def forward(self, query:str) -> dspy.Prediction:
        dspy.configure(lm=gpt4)
        queryref = dspy.Predict(QueryReformatter)
        query_redone = queryref(query=query).new_query
        voyage_call = vo.embed(query_redone, model="voyage-large-2", input_type="query")
        query_vector = voyage_call.embeddings[0]
        if not self.id:
            result = index.query(
                vector=query_vector,
                top_k=self.k,
                include_metadata=True
            )
        else:
            result = index.query(
                vector=query_vector,
                filter={
                    "pdf_id": self.id
                },
                top_k=self.k,
                include_metadata=True
            )
        text_strings = [i["metadata"]["text"] for i in result["matches"]]
        return dspy.Prediction(
            passages=text_strings
        )

class GenerateAnswer(dspy.Signature):
    """Answer questions with as ground-truth information as possible."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="complete, detailed answer to the question in max 3 sentences.")

class RAG(dspy.Module):
    """Retrieve, Answer, Generate model for question answering."""
    def __init__(self, num_passages=2, id:str = ""):
        super().__init__()

        self.retrieve = PineconeRM(id=id, k=num_passages)
        self.generate_answer = dspy.Predict(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        print(prediction)
        return dspy.Prediction(context=context, answer=prediction.answer)

def rag_qa(id, query):
    """
    Given a question and an ID, retrieves the top k passages from Pinecone and generates an answer using the RAG model.
    """
    rag = RAG(id=id)
    call = rag(question=query)
    return call

In [32]:
rag_qa(id="default", query="how does sam altman feel about raising money?")

Prediction(
    answer="Sam Altman believes that the key to successfully raising money is having a good company, as investors are looking for companies that will be successful regardless of their investment. He advises against trying to raise money when the company isn't in a good shape to attract capital, as it can damage reputation and waste time. He also suggests having fundraising conversations in parallel, insisting on clean terms, and not over-optimizing on valuation."
)


Prediction(
    context=['The secret to successfully raising money is to have a good company. All of the other stuff\nfounders do to try to over-optimize the process probably only matters about 5% of the time. Investors are looking for companies that are going to be really successful whether or not they\ninvest, but that can grow faster with outside capital.', 'The “really successful” part is important\n—because investors’ returns are dominated by the big successes, if an investor believes you\nhave a 100% chance of creating a $10 million company but almost no chance of building a\nlarger company, he/she will still probably not invest even at a very low valuation. Always\nexplain why you could be a huge success. Investors are driven by the dual fears of missing the next Google, and fear of losing money\non something that in retrospect looks obviously stupid. (For the best companies, they fear\nboth at the same time.)\nIt is a bad idea to try to raise money when your company isn’t in go