In [None]:
# Install the PromptLayer client into the kernel's environment (-qq keeps pip quiet).
# NOTE(review): later cells also import fastrepl, langchain, chromadb and datasets;
# presumably they are already installed — confirm before running on a fresh kernel.
%pip install -qq promptlayer
# Put the OpenAI key after the `=`. Keep comments off the `%env` line itself:
# text placed after `=` would likely be treated as part of the value.
%env OPENAI_API_KEY=

# Placeholders read by the following cells; fill both in before running them.
# NOTE(review): do not commit the notebook with real keys pasted here.
OPENAI_API_KEY=""
PL_API_KEY = ""

In [None]:
import promptlayer

# Give the PromptLayer client the key from the setup cell.
promptlayer.api_key = PL_API_KEY

# Use the `openai` object exposed by PromptLayer instead of the plain OpenAI
# module; QA.generate calls it with `return_pl_id=True` and expects a
# (response, request id) pair back.
openai = promptlayer.openai
openai.api_key = OPENAI_API_KEY

# NOTE(review): imported after the key setup above; whether fastrepl relies on
# that ordering cannot be told from this notebook, so the order is left as is.
import fastrepl

In [None]:
# RAG evaluator built around a single RAGAS node that uses gpt-3.5-turbo
# with the "Faithfulness" metric.
faithfulness_node = fastrepl.RAGAS(model="gpt-3.5-turbo", metric="Faithfulness")
evaluator = fastrepl.RAGEvaluator(node=faithfulness_node)

# Two-row sample: the same question and context paired with two different answers.
sample_rows = {
    "question": ["how to do great work?"] * 2,
    "contexts": [["There's no one simple way to do great work."]] * 2,
    "answer": [
        "Just do it.",
        "You shouldn't look for one-size-fits-all solutions.",
    ],
}
ds = fastrepl.Dataset.from_dict(sample_rows)

# Evaluate the sample locally; the cell displays the "result" entry of the output.
runner = fastrepl.local_runner(evaluator=evaluator, dataset=ds)
result = runner.run()

result["result"]

In [None]:
# Runner over the same evaluator, created with the PromptLayer API key.
# QA.generate hands it one-row datasets that carry a PromptLayer request id.
pl_runner = fastrepl.pl_runner(
    evaluator=evaluator,
    api_key=PL_API_KEY,
)

In [None]:
def create_docs(
    url: str, chunk_size: int = 500, chunk_overlap: int = 100
) -> list[str]:
    """Load a web page and split its text into overlapping chunks.

    Args:
        url: Address of the page, loaded with ``WebBaseLoader``.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to
            100, the value that used to be hard-coded here.

    Returns:
        The ``page_content`` of every chunk except the first one.
    """
    # Imported lazily so langchain is only required when documents are built.
    from langchain.document_loaders import WebBaseLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    pages = WebBaseLoader(url).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = splitter.split_documents(pages)

    # The first chunk is deliberately skipped, exactly as before.
    # NOTE(review): presumably it only holds page header/boilerplate — confirm.
    return [chunk.page_content for chunk in chunks[1:]]


def create_collection(name: str, docs: list[str]):
    """Put ``docs`` into a collection of an ephemeral Chroma client.

    The collection is requested with ``get_or_create=True`` and an OpenAI
    embedding function built from the notebook-level ``OPENAI_API_KEY``.
    Documents are added under the ids "0", "1", ... in input order.

    Args:
        name: Name of the collection.
        docs: Texts to store.

    Returns:
        The collection object returned by the client, after the documents
        have been added.
    """
    import chromadb
    from chromadb.utils import embedding_functions

    chroma_client = chromadb.EphemeralClient()
    openai_embedder = embedding_functions.OpenAIEmbeddingFunction(
        api_key=OPENAI_API_KEY
    )
    store = chroma_client.create_collection(
        name=name,
        get_or_create=True,
        embedding_function=openai_embedder,
    )

    # Ids are simply the stringified positions of the documents.
    doc_ids = list(map(str, range(len(docs))))
    store.add(documents=docs, ids=doc_ids)
    return store

In [None]:
from typing import List

from chromadb import Collection


class QA:
    """Retrieval-augmented question answering backed by a Chroma collection.

    Answers are generated with the PromptLayer-wrapped ``openai`` object, each
    request is linked to the "simple-rag" prompt template in PromptLayer, and
    every question/contexts/answer triple is handed to the notebook-level
    ``pl_runner`` for evaluation.
    """

    def __init__(self, collection: Collection) -> None:
        """Store the collection and fetch the prompt templates.

        Args:
            collection: Collection queried for context passages.
        """
        self.collection = collection

        self.tpl_name = "simple-rag"
        tpl_dict = promptlayer.prompts.get(self.tpl_name, version=1)
        # Assumes message 0 is the system template and message 1 the user
        # template, each nested under "prompt" -> "template" — TODO confirm
        # against the template stored in PromptLayer.
        self.system_tpl = tpl_dict["messages"][0]["prompt"]["template"]
        self.user_content_tpl = tpl_dict["messages"][1]["prompt"]["template"]

    def retrieve_docs(self, question: str) -> List[str]:
        """Query the collection for ``question`` with ``n_results=1``.

        Returns:
            The documents returned for that single query text.
        """
        result = self.collection.query(query_texts=[question], n_results=1)
        # Exactly one query text was sent, so its documents sit at index 0.
        return result["documents"][0]

    def generate(self, question: str, contexts: List[str]) -> str:
        """Answer ``question`` from ``contexts``, then track and evaluate it.

        Args:
            question: Formatted into the user template.
            contexts: Formatted (as a list) into the system template.

        Returns:
            The content of the first choice of the chat completion.
        """
        model = "gpt-3.5-turbo"
        messages = [
            {
                "role": "system",
                "content": self.system_tpl.format(contexts=contexts),
            },
            {
                "role": "user",
                "content": self.user_content_tpl.format(question=question),
            },
        ]

        # `return_pl_id=True` makes the call return the PromptLayer request id
        # alongside the response.
        response, request_id = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            return_pl_id=True,
        )
        answer = response.choices[0].message.content

        # Report the variables that were actually substituted into the
        # templates. The model's answer is output, not a template input, so it
        # is not listed here.
        promptlayer.track.prompt(
            request_id=request_id,
            prompt_name=self.tpl_name,
            prompt_input_variables={"contexts": contexts, "question": question},
        )

        sample = fastrepl.Dataset.from_dict(
            {
                "question": [question],
                "contexts": [contexts],
                "answer": [answer],
                "request_id": [request_id],
            }
        )
        # Per the original author's note, the runner works on a different
        # thread — NOTE(review): not verifiable from this notebook.
        pl_runner.run(sample)

        return answer

    def run(self, question: str) -> str:
        """Retrieve contexts for ``question`` and generate an answer from them."""
        contexts = self.retrieve_docs(question)
        return self.generate(question, contexts)

In [None]:
# Chunk the essay, index the chunks, and wire the collection into a QA instance.
docs = create_docs(url="http://paulgraham.com/greatwork.html")
collection = create_collection(name="how-to-do-great-work", docs=docs)

qa = QA(collection=collection)

In [None]:
# One end-to-end call; the returned answer is displayed as the cell output.
qa.run(question="how to do great work?")

In [None]:
from datasets import load_dataset

# Take the "processed" split, drop its "model" column, shuffle with seed 12
# and keep the first 10 rows.
hf_questions = (
    load_dataset("repllabs/questions_how_to_do_great_work", split="processed")
    .remove_columns(["model"])
    .shuffle(seed=12)
    .select(range(10))
)

# Convert the Hugging Face dataset into a fastrepl dataset and display it.
ds = fastrepl.Dataset.from_hf(hf_questions)
ds

In [None]:
questions = ds["question"]

# Call qa.run once per question; each argument tuple holds just the question.
qa_runner = fastrepl.local_runner(fn=qa.run)
qa_runner.run(args_list=[(question,) for question in questions])