In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os

try: # When on google Colab, let's clone the notebook so we download the cache.
    import google.colab
    repo_path = 'dspy'
    !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path
except:
    repo_path = '.'

if repo_path not in sys.path:
    sys.path.append(repo_path)

# Set up the cache for this notebook
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(repo_path, 'cache')

import pkg_resources # Install the package if it's not installed
if not "dspy-ai" in {pkg.key for pkg in pkg_resources.working_set}:
    !pip install -U pip
    !pip install dspy-ai
    # !pip install -e $repo_path

import dspy


  import pkg_resources # Install the package if it's not installed


In [2]:
from datasets import load_dataset, Dataset
docs = load_dataset(f"Cohere/wikipedia-22-12-simple-embeddings", "en", split="train[:5%]")

In [3]:
docs

Dataset({
    features: ['id', 'title', 'text', 'url', 'wiki_id', 'views', 'paragraph_id', 'langs', 'emb'],
    num_rows: 24293
})

In [11]:

import os
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import  PodSpec

api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY")
)

# Pick a name for the new index
index_name = 'wikipedia-articles'

# Check whether the index with the same name already exists - if so, delete it
if index_name in pc.list_indexes():
    pc.delete_index(index_name)
    
# Creates new index
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name, 
        dimension=768, 
        metric='dotproduct',
        spec=PodSpec(
            environment="gcp-starter",
        )
    )

In [12]:
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 0}},
 'total_vector_count': 0}

In [13]:

def chunker(data, batch_size):
    data_iter = iter(data)
    # end = False
    for i in range(0, len(data), batch_size):
        chunk = []
        # if i + batch_size >= len(data):
        #     batch_size += len(data) - i
        for x in data_iter:
            if len(chunk) == batch_size:
                break
            chunk.append(x)
        
        chunk_to_insert = []
        for x in chunk:
            item = {}
            item['id'] = str(x['id'])
            item['values'] = x['emb']
            item['metadata'] = {}
            item['metadata']['text'] = x['text']
            chunk_to_insert.append(item)

        yield chunk_to_insert

async_results = [
  index.upsert(vectors=chunk, async_req=True)
  for chunk in chunker(docs, batch_size=100) if len(chunk) > 0
]

# Wait for and retrieve responses (in case of error)
results = [async_result.result() for async_result in async_results]
results

[upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_count: 100,
 upserted_

In [14]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.24053,
 'namespaces': {'': {'vector_count': 24053}},
 'total_vector_count': 24053}

In [15]:
from dspy.retrieve.pinecone_rm import PineconeRM, CohereEmbed

cohere_embed = CohereEmbed()

llm = dspy.OllamaLocal(model="openhermes2.5-mistral:7b-q5_K_M", model_type="chat")
retriever_model = PineconeRM(index_name, cloud_emded_provider=cohere_embed)
dspy.settings.configure(lm=llm, rm=retriever_model)

In [16]:
from dspy.datasets import HotPotQA

# Load the dataset.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

len(trainset), len(devset)

(20, 50)

In [19]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [20]:
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self, question):
        context = self.retrieve(question).passages
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [22]:
from dspy.teleprompt import BootstrapFewShot

# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.
def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM

# Set up a basic teleprompter, which will compile our RAG program.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

# Compile!
compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [03:30<00:00, 10.54s/it]

Bootstrapped 0 full traces after 20 examples in round 0.





In [None]:
from dspy.evaluate.evaluate import Evaluate

# Set up the `evaluate_on_hotpotqa` function. We'll use this many times below.
evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)

# Evaluate the `compiled_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(compiled_rag, metric=metric)