In [1]:
!pip install -r requirements.txt -q

In [1]:
import os
from dotenv import load_dotenv

import openai
import promptquality as pq
import pandas as pd
from pymilvus import connections, Collection
from sentence_transformers import SentenceTransformer

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")
zilliz_uri = os.getenv("ZILLIZ_URI")
zilliz_token = os.getenv("ZILLIZ_TOKEN")

In [3]:
connections.connect(uri=zilliz_uri, token=zilliz_token)
model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
collection = Collection(name="tds_articles")

In [31]:
def get_context(query, similarity_top_k=10):
  query_embedding = model.encode(query)
  closest = collection.search([query_embedding],
                    anns_field='embedding', 
                      param={"metric_type": "L2",
                              "params": {"nprobe": 16}}, 
                      limit=5,
                      output_fields=["paragraph"])

  contexts = [c.entity.get("paragraph") for c in closest[0]]
  context_string = "\n\n".join(contexts)
  context_embedding = model.encode(context_string)
  return context_string, context_embedding, query_embedding

# Each 'query' represents a user query we'd like the model to answer using context from a vector DB.
# It could also contain a 'target' column for BLEU and ROUGE computation.
# "What is the capital of France?", "What is the capital of Germany?", 

queries = ["Explain vector embeddings to me", "What is the best phone to buy?", 
           "What is machine learning?"]

contexts, context_embeddings, query_embeddings = [], [], []
for query in queries:
  context, context_embedding, query_embedding = get_context(query)
  contexts.append(context)
  context_embeddings.append(context_embedding)
  query_embeddings.append(query_embedding)


df = pd.DataFrame()
df["query"] = queries
df["context"] = contexts
df["context_embedding"] = context_embeddings
df["query_embedding"] = query_embeddings

df.head()

Unnamed: 0,query,context,context_embedding,query_embedding
0,Explain vector embeddings to me,Embedding (also called Vector Embeddings) is a...,"[0.016721306, -0.07329102, 0.04982487, -0.0250...","[0.009870021, -0.09072343, -0.025687167, -0.06..."
1,What is the best phone to buy?,The advantages of Moto:\n\nMobileNet\n\nThe mo...,"[-0.031568494, 0.078815974, 0.047155913, 0.049...","[-0.06841213, 0.057167217, 0.030635849, -0.002..."
2,What is machine learning?,"Machine Learning (ML), a technique in which ma...","[-0.03973866, -0.061922736, -0.09700069, -0.01...","[-0.019182485, -0.024332881, -0.047722008, -0...."


### Choose the metrics you'd like to run and create any of your own

In [32]:
from promptquality import Scorers, SupportedModels

metrics = [
    Scorers.groundedness,
    Scorers.context_relevance,
    # Uncertainty, BLEU, and ROUGE are automatically included
]

### Define your template

In [33]:
template = """You are an expert Q&A system that is trusted around the world.
Always answer the query using the provided context information, and not prior knowledge.
Some rules to follow:
1. Never directly reference the given context in your answer.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.

Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query}
Answer: """

### Run your prompts and evaluation metrics

In [16]:
# pq.login("https://console.sandbox.rungalileo.io")

In [34]:
pq.run(project_name='rag_demo_10_11',
       template=template,
       dataset=df.to_dict(orient="records"),
       scorers=metrics,
       settings=pq.Settings(model_alias=SupportedModels.chat_gpt_16k))

Prompt run complete!: : 8it [00:09,  1.21s/it]  


🔭 View your prompt run on the Galileo console at: https://console.sandbox.rungalileo.io/prompt-evaluation/prompts?projectId=529569d3-6d13-4516-8622-a74c178bd2f5&runId=17b0ae8d-a3c7-4a9e-a5da-defc502296da&taskType=7


PromptMetrics(total_responses=3, average_hallucination=0.35911368989061826, average_bleu=None, average_rouge=None, average_cost=0.0019920000000000003, total_cost=0.005976)