In [1]:
import cohere
import guardrails as gd
from guardrails.validators import ValidRange, ValidChoices
from pydantic import BaseModel, Field
from rich import print
from typing import List

# 1. Validation Framework

We want to create a validation framework for assessing the user's responses to the questions. Each answer is assessed on three aspects:
1. Clarity and coherence
2. Technical accuracy, based on the reference document
3. Answer completeness, i.e. whether all parts of the question are answered

In [2]:
# Sample item used to exercise the validation schema: the question posed by the app ...
question = (
    "What is data warehousing, "
    "and how does it differ from traditional databases?"
)
# ... and the free-text reply typed by the user, which is the text being assessed.
answer = (
    "Data warehouse is a place where data is stored "
    "for analytical queries and business decision making"
)

In [146]:
# Schema fragment for the "clarity and coherence" aspect of the assessment.
class ResponseClarity(BaseModel):
    # Single rating restricted to "Yes" / "Somewhat" / "No" by ValidChoices.
    # on_fail="reask" is the guardrails failure policy for a value outside that
    # set (presumably the LLM is asked again — confirm in the guardrails docs).
    response_clarity: str = Field(..., description="Is user answer clear and coherent",
        validators=[ValidChoices(["Yes", "Somewhat", "No"], on_fail="reask")]
    )
    # Disabled field: a free-text justification for the rating.
    # clarity_explanation: str = Field(..., description="what is the reason that question got rated")

# Schema fragment for the "technical accuracy" aspect of the assessment.
class TechnicalAccuracy(BaseModel):
    # Single rating restricted to "Yes" / "Somewhat" / "No" by ValidChoices;
    # on_fail="reask" is the guardrails failure policy for any other value.
    technical_accuracy: str = Field(..., description="Is user answer technically accurate",
        validators=[ValidChoices(["Yes", "Somewhat", "No"], on_fail="reask")]
    )

# Schema fragment for the "answer completeness" aspect of the assessment.
class AnswerCompleteness(BaseModel):
    # Single rating restricted to "Yes" / "Somewhat" / "No" by ValidChoices;
    # on_fail="reask" is the guardrails failure policy for any other value.
    answer_completeness: str = Field(..., description="Are all parts of questions answered by user",
        validators=[ValidChoices(["Yes", "Somewhat", "No"], on_fail="reask")]
    )

# Top-level output schema handed to the guard: one entry per assessment aspect
# (clarity, technical accuracy, completeness). Each aspect is kept as a list of
# its single-field model, so the validated output has the shape
# {"response_clarity": [{...}], "technical_accuracy": [{...}], "answer_completeness": [{...}]}.
class ResponseValidation(BaseModel):
    response_clarity: List[ResponseClarity] = Field(..., description="Clarity in user's answer")
    technical_accuracy: List[TechnicalAccuracy] = Field(..., description="Technical accuracy")
    # The description used to read "Coherence", which names the clarity criterion
    # rather than this one; field descriptions are part of the schema text, so the
    # wrong label described the wrong criterion for this field.
    answer_completeness: List[AnswerCompleteness] = Field(..., description="Completeness of user's answer")


In [147]:
# Prompt template for assessing an answer from the question and the user's answer
# alone (no reference answer in this version).
# ${question} and ${answer} are template placeholders — presumably filled by
# guardrails from the `prompt_params` passed when the guard is called.
# ${gr.complete_json_suffix_v2} appears to reference a guardrails-provided suffix
# that instructs the model to reply with JSON matching the schema — confirm in the
# guardrails docs.
PROMPT = """Given the following question and answer,
please extract a dictionary that contains the assessment of the answer.

Question: ${question}
Answer:  ${answer}

${gr.complete_json_suffix_v2}
"""

# 2. Initialize Guard Object on the Schema

In [148]:
# Build the guard from the pydantic schema and the prompt template. The prompt is
# passed by value here, so the guard keeps the PROMPT string as it is at this point.
# num_reasks=5 is the re-ask budget given to guardrails.
guard = gd.Guard.from_pydantic(ResponseValidation, prompt=PROMPT, num_reasks=5)
# Show the prompt the guard holds, for inspection.
print(guard.base_prompt)

# 3. Wrap RAG chatbot with Guard Object
Run text-search-using-RAGs.ipynb

In [8]:
# Execute the companion notebook inside this kernel.
# NOTE(review): the cells below use `co`, `uuid`, `Datastore`, `datastore` and
# `custom_cochat_api` without defining them here — presumably they come from this
# notebook; confirm, since a fresh-kernel run depends on it.
%run text-search-using-RAGs.ipynb

In [160]:
class Chatbot:
    """Retrieval-augmented chatbot built on the Cohere chat endpoint.

    For each prompt it asks Cohere for search queries, fetches reranked chunks
    from the datastore for every query, and then answers the prompt with those
    chunks supplied as documents.

    The class uses `co`, `uuid` and `Datastore` from the surrounding namespace;
    they are not defined in this cell.
    """

    def __init__(self, datastore: Datastore):
        """
        Initializes an instance of the Chatbot class.

        Parameters:
        datastore (Datastore): Object queried via `search_and_rerank(query)` to
            obtain the chunks passed to the chat call.

        """
        self.datastore = datastore
        # One conversation id per Chatbot instance; sent with every chat call.
        self.conversation_id = str(uuid.uuid4())
        self.response_queries = None
        self.chunks = []
        self.documents = []

    def _get_reranked_docs(self, prompt):
        """
        Populates `self.chunks` with reranked chunks relevant to `prompt`.

        Parameters:
        prompt (str): The message for which search queries are generated.

        Side effects:
        Stores the raw query-generation response in `self.response_queries` and
        replaces `self.chunks` with the chunks retrieved for this prompt. When
        no search queries are produced, `self.chunks` is left empty and a
        notice is printed.
        """
        # Rebind (rather than extend) so chunks retrieved for a previous prompt
        # are not sent again with this one; a caller holding the old list keeps it.
        self.chunks = []
        self.response_queries = co.chat(message=prompt, search_queries_only=True)

        if not self.response_queries.search_queries:
            print('unable to locate the information')
            return

        print("Retrieving information...", end="")

        # Retrieve documents for each generated query
        for search_query in self.response_queries.search_queries:
            self.chunks.extend(self.datastore.search_and_rerank(search_query["text"]))

    def run(self, prompt, **kwargs):
        """
        Answers a single prompt using retrieved chunks as grounding documents.

        Parameters:
        prompt (str): The user message to answer.
        **kwargs: Extra keyword arguments forwarded unchanged to `co.chat`
            (for example `temperature`).

        Returns:
        The `text` attribute of the chat response.

        Side effects:
        Refreshes `self.chunks` and `self.documents`, and prints the cited
        documents (when the response has citations) followed by a separator.
        """
        self._get_reranked_docs(prompt)
        # Forget documents recorded for an earlier response so the attribute
        # always describes the latest call.
        self.documents = []

        response = co.chat(
            message=prompt,
            documents=self.chunks,
            conversation_id=self.conversation_id,
            **kwargs
        )

        # Documents
        if response.citations:
            print("\n\nDOCUMENTS:")
            self.documents = [{'id': doc['id'],
                               'text': doc['text'][:50] + '...',
                               'title': doc['title'],
                               'url': doc['filename']}
                              for doc in response.documents]
            for doc in self.documents:
                print(doc)

        print(f"\n{'-'*100}\n")

        return response.text

In [150]:
# Prompt template for assessing the user's answer against a reference
# ("expected") answer. ${question}, ${expected_answer} and ${answer} are template
# placeholders — presumably filled by guardrails from `prompt_params` at call time.
PROMPT = """For the below question there is an expected answer, however user has answered based on their understanding
please extract a dictionary that contains the assessment of the answer.

Question: ${question}
Expected Answer: ${expected_answer}
User Answer: ${answer}

assess the user's answer based on the expected answer im a dictionary format based on assessment criteria 

${gr.complete_json_suffix_v2}
"""

# Rebinding PROMPT does not change a guard that was already constructed from the
# previous string, so rebuild the guard here. Without this, a top-to-bottom run
# would keep using a prompt that has no ${expected_answer} placeholder and the
# reference answer would never reach the model.
guard = gd.Guard.from_pydantic(ResponseValidation, prompt=PROMPT, num_reasks=5)

In [161]:
# asked by the app
question = "What is data warehousing, and how does it differ from traditional databases?"
# answered by the user (rebinds the sample answer defined near the top of the notebook)
answer = "Data warehouse is for analytics while traditional database is for transactions"
# NOTE(review): `datastore` is not defined in this notebook — presumably created
# by the notebook executed via %run; confirm.
chatbot = Chatbot(datastore)
# chatbot._get_reranked_docs(question)
# documents = chatbot.chunks
# The chatbot's own answer to the question is used as the reference answer;
# temperature=0.1 is forwarded to the chat call through **kwargs.
expected_answer = chatbot.run(prompt=question, temperature=0.1)


In [167]:
# Display the reference answer produced by the chatbot.
expected_answer

'Data warehousing involves the process of collecting, storing, and managing data from various sources in a single place to aid decision-making processes. Unlike traditional databases, data warehouses optimise analytical queries over transactional operations, using a dimensional modelling approach.\n\nWhile traditional databases are optimised for structured and predefined data, often employing a schema-on-write approach, data warehouses are more flexible, allowing for semi-structured or unstructured data and using a schema-on-read approach. This enables agile data exploration and analysis.'

In [162]:
# Call the LLM through the guard so the reply is validated against the schema.
# `custom_cochat_api` is not defined in this notebook (presumably supplied by the
# notebook executed via %run — confirm). `prompt_params` carries the values for the
# prompt's ${...} placeholders.
# NOTE(review): `expected_answer` only has an effect if the guard in use was built
# from a prompt that contains ${expected_answer}; verify which prompt the guard holds.
response = guard(custom_cochat_api,
                # message=get_base_prompt(question,answer),
                model='command-nightly',
                prompt_params={'question':question, 'answer': answer, 'expected_answer':expected_answer},
                # documents=documents,
                # conversation_id=chatbot.conversation_id,
                # stream=True,
                temperature=0.3
                )

In [166]:
# Display the schema-validated assessment returned by the guard.
response.validated_output

{'response_clarity': [{'response_clarity': 'Yes'}],
 'technical_accuracy': [{'technical_accuracy': 'Somewhat'}],
 'answer_completeness': [{'answer_completeness': 'No'}]}