In [1]:
from openai import OpenAI
from dotenv import load_dotenv
# Load variables from a local .env file before the client is constructed.
load_dotenv()
# The client is created with no explicit arguments; presumably it picks up
# OPENAI_API_KEY from the environment populated above — confirm.
openai_client = OpenAI()

In [2]:
from pydantic import BaseModel

# Target schema for the event-extraction example; it is passed as `text_format`
# to `openai_client.responses.parse`.
# Documented with `#` comments on purpose: a class docstring would be emitted as
# the `description` of the generated JSON schema.
class CalendarEvent(BaseModel):
    name: str  # event name
    date: str  # plain string, not a date type (the recorded run returns 'Friday')
    participants: list[str]  # one string per participant

In [3]:
CalendarEvent.model_json_schema()

{'properties': {'name': {'title': 'Name', 'type': 'string'},
  'date': {'title': 'Date', 'type': 'string'},
  'participants': {'items': {'type': 'string'},
   'title': 'Participants',
   'type': 'array'}},
 'required': ['name', 'date', 'participants'],
 'title': 'CalendarEvent',
 'type': 'object'}

In [7]:
# Ask the model to extract a CalendarEvent from a one-sentence description.
# `text_format` is the Pydantic class the reply is parsed into; later cells read
# the parsed instance from `response.output_parsed`.
response = openai_client.responses.parse(
    model="gpt-4o-mini",
    input=[
        {"role": "system", "content": "Extract the event information."},
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        }
    ],
    text_format=CalendarEvent,
)

In [8]:
response

ParsedResponse[CalendarEvent](id='resp_08d201650bca45850069a319ac04e0819684b5d90faafaa1db', created_at=1772296620.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4o-mini-2024-07-18', object='response', output=[ParsedResponseOutputMessage[CalendarEvent](id='msg_08d201650bca45850069a319ad158881969c80dd1546c62961', content=[ParsedResponseOutputText[CalendarEvent](annotations=[], text='{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}', type='output_text', logprobs=[], parsed=CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob']))], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, completed_at=1772296621.0, conversation=None, max_output_tokens=None, max_tool_calls=None, previous_response_id=None, prompt=None, prompt_cache_key=None, prompt_cache_retention=None, reasoning=Reasoning(effort=None, generate_

In [10]:
response.output_parsed

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [11]:
response.output[0].content[0].text

'{"name":"Science Fair","date":"Friday","participants":["Alice","Bob"]}'

In [12]:
event = response.output_parsed

In [13]:
event

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

# Structured RAG

In [17]:
from gitsource import GithubRepositoryDataReader, chunk_documents
from minsearch import Index

# Read the .md / .mdx files of the evidentlyai/docs GitHub repository.
reader = GithubRepositoryDataReader(
    repo_owner="evidentlyai",
    repo_name="docs",
    allowed_extensions={"md", "mdx"},
)
files = reader.read()


# Parse every file, then split the parsed documents into chunks.
# NOTE(review): size=3000 / step=1500 presumably means overlapping windows
# (step is half of size); the unit is not visible here — confirm.
parsed_docs = [doc.parse() for doc in files]
chunked_docs = chunk_documents(parsed_docs, size=3000, step=1500)

# Build the search index over the chunks: three text fields plus `filename`
# as a keyword field. `index` is the global that `search` queries.
index = Index(
    text_fields=["title", "description", "content"],
    keyword_fields=["filename"]
)
index.fit(chunked_docs)

print(f"Indexed {len(chunked_docs)} chunks from {len(files)} documents")


Indexed 385 chunks from 95 documents


In [18]:
def rag(query):
    """Answer ``query`` with the retrieve -> prompt -> generate pipeline.

    Looks up matching chunks with ``search``, renders them into a user prompt
    with ``build_prompt``, and sends that prompt together with the module-level
    ``instructions`` to ``llm``.

    Returns whatever ``llm`` returns.
    """
    hits = search(query)
    user_prompt = build_prompt(query, hits)
    # `instructions` is read from the notebook globals at call time.
    return llm(user_prompt, instructions)

In [19]:
def search(query, num_results=5):
    """Return the top matches for ``query`` from the module-level ``index``.

    Args:
        query: Text to search for.
        num_results: Maximum number of hits to request from the index.
            Defaults to 5, the value that used to be hard-coded, so existing
            ``search(query)`` calls behave as before.

    Returns:
        Whatever ``index.search`` returns for the query.
    """
    return index.search(query=query, num_results=num_results)
    

In [20]:
import json

# System instructions for the documentation assistant. `rag` and
# `rag_structured` read this module-level name when they are called.
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.

Use only facts from the CONTEXT when answering.
If the answer isn't in the CONTEXT, say so.
"""

# User-message template; `build_prompt` fills in {question} and {context}.
# `.strip()` removes the leading and trailing newline of the triple-quoted literal.
prompt_template = """
<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(question, search_results):
    """Render the user prompt for one question.

    ``search_results`` is serialised to JSON with a two-space indent and
    substituted, together with ``question``, into the module-level
    ``prompt_template``.

    Returns the formatted prompt string.
    """
    return prompt_template.format(
        question=question,
        context=json.dumps(search_results, indent=2),
    )
    

In [21]:

def llm(user_prompt, instructions=None, model="gpt-4o-mini"):
    """Send one user prompt to the model and return the reply text.

    Args:
        user_prompt: Content of the user message.
        instructions: Optional system message; it is only included when truthy.
        model: Model name passed to ``openai_client.responses.create``.

    Returns:
        The ``output_text`` attribute of the response.
    """
    user_turn = {"role": "user", "content": user_prompt}

    # The system turn, when present, goes ahead of the user turn.
    if instructions:
        conversation = [{"role": "system", "content": instructions}, user_turn]
    else:
        conversation = [user_turn]

    reply = openai_client.responses.create(model=model, input=conversation)
    return reply.output_text


In [22]:
answer = rag('How do I implement llm as a judge',)

In [23]:
answer

'To implement LLM as a judge, follow these steps based on the tutorial provided:\n\n1. **Setup**:\n   - Install the required package:\n     ```bash\n     pip install evidently\n     ```\n   - Import necessary modules:\n     ```python\n     import pandas as pd\n     import numpy as np\n     from evidently import Dataset, DataDefinition, Report, BinaryClassification\n     from evidently.llm.templates import BinaryClassificationPromptTemplate\n     ```\n\n2. **Create an Evaluation Dataset**:\n   - Design a toy Q&A dataset that includes:\n     - Questions as inputs.\n     - Target responses as approved answers.\n     - New responses imitated from the system.\n     - Manual labels indicating if the response is correct or incorrect.\n\n3. **Pass OpenAI API Key**:\n   - Set your OpenAI API key as an environment variable:\n     ```python\n     import os\n     os.environ["OPENAI_API_KEY"] = "YOUR_KEY"\n     ```\n\n4. **Run LLM as a Judge**:\n   - Create an LLM evaluator prompt and attach descri

In [67]:

def llm_structured(user_prompt, output_type, instructions=None, model="gpt-4o-mini"):
    """Send one user prompt to the model and return the parsed structured reply.

    Args:
        user_prompt: Content of the user message.
        output_type: Class passed as ``text_format`` to
            ``openai_client.responses.parse``.
        instructions: Optional system message; it is only included when truthy.
        model: Model name passed to ``openai_client.responses.parse``.

    Returns:
        The ``output_parsed`` attribute of the response.
    """
    # Zero or one system turn, followed by the user turn.
    system_turns = [{"role": "system", "content": instructions}] if instructions else []
    conversation = system_turns + [{"role": "user", "content": user_prompt}]

    parsed_reply = openai_client.responses.parse(
        model=model,
        input=conversation,
        text_format=output_type,
    )
    return parsed_reply.output_parsed


In [26]:
response = llm_structured(user_prompt="Alice and Bob are going to a Science Fair on Friday",
                          instructions="Extract the event information",
                          output_type=CalendarEvent,
)

In [27]:
response

CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])

In [28]:
from typing import Optional

# First version of the structured RAG answer: no docstring and no field
# descriptions, so the generated JSON schema carries only names and types.
class RAGResponse(BaseModel):
    answer: Optional[str]=None  # optional; defaults to None
    found_answer: bool  # required

In [66]:
def rag_structured(query, output_type=None):
    """Answer ``query`` from the indexed docs and return a parsed structured reply.

    Args:
        query: The user's question.
        output_type: Class the reply is parsed into. When omitted, the
            ``RAGResponse`` class that is current at call time is used.

    Returns:
        Whatever ``llm_structured`` returns for the built prompt.
    """
    # Resolve the default here rather than in the signature. A default written
    # as `output_type=RAGResponse` is bound once, when this cell runs, so later
    # redefinitions of RAGResponse in the notebook would be silently ignored by
    # calls that rely on the default.
    if output_type is None:
        output_type = RAGResponse

    search_results = search(query)
    prompt = build_prompt(query, search_results)
    # `instructions` is read from the notebook globals at call time.
    return llm_structured(user_prompt=prompt,
                          instructions=instructions,
                          output_type=output_type)

In [31]:
answer = rag_structured('How do I install Kafka?')

In [32]:
print(answer.answer)
print(answer.found_answer)

None
False


In [33]:
RAGResponse.model_json_schema()

{'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [65]:

# Rebinds the global `instructions` to a shorter prompt: the "Use only facts from
# the CONTEXT" and "If the answer isn't in the CONTEXT, say so" lines of the
# earlier definition are dropped. `rag` and `rag_structured` read this global
# when called, so calls executed after this cell use the shorter text.
instructions = """
You're a documentation assistant. Answer the QUESTION based on the CONTEXT from our documentation.
"""

In [38]:
# Second version: same two fields, but the class docstring now tells the model
# when to leave `answer` empty. The docstring is emitted as the `description`
# of the generated JSON schema, so its text is part of the prompt.
class RAGResponse(BaseModel):
    """   
    The response from the RAG documentation system
    If the answer to the question is not found in the database, 'answer' is none.
    """
    answer: Optional[str]=None
    found_answer: bool

In [39]:
RAGResponse.model_json_schema()

{'description': "The response from the RAG documentation system\nIf the answer to the question is not found in the database, 'answer' is none.",
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'title': 'Answer'},
  'found_answer': {'title': 'Found Answer', 'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [40]:
answer = rag_structured('How do I install Kafka?')
print(answer.answer)
print(answer.found_answer)

None
False


In [48]:
from pydantic import Field

# Third version: the guidance moves from the class docstring to per-field
# `Field(description=...)` values, which appear under each property in the
# generated JSON schema.
class RAGResponse(BaseModel):
    """   
    The response from the RAG documentation system
    """
    answer: Optional[str]=Field(None, description="If you can't find the answer, set 'answer' to None")
    found_answer: bool =Field(description="True if the answer is found , False otherwise") 

In [49]:
RAGResponse.model_json_schema()

{'description': 'The response from the RAG documentation system',
 'properties': {'answer': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'default': None,
   'description': "If you can't find the answer, set 'answer' to None",
   'title': 'Answer'},
  'found_answer': {'description': 'True if the answer is found , False otherwise',
   'title': 'Found Answer',
   'type': 'boolean'}},
 'required': ['found_answer'],
 'title': 'RAGResponse',
 'type': 'object'}

In [50]:
answer = rag_structured('How do I install Kafka?')
print(answer.answer)
print(answer.found_answer)

None
False


In [69]:
from typing import Literal

class RAGResponse(BaseModel):
    """
    This model provides a structured answer with metadata about the response,
    including confidence, categorization, and follow-up suggestions.
    """
    # `answer` is annotated as `str`, so it must not default to None: Pydantic
    # does not validate default values unless asked to, and a None default would
    # let an instance hold None in a field typed `str`. The field is therefore
    # required, like every other field of this model.
    answer: str = Field(description="The main answer to the user's question in markdown")
    found_answer: bool = Field(description="True if relevant information was found in the documentation")
    # The 0.0-1.0 range is stated only in the description text; it is not enforced.
    confidence: float = Field(description="Confidence score from 0.0 to 1.0 indicating how certain the answer is")
    confidence_explanation: str = Field(description="Explanation about the confidence level")
    # Restricted to the listed literals; emitted as an `enum` in the JSON schema.
    answer_type: Literal["how-to", "explanation", "troubleshooting", "comparison", "reference"] = Field(description="The category of the answer")
    followup_questions: list[str] = Field(description="Suggested follow-up questions the user might want to ask")

In [70]:
RAGResponse.model_json_schema()

{'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
 'properties': {'answer': {'description': "The main answer to the user's question in markdown",
   'title': 'Answer',
   'type': 'string'},
  'found_answer': {'description': 'True if relevant information was found in the documentation',
   'title': 'Found Answer',
   'type': 'boolean'},
  'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
   'title': 'Confidence',
   'type': 'number'},
  'confidence_explanation': {'description': 'Explanation about the confidence level',
   'title': 'Confidence Explanation',
   'type': 'string'},
  'answer_type': {'description': 'The category of the answer',
   'enum': ['how-to',
    'explanation',
    'troubleshooting',
    'comparison',
    'reference'],
   'title': 'Answer Type',
   'type': 'string'},
  'followup_questions': {'description': 'S

In [76]:
answer = rag_structured('How do I evaluate llms?', RAGResponse)


In [75]:
print(answer.answer)
print(answer.found_answer)
print(answer.confidence)
print(answer.followup_questions)

### How to Evaluate LLMs

Evaluating Language Learning Models (LLMs) can be accomplished using a structured approach involving multiple LLMs to judge the same outputs. Below is a step-by-step guide:

#### 1. **Preparation**
   - **Install Required Libraries**:
     ```python
     pip install evidently litellm
     ```
   - **Import Necessary Components**:
     ```python
     import pandas as pd
     from evidently import Dataset, DataDefinition, Report
     from evidently.presets import TextEvals
     from evidently.descriptors import LLMEval, TestSummary
     from evidently.llm.templates import BinaryClassificationPromptTemplate
     ```

#### 2. **Set Up Evaluator LLMs**
   - Pass the API keys for the LLMs you'll utilize:
     ```python
     import os
     os.environ["OPENAI_API_KEY"] = "YOUR_KEY"
     os.environ["GEMINI_API_KEY"] = "YOUR_KEY"
     os.environ["ANTHROPIC_API_KEY"] = "YOUR_KEY"
     ```

   - Optionally, set up an Evidently Cloud workspace to store evaluation results:


In [77]:
answer = rag_structured('How do I install Kafka on windows?', RAGResponse)

In [80]:
# Inspect the reply to the off-topic (Kafka) question.
# In the recorded run the model still wrote an answer while reporting
# found_answer=False and confidence=0.9: nothing in the flat schema stops
# these fields from contradicting each other.
print(answer.answer[:100])  # first 100 characters only
print(answer.found_answer)
print(answer.confidence)
print(answer.confidence_explanation)
print(answer.followup_questions)

To install Kafka on Windows, follow these steps:

1. **Download Kafka**: Visit the [Apache Kafka web
False
0.9
There was no specific documentation in the provided context about installing Kafka on Windows, but the information provided here is commonly known and likely accurate based on generally available resources.
['What is Kafka used for?', 'How do I configure Kafka settings?', 'Can I run Kafka on a cloud service?']


# Nested Fields with Mutual Exclusivity

In [81]:
from pydantic import model_validator

# Payload type of the `answer_not_found` field on AnswerResponse.
class AnswerNotFound(BaseModel):
    explanation: str  # required

class AnswerResponse(BaseModel):
    """
    If answer is found, 'answer' is populated.
    If no answer is found, 'answer_not_found' is populated.
    Only one of the two fields can be set at a time. Never both or neither.
    """

    # Both branches default to None; the validator below requires exactly one.
    answer_not_found: Optional[AnswerNotFound] = None
    found_answer: bool
    answer: Optional[RAGResponse] = None

    @model_validator(mode="after")
    def check_consistency(self):
        # Enforce the either/or rule between the two optional branches.
        # `found_answer` itself is not cross-checked here.
        has_answer = self.answer is not None
        has_not_found = self.answer_not_found is not None

        if has_answer and has_not_found:
            raise ValueError("Provide either 'answer' or 'answer_not_found', not both.")
        if not (has_answer or has_not_found):
            raise ValueError("Provide either 'answer' or 'answer_not_found'.")

        return self



In [87]:
AnswerResponse.model_json_schema()

{'$defs': {'AnswerNotFound': {'properties': {'explanation': {'title': 'Explanation',
     'type': 'string'}},
   'required': ['explanation'],
   'title': 'AnswerNotFound',
   'type': 'object'},
  'RAGResponse': {'description': 'This model provides a structured answer with metadata about the response,\nincluding confidence, categorization, and follow-up suggestions.',
   'properties': {'answer': {'description': "The main answer to the user's question in markdown",
     'title': 'Answer',
     'type': 'string'},
    'found_answer': {'description': 'True if relevant information was found in the documentation',
     'title': 'Found Answer',
     'type': 'boolean'},
    'confidence': {'description': 'Confidence score from 0.0 to 1.0 indicating how certain the answer is',
     'title': 'Confidence',
     'type': 'number'},
    'confidence_explanation': {'description': 'Explanation about the confidence level',
     'title': 'Confidence Explanation',
     'type': 'string'},
    'answer_type': 

In [83]:
answer = rag_structured('How do I install Kafka on windows?', AnswerResponse)
answer

In [88]:
answer = rag_structured('How do I evaluate llms?', AnswerResponse)
answer

AnswerResponse(answer_not_found=None, found_answer=True, answer=RAGResponse(answer='## How to Evaluate LLMs\n\nEvaluating Large Language Models (LLMs) can be approached through a method involving multiple LLMs acting as judges to assess the same output. This method provides a more reliable evaluation by consolidating their assessments to determine if an output is appropriate or if there are differing opinions. Hereâ€™s a structured approach:\n\n### Step-by-Step Evaluation Process\n\n1. **Preparation**\n   - Install Evidently and necessary packages:\n     ```bash\n     pip install evidently litellm\n     ```\n   - Import required components:\n     ```python\n     import pandas as pd\n     from evidently import Dataset, DataDefinition, Report\n     from evidently.presets import TextEvals\n     from evidently.descriptors import LLMEval, TestSummary\n     ```\n\n2. **Set Up Evaluator LLMs**\n   - Provide API keys for the LLMs you will use:\n     ```python\n     import os\n     os.environ["

In [91]:
from pydantic import ValidationError
# Construct AnswerResponse with no arguments to show validation failing.
# NOTE(review): in the recorded output the only error is the missing required
# `found_answer` field; the either/or messages from `check_consistency` do not
# appear — presumably the mode="after" validator does not run once field
# validation has failed. Confirm.
try:
    AnswerResponse()
except ValidationError as e:
    print("Validation Error")
    print(e)

Validation Error
1 validation error for AnswerResponse
found_answer
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.12/v/missing
