In [2]:
# all the code we wrote in docs.ipynb is in the docs.py file (imported below as `docs`)

import docs

# Load the raw data through the helper module, then parse it.
# Later cells index the parsed result by position and read each item's 'content' key.
# NOTE(review): presumably read_github_data fetches the repository files — confirm in the docs module.
github_data = docs.read_github_data()
parsed_data = docs.parse_data(github_data)

In [3]:
# Take the 'content' of one parsed document as the sample input and display its length.
# NOTE(review): index 45 is a magic number — presumably picked because this document is long; confirm.
long_text = parsed_data[45]['content']
len(long_text)

21712

In [4]:
from openai import OpenAI

# Single client instance, created once and used by llm_structured.
# NOTE(review): no credentials are passed here; presumably the SDK picks up the API key from the environment — confirm.
openai_client = OpenAI()

def llm_structured(instructions, user_prompt, output_type, model="gpt-4o-mini"):
    """Run one structured-output request through the shared OpenAI client.

    Args:
        instructions: Text sent as the content of the "system" message.
        user_prompt: Text sent as the content of the "user" message.
        output_type: Forwarded as ``text_format`` to ``responses.parse``.
        model: Model name forwarded to the API call; defaults to "gpt-4o-mini".

    Returns:
        The ``output_parsed`` attribute of the response.
        NOTE(review): presumably an instance of ``output_type`` — confirm
        against the SDK docs, including what comes back on a refusal.
    """
    system_message = {"role": "system", "content": instructions}
    user_message = {"role": "user", "content": user_prompt}

    # System message first, user message second — order is part of the request.
    parsed_response = openai_client.responses.parse(
        model=model,
        input=[system_message, user_message],
        text_format=output_type,
    )
    return parsed_response.output_parsed

In [5]:
# Prompt text passed as the `instructions` argument of llm_structured (sent as the system message).
# .strip() drops the leading and trailing newlines that the triple-quoted layout introduces.
instructions = """
Split the provided document into logical sections that make sense for a Q&A system.
Each section should be self-contained and cover a specific topic or concept.
Sections should be relatively large (3000-5000 characters).
""".strip()

In [6]:
from pydantic import BaseModel

# One section of a split document: a title plus the section body.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# pydantic exposes a model's docstring as the schema description, which would change
# the schema handed to the model as text_format.
class Section(BaseModel):
    title: str
    # Section body text (the printed results show it containing markdown formatting).
    markdown: str

# Top-level structured output: a document title and its ordered list of Section items.
# Used as the output_type for the llm_structured call.
# NOTE(review): kept as comments, not a docstring, so the generated schema stays unchanged.
class Document(BaseModel):
    title: str
    sections: list[Section]

In [None]:
# Ask the model to split long_text according to `instructions`,
# with the reply parsed against the Document schema.
result = llm_structured(
    instructions=instructions,
    user_prompt=long_text,
    output_type=Document
)

In [None]:
# How many sections the model produced for this document.
len(result.sections)

5

In [None]:
# Dump the document title, then each section as a "-- title --" header followed by its body.
# Each item is terminated by a blank line (end='\n\n' = the line's newline plus one empty line).
print(f'{result.title}', end='\n\n')

for section in result.sections:
    print(f'-- {section.title} --', end='\n\n')
    print(section.markdown, end='\n\n')

Regression Testing for LLM Outputs

-- Introduction to Regression Testing for LLM Outputs --

In this tutorial, you will learn how to perform regression testing for LLM outputs. Regression testing is essential when you make changes to your model, prompts, or any components of your system affecting output. By comparing new responses with older ones after modifying parameters, you can identify significant changes, ensuring updates are reliable and identifying issues to fix.

**Example Setup: Evidently Cloud**  
This tutorial will use Evidently Cloud, allowing you to run evaluations in Python and upload results. You can also view reports locally if preferred. For self-hosted instances, simply replace `CloudWorkspace` with `Workspace`.

-- Tutorial Overview --

### Scope of the Tutorial
This tutorial outlines the following key steps:

1. **Creating a Toy Dataset**: Build a small dataset of questions and reference answers.
2. **Generating New Answers**: Simulate generating new answers using