First, build the projects, including the library that contains the prompts to evaluate.

In [None]:
# Build the user story generator example and the batch evaluation library.
# The assembly references loaded further down in this notebook point at each project's bin/Debug/net8.0 output.
dotnet build ../examples/userstorygenerator/UserStoryGenerator
dotnet build ../examples/batcheval/library

Next, we reference the NuGet packages and the assemblies we just built, and bring their namespaces into scope with `using` directives.

In [None]:
#i "nuget:https://api.nuget.org/v3/index.json" 
#r "nuget:Microsoft.SemanticKernel, 1.0.1"
#r "nuget:OpenTelemetry.Exporter.Console, 1.7.0"
#r "../examples/userstorygenerator/UserStoryGenerator/bin/Debug/net8.0/UserStoryGenerator.dll"
#r "../examples/batcheval/library/bin/Debug/net8.0/BatchEval.Core.dll"

using Microsoft.SemanticKernel;
using UserStoryGenerator;
using BatchEval.Core;
using System.Text.Json.Nodes;

We initialize our Semantic Kernel instance with our endpoint and key. The same kernel is used for both the prompts and the evaluation.

In [None]:
// Model identifier; it is passed as the first argument to AddAzureOpenAIChatCompletion when the kernel is built.
string modelName = "gpt-4";

// Replace <myendpoint> with the name of your own Azure OpenAI resource.
string openAIEndpoint = "https://<myendpoint>.openai.azure.com/";

// Either paste a key here for a quick local run, or leave this empty to read it from the
// AZURE_OPENAI_API_KEY environment variable so the secret is never saved with the notebook.
string openAIKey = "";
if (string.IsNullOrWhiteSpace(openAIKey))
{
    // A value typed above always wins; the environment is only consulted when it is blank.
    openAIKey = System.Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "";
}

In [None]:
// Start from an empty kernel builder and register one Azure OpenAI chat-completion service on it,
// using the model name, endpoint and key declared in the previous cell.
var builder = Kernel.CreateBuilder();
builder.AddAzureOpenAIChatCompletion(modelName, openAIEndpoint, openAIKey);

// Build the kernel that the following cells hand to the input processor and to the evaluators.
Kernel kernel = builder.Build();

Here we import our evaluation data (also known as the golden set).
If we want strong typing, we can instead declare a user input class and pass it as the type argument to the `Preview` method.


In [None]:
// Path of the JSONL file to evaluate; swap in the commented-out line to use the other data set.
// string dataFilePath = "assets/data.jsonl";
string dataFilePath = "assets/smallbatch.jsonl";

// Load a preview of the file, with each record typed as a JsonNode.
var preview = await BatchEval.Core.JsonlUtils.Preview<JsonNode>(dataFilePath);

// Leaving the value as the last expression makes the notebook render it as the cell output.
preview

Now we need to declare a class that implements `IInputProcessor`. It must implement the `Process` method, which returns a `ModelOutput` with an `Input` (the question) and an `Output` (the answer).

In [None]:
/// <summary>
/// Input processor that turns one record of the evaluation data into a generated user story.
/// </summary>
internal class UserStoryCreator : BatchEval.Core.IInputProcessor<JsonNode>
{
    private readonly UserStorySkill userStoryGenerator;

    /// <summary>
    /// Creates the processor and the <see cref="UserStorySkill"/> it delegates to.
    /// </summary>
    /// <param name="kernel">Kernel handed to <c>UserStorySkill.Create</c>.</param>
    public UserStoryCreator(Kernel kernel)
    {
        this.userStoryGenerator = UserStorySkill.Create(kernel);
    }

    /// <summary>
    /// Generates a user story from the "Description", "ProjectContext" and "Persona"
    /// properties of <paramref name="userInput"/>.
    /// </summary>
    /// <param name="userInput">One record of the evaluation data.</param>
    /// <returns>
    /// A <see cref="ModelOutput"/> whose Input is the request sentence built from the persona and
    /// description, and whose Output is the generated story formatted as "Title - Description".
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="userInput"/> is null.</exception>
    /// <exception cref="System.InvalidOperationException">The generator returned no user story.</exception>
    public async Task<ModelOutput> Process(JsonNode userInput)
    {
        System.ArgumentNullException.ThrowIfNull(userInput);

        // Read every property exactly once and reuse the values for both the generator call
        // and the reported input. A missing property converts to null, which interpolates
        // as an empty string, the same text a direct interpolation of the node would produce.
        string description = (string)userInput["Description"];
        string projectContext = (string)userInput["ProjectContext"];
        string persona = (string)userInput["Persona"];

        var userStory = await userStoryGenerator.GetUserStory(description, projectContext, persona);

        // Fail with a descriptive error instead of a NullReferenceException further down.
        if (userStory is null)
        {
            throw new System.InvalidOperationException(
                "The user story generator did not return a user story for the given input.");
        }

        return new ModelOutput()
        {
            Input = $"Generate a user story for {persona} so it can {description}",
            Output = $"{userStory.Title} - {userStory.Description}"
        };
    }
}

Finally, we run the batch evaluation.

In [None]:
// Batch evaluation whose input records are read as JsonNode.
var batchEval = new BatchEval<JsonNode>();

// Register the evaluators: three that receive the kernel, a regex check named "format",
// and a LenghtEval.
// NOTE(review): the result of this chain is discarded, so it presumably relies on AddEvaluator
// mutating batchEval rather than returning a new instance — confirm against the library.
// NOTE(review): "LenghtEval" looks like a misspelling of "Length", but it has to match the type
// name declared by the library — check there before renaming.
batchEval
    .AddEvaluator(new CoherenceEval(kernel))
    .AddEvaluator(new RelevanceEval(kernel))
    .AddEvaluator(new GroundednessEval(kernel))
    .AddEvaluator(new RegexEval("format", "As a .*, I want to .* so .*"))
    .AddEvaluator(new LenghtEval());

// Use UserStoryCreator as the input processor, point the run at the JSONL file chosen earlier,
// and await the evaluation.
await batchEval
    .WithInputProcessor(new UserStoryCreator(kernel))
    .WithJsonl(dataFilePath)
    .Run();