Build the projects, including the library that contains the prompts to evaluate.

In [None]:
# Build the UserStoryGenerator example project.
dotnet build ../examples/userstorygenerator/UserStoryGenerator
# Build the batch-evaluation library project.
dotnet build ../examples/batcheval/library

We load our libraries and add the corresponding `using` directives.

In [None]:
#i "nuget:https://api.nuget.org/v3/index.json" 
#r "nuget:Microsoft.SemanticKernel, 1.0.1"
#r "nuget:OpenTelemetry.Exporter.Console, 1.7.0"
#r "nuget:OpenTelemetry.Exporter.OpenTelemetryProtocol, 1.7.0"
#r "../examples/userstorygenerator/UserStoryGenerator/bin/Debug/net8.0/UserStoryGenerator.dll"
#r "../examples/batcheval/library/bin/Debug/net8.0/SKEval.dll"

using Microsoft.DotNet.Interactive;
using Microsoft.SemanticKernel;
using UserStoryGenerator;
using Microsoft.SKEval;
using System.Text.Json.Nodes;

We initialize our Semantic Kernel with our endpoint and key; it is used both for the prompts and for the evaluation.

In [None]:
// Connection settings shared by the prompts under test and by the evaluators.
string modelName = "gpt-4";
string openAIEndpoint = "https://maho-sandbox-gpt4.openai.azure.com/";
// Read the key from the AZURE_OPENAI_API_KEY environment variable so the secret
// never has to be pasted into (and saved with) the notebook. When the variable
// is not set this falls back to the original empty placeholder.
string openAIKey = System.Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ?? "";
/*
Alternative settings for a local endpoint:
string modelName = "phi";
string openAIEndpoint = "http://localhost:11434";
string openAIKey = "ollama";
*/

In [None]:
// Create the kernel builder. The type name is fully qualified, presumably to
// avoid a clash with a Kernel type coming from Microsoft.DotNet.Interactive.
var builder = Microsoft.SemanticKernel.Kernel.CreateBuilder();

// Register the Azure OpenAI chat-completion service with the settings declared earlier.
builder.AddAzureOpenAIChatCompletion(
    deploymentName: modelName,
    endpoint: openAIEndpoint,
    apiKey: openAIKey);

/*
Alternative registration kept for reference.
NOTE(review): confirm the argument order against the AddOpenAIChatCompletion signature before enabling.
builder.AddOpenAIChatCompletion(
        modelName,
        openAIEndpoint,
        openAIKey);
*/

// Build the kernel used by the remaining cells.
var kernel = builder.Build();

Here we import our evaluation data (a.k.a. the golden set).
If we want to use strong typing, we can instead declare a user-input class and pass it as the type argument of `Preview`.


In [None]:
// Path of the JSONL file holding the evaluation data.
// The commented-out lines below are alternative data sets; enable one to switch.
var dataFilePath = "assets/data.jsonl";
//var dataFilePath = "assets/smallbatch.jsonl";
//dataFilePath = "assets/tinybatch.jsonl";
//dataFilePath = "assets/adversarybatch.jsonl";

// Ask JsonlUtils for a preview of the file, using JsonNode as the record type.
var preview = await BatchEval.Core.JsonlUtils.Preview<JsonNode>(dataFilePath);
// Trailing expression without a semicolon: the notebook displays its value.
preview

Now we need to declare a class that implements `IInputProcessor`. It must implement the `Process` method, which returns a `ModelOutput` with an `Input` (a.k.a. the question) and an `Output` (a.k.a. the answer).

In [None]:
/// <summary>
/// Input processor that feeds one evaluation record to the user-story skill
/// and packages the result as a <c>ModelOutput</c>.
/// </summary>
internal class UserStoryCreator : Microsoft.SKEval.IInputProcessor<JsonNode>
{
    private readonly UserStorySkill userStoryGenerator;

    /// <summary>
    /// Creates the processor and builds the user-story skill from <paramref name="kernel"/>.
    /// </summary>
    /// <param name="kernel">Semantic Kernel instance passed to <c>UserStorySkill.Create</c>.</param>
    public UserStoryCreator(Microsoft.SemanticKernel.Kernel kernel)
    {
        this.userStoryGenerator = UserStorySkill.Create(kernel);
    }

    /// <summary>
    /// Reads the "Description", "ProjectContext" and "Persona" fields of the record,
    /// requests a user story for them and returns the input/output pair.
    /// </summary>
    /// <param name="userInput">One record of the evaluation data.</param>
    /// <returns>
    /// A <c>ModelOutput</c> whose Input is the request text built from persona and
    /// description, and whose Output is the story's title and description.
    /// </returns>
    public async Task<ModelOutput> Process(JsonNode userInput)
    {
        // Look up and convert each field exactly once, then reuse the values for
        // both the skill call and the request text.
        var description = (string)userInput["Description"];
        var projectContext = (string)userInput["ProjectContext"];
        var persona = (string)userInput["Persona"];

        var userStory = await userStoryGenerator.GetUserStory(
            description,
            projectContext,
            persona);

        return new ModelOutput() {
            Input = $"Generate a user story for {persona} so it can {description}",
            Output = $"{userStory!.Title} - {userStory!.Description}"
        };
    }
}

We initialize the batch evaluation. If the input is a strongly typed class, replace `JsonNode` here with that type.

In [None]:
// Batch evaluation object whose input records are JsonNode values.
BatchEval<JsonNode> batchEval = new();

If we have an OpenTelemetry endpoint set up, we can configure it here; otherwise the results are printed to standard output.

> **If you have not set up an OTEL collector, skip this cell.**

In [None]:
// Point the batch evaluation at the OTLP endpoint (here a collector on localhost),
// then configure the meter builder. The endpoint is assigned first, presumably so
// that ConfigureMeterBuilder picks it up — confirm against the library.
batchEval.OtlpEndpoint = "http://localhost:4317";
batchEval.ConfigureMeterBuilder();

Finally, we register the evaluators to run during the batch evaluation; we can add as many evaluators as we need.

In [None]:
// Register the evaluators. The first three are constructed with the kernel;
// RegexEval is registered under the name "format" with a pattern for the
// "As a ..., I want to ... so ..." sentence shape; the last takes no arguments.
// NOTE(review): "LenghtEval" is spelled as referenced here — presumably the
// library's actual class name, so it is left unchanged.
batchEval
    .AddEvaluator(new CoherenceEval(kernel))
    .AddEvaluator(new RelevanceEval(kernel))
    .AddEvaluator(new GroundednessEval(kernel))
    .AddEvaluator(new RegexEval("format", "As a.*, I want to .* so .*"))
    .AddEvaluator(new LenghtEval());


Optionally, write the results to a CSV file.

In [None]:
// Add a CSV output processor targeting the given file name.
// NOTE(review): the name says "adversary" while dataFilePath defaults to
// "assets/data.jsonl" — rename if a different data set is being evaluated.
batchEval.WithCsvOutputProcessor("results_adversary.csv");

In [None]:
// Attach the input processor and the JSONL data file, then run the batch
// evaluation and await its results.
var results = await batchEval
    .WithInputProcessor(new UserStoryCreator(kernel))
    .WithJsonl(dataFilePath)
    .Run();

// Trailing expression without a semicolon: the notebook displays the evaluation results.
results.EvalResults