# SK Learnings Chatbot

## Setup

### Load required .NET packages and supporting constants, classes, etc.

In [None]:
#r "nuget: Microsoft.SemanticKernel, 0.18.230725.3-preview"
#r "nuget: Microsoft.Extensions.Logging.Console"
#r "nuget: dotenv.net"

using System;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.ComponentModel;
using System.Net.Http;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Memory;
using Microsoft.SemanticKernel.SkillDefinition;
using Microsoft.SemanticKernel.Skills.Core;
using dotenv.net;
using InteractiveKernel = Microsoft.DotNet.Interactive.Kernel;

#!import Utils/ConsoleLogger.cs
#!import Models/Models.cs

// Name of the semantic-memory collection this notebook uses: chunks are saved into it
// during ingestion and the user's query is searched against it during grounding.
const string MemoryCollectionName = "LearningsCollection";

### Read the API Key and endpoints from environment variables or the .env file

In [None]:
// Load settings through dotenv.net before reading them from the process environment.
DotEnv.Load();

// Reads a mandatory setting and fails immediately, naming the missing variable, instead of
// letting a null/blank value surface later as an opaque error from the Azure OpenAI client.
string GetRequiredSetting(string variableName)
{
    var value = Environment.GetEnvironmentVariable(variableName);
    if (string.IsNullOrWhiteSpace(value))
    {
        throw new InvalidOperationException(
            $"Environment variable '{variableName}' is not set. Define it in the environment or in the .env file.");
    }

    return value;
}

var deploymentName = GetRequiredSetting("GPT_OPENAI_DEPLOYMENT_NAME");
var endpoint = GetRequiredSetting("GPT_OPENAI_ENDPOINT");
var apiKey = GetRequiredSetting("GPT_OPENAI_KEY");
var adaDeploymentName = "ada";

// Show at most the first five characters of the key; clamp the length so a short key
// cannot make Substring throw ArgumentOutOfRangeException.
var apiKeyPreview = apiKey.Substring(0, Math.Min(5, apiKey.Length));
Console.WriteLine($"Using deployment: {deploymentName} at: {endpoint} with key {apiKeyPreview}...");

### Get a kernel instance configured for text completions and embeddings

In [None]:
// Configure the builder: console logging, the Azure chat-completion deployment,
// the Azure embedding deployment, and a volatile memory store.
var kernelBuilder = Kernel.Builder
    .WithLogger(ConsoleLogger.Log)
    .WithAzureChatCompletionService(deploymentName, endpoint, apiKey)
    .WithAzureTextEmbeddingGenerationService(adaDeploymentName, endpoint, apiKey)
    .WithMemoryStorage(new VolatileMemoryStore());

// Build the kernel instance that the remaining cells use.
var kernel = kernelBuilder.Build();

## Ingestion

### Read and deserialize the JSON learnings data file

In [None]:
// Read the raw learnings JSON; the path is relative to the current working directory.
var jsonFileContents = System.IO.File.ReadAllText("data/learnings.json");

// Deserialize returns null when the document is the JSON literal `null`. Fail here with a
// clear message rather than with a NullReferenceException in the chunking cell.
var learnings = System.Text.Json.JsonSerializer.Deserialize<List<Learning>>(jsonFileContents)
    ?? throw new InvalidOperationException("data/learnings.json did not contain a list of learnings.");

// Trailing expression so the notebook shows the loaded learnings as the cell output.
learnings

### Chunk the learnings & recommendations

**Note:** This is a simple chunker: it splits each document into paragraphs. A more realistic chunker would also respect the model's token limit and pick chunk boundaries carefully (for example, never in the middle of a paragraph or sentence).

In [None]:
// Collects one Chunk per non-blank paragraph across all learnings.
var chunks = new List<Chunk>();

// A blank line separates paragraphs. Accept both CRLF and LF line endings; the CRLF form is
// listed first so Windows-style content is split instead of being kept as one large chunk.
var paragraphSeparators = new[] { "\r\n\r\n", "\n\n" };

foreach (var learning in learnings)
{
    // A learning with no content contributes no chunks instead of throwing.
    var paragraphs = learning.Content?.Split(paragraphSeparators, StringSplitOptions.None)
        ?? Array.Empty<string>();

    for (var i = 0; i < paragraphs.Length; i++)
    {
        // Blank paragraphs (e.g. from three or more consecutive newlines) carry no information
        // and would otherwise be sent for embedding as empty text when the chunks are saved.
        if (string.IsNullOrWhiteSpace(paragraphs[i]))
        {
            continue;
        }

        // The id is "<learning id>-<1-based paragraph position>". The position counts skipped
        // blanks too, so ids of the remaining paragraphs are the same as before.
        chunks.Add(new Chunk(learning.Id + "-" + (i + 1), paragraphs[i]));
    }
}

### Save memories for every chunk

In [None]:
// Store the chunks in the memory collection one at a time, in list order,
// using the chunk id as the memory id and the chunk text as the memory text.
for (var index = 0; index < chunks.Count; index++)
{
    var current = chunks[index];
    await kernel.Memory.SaveInformationAsync(MemoryCollectionName, id: current.Id, text: current.Text);
}

## Grounding

### Retrieve the memory based on a query

In [None]:
// Prompt the notebook user for the question to ground.
string query = await InteractiveKernel.GetInputAsync("What is your query?");

// Search settings: return at most three memories, each with a relevance score of at least 0.77.
const int maxMemories = 3;
const double minimumRelevance = 0.77;

// The async sequence returned here is enumerated in the next cell.
var queryResults = kernel.Memory.SearchAsync(
    MemoryCollectionName,
    query,
    limit: maxMemories,
    minRelevanceScore: minimumRelevance);


### Find memories based on query, and collect the text in the memories to augment the prompt

In [None]:
// Accumulates the text of every memory returned by the search.
var promptData = new StringBuilder();

// Append each memory's text followed by a blank line so the memories stay separated.
await foreach (var memory in queryResults)
{
    promptData.Append(memory.Metadata.Text).Append("\n\n");
}

// The concatenated memory text that augments the prompt.
string augmentedText = promptData.ToString();

## Process Prompt & Completion

### Prepare the context

In [None]:
// Start from a fresh kernel context and register the two values for the prompt:
// the user's question and the text retrieved from memory.
var context = kernel.CreateNewContext();
context.Variables["query"] = query;
context.Variables["data"] = augmentedText;

### Answer the question with the same query but with the augmented prompt

In [None]:
// Prompt template with placeholders for the "query" and "data" context variables.
// The user's question and the retrieved text are supplied as variables rather than being
// concatenated into the template, so any "{{ ... }}" they contain is not parsed as template
// syntax (which could break the prompt or let input change what the template does).
// This is a regular string, not an interpolated one: the double braces are literal.
const string groundedPromptTemplate =
    "{{$query}}\n\nUsing only the following text:\n\"\"\"{{$data}}\n\"\"\"";

var fixedFunction = kernel.CreateSemanticFunction(groundedPromptTemplate, maxTokens: 1000);

// Invoke with the context prepared in the previous cell; it carries "query" and "data".
var result = await fixedFunction.InvokeAsync(context);
Console.WriteLine(result);