In [1]:
#r "nuget: Microsoft.SemanticKernel, *-*"

In [2]:
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using System.Net.Http.Headers;
using System.Text.Json;

In [3]:
/// <summary>
/// One entry of the <c>messages</c> list carried by <c>ChatRequest</c>.
/// </summary>
/// <remarks>
/// The property names are lowercase because System.Text.Json, with its default
/// options, writes property names verbatim as JSON keys.
/// </remarks>
public class ChatMessage
{
    /// <summary>Role label of the message author, written to the <c>role</c> key.</summary>
    public string role { get; set; }

    /// <summary>Text of the message, written to the <c>content</c> key.</summary>
    public string content { get; set; }
}

In [4]:
/// <summary>
/// Request body for the chat-completions call. Every property is pre-populated with
/// a default so a freshly constructed instance can be serialized as-is.
/// </summary>
/// <remarks>
/// Property order is kept stable because System.Text.Json writes keys in declaration order.
/// </remarks>
public class ChatRequest
{
    /// <summary>Model identifier sent in the <c>model</c> key; empty string by default.</summary>
    public string model { get; set; } = "";

    /// <summary>Messages sent in the <c>messages</c> key; starts as an empty list.</summary>
    public List<ChatMessage> messages { get; set; } = new List<ChatMessage>();

    /// <summary>Value sent in the <c>temperature</c> key; defaults to 0.7.</summary>
    public float temperature { get; set; } = 0.7f;

    /// <summary>Value sent in the <c>max_tokens</c> key; defaults to 2048.</summary>
    public int max_tokens { get; set; } = 2048;

    /// <summary>Value sent in the <c>stream</c> key; defaults to <c>false</c>.</summary>
    public bool stream { get; set; } = false;
}

In [5]:
    /// <summary>
    /// One element of the <c>choices</c> array in a chat-completions response.
    /// </summary>
    public class ChatResponseChoice
    {
        /// <summary>
        /// Position of this choice in the response's <c>choices</c> array.
        /// </summary>
        /// <remarks>
        /// The property name does not match the payload key, and System.Text.Json only binds
        /// keys that match the property name, so without an explicit mapping this value would
        /// always stay at 0. The attribute binds it to the <c>index</c> key while keeping the
        /// existing property name for callers.
        /// </remarks>
        [System.Text.Json.Serialization.JsonPropertyName("index")]
        public int ChatResponseindex { get; set; }

        /// <summary>Message produced for this choice (the <c>message</c> key).</summary>
        public ChatResponseMessage message { get; set; }

        /// <summary>Raw value of the <c>finish_reason</c> key.</summary>
        public string finish_reason { get; set; }
    }

    /// <summary>
    /// Message object nested inside a response choice (the <c>message</c> key).
    /// </summary>
    public class ChatResponseMessage
    {
        /// <summary>Raw value of the <c>role</c> key.</summary>
        public string role { get; set; }

        /// <summary>Raw value of the <c>content</c> key, i.e. the generated text.</summary>
        public string content { get; set; }
    }

    /// <summary>
    /// Top-level shape the chat-completions response body is deserialized into.
    /// </summary>
    public class ChatResponse
    {
        /// <summary>Raw value of the <c>id</c> key.</summary>
        public string id { get; set; }

        /// <summary>Raw value of the <c>object</c> key (escaped because <c>object</c> is a C# keyword).</summary>
        public string @object { get; set; }

        /// <summary>Raw value of the <c>created</c> key (presumably a Unix timestamp; confirm against the API).</summary>
        public int created { get; set; }

        /// <summary>Raw value of the <c>model</c> key.</summary>
        public string model { get; set; }

        /// <summary>Entries of the <c>choices</c> array; null when the key is absent from the payload.</summary>
        public List<ChatResponseChoice> choices { get; set; }

        /// <summary>Token accounting from the <c>usage</c> key.</summary>
        public ChatResponseUsage usage { get; set; }
    }

    /// <summary>
    /// Token counters found under the <c>usage</c> key of a chat-completions response.
    /// </summary>
    public class ChatResponseUsage
    {
        /// <summary>Raw value of the <c>prompt_tokens</c> key.</summary>
        public int prompt_tokens { get; set; }

        /// <summary>Raw value of the <c>completion_tokens</c> key.</summary>
        public int completion_tokens { get; set; }

        /// <summary>Raw value of the <c>total_tokens</c> key.</summary>
        public int total_tokens { get; set; }
    }

In [6]:
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text.Json;
using System.Threading;

using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

In [7]:

    /// <summary>
    /// Minimal <see cref="IChatCompletionService"/> that posts a chat history to an HTTP
    /// chat-completions endpoint (request shape <c>ChatRequest</c>, response shape
    /// <c>ChatResponse</c>) and surfaces the first returned choice as an assistant message.
    /// </summary>
    public class CustomChatCompletionService : IChatCompletionService
    {
        // A single client is shared by all calls: creating and disposing an HttpClient per
        // request churns connections and can exhaust sockets under load.
        private static readonly HttpClient s_httpClient = new HttpClient();

        // The service advertises no attributes; an empty dictionary lets callers inspect
        // Attributes without hitting an exception.
        private static readonly IReadOnlyDictionary<string, object?> s_attributes = new Dictionary<string, object?>();

        /// <summary>Absolute URL of the chat-completions endpoint the request is posted to.</summary>
        public string ModelUrl { get; set; }

        /// <summary>Model identifier placed in the request's <c>model</c> field when non-empty.</summary>
        public string ModelName { get; set; }

        /// <summary>Key sent as a <c>Bearer</c> token in the <c>Authorization</c> header when non-empty.</summary>
        public string ApiKey { get; set; }

        /// <summary>Service attributes; always empty for this implementation.</summary>
        public IReadOnlyDictionary<string, object?> Attributes => s_attributes;

        /// <summary>
        /// Sends <paramref name="chatHistory"/> to <see cref="ModelUrl"/> and returns the generated reply.
        /// </summary>
        /// <param name="chatHistory">Conversation so far. The generated assistant message is also appended to it.</param>
        /// <param name="executionSettings">Accepted for interface compatibility; not used.</param>
        /// <param name="kernel">Accepted for interface compatibility; not used.</param>
        /// <param name="cancellationToken">Cancels the outgoing HTTP request.</param>
        /// <returns>A single-element list holding the assistant message built from the first response choice.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chatHistory"/> is null.</exception>
        /// <exception cref="HttpRequestException">The endpoint answered with a non-success status code.</exception>
        /// <exception cref="InvalidOperationException">The response body contained no usable choice.</exception>
        public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            if (chatHistory is null)
            {
                throw new ArgumentNullException(nameof(chatHistory));
            }

            // Translate the history into the wire format, one ChatMessage per entry.
            var payload = new ChatRequest();
            foreach (var message in chatHistory)
            {
                payload.messages.Add(new ChatMessage
                {
                    // Invariant casing: culture-sensitive lowering can corrupt role labels
                    // (e.g. the Turkish dotless i).
                    role = message.Role.ToString().ToLowerInvariant(),
                    content = message.Content,
                });
            }

            // Only override the request's default (empty) model when one is configured.
            if (!string.IsNullOrEmpty(ModelName))
            {
                payload.model = ModelName;
            }

            using var request = new HttpRequestMessage(HttpMethod.Post, new Uri(ModelUrl));

            // Skip the header entirely when no key is configured instead of sending an empty token.
            if (!string.IsNullOrEmpty(ApiKey))
            {
                request.Headers.Add("Authorization", "Bearer " + ApiKey);
            }

            request.Content = new StringContent(JsonSerializer.Serialize(payload));
            request.Content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/json");

            using var httpResponse = await s_httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);

            // SendAsync buffers the body by default, so this read does not need its own token.
            var responseContent = await httpResponse.Content.ReadAsStringAsync().ConfigureAwait(false);

            // Fail with the server's own error text rather than trying to parse an error
            // payload as a ChatResponse.
            if (!httpResponse.IsSuccessStatusCode)
            {
                throw new HttpRequestException(
                    $"Chat completion request failed with status {(int)httpResponse.StatusCode} ({httpResponse.ReasonPhrase}): {responseContent}");
            }

            var chatResponse = JsonSerializer.Deserialize<ChatResponse>(responseContent);
            if (chatResponse?.choices is null || chatResponse.choices.Count == 0 || chatResponse.choices[0].message is null)
            {
                throw new InvalidOperationException("The chat completion response did not contain any choices.");
            }

            var reply = new ChatMessageContent(AuthorRole.Assistant, chatResponse.choices[0].message.content);

            // Existing behavior kept for callers that rely on it: the reply is appended to the
            // caller's history.
            chatHistory.Add(reply);

            // Return only the generated message, not the whole conversation, so callers that
            // expect just the new content get exactly one item.
            return new[] { reply };
        }

        /// <summary>
        /// Streaming is not implemented by this service.
        /// </summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

    }

In [8]:
// Configure the custom service: endpoint URL, model identifier and API key.
var nvidiaNIMChat = new CustomChatCompletionService
{
    ModelUrl = "https://integrate.api.nvidia.com/v1/chat/completions",
    ModelName = "microsoft/phi-3-mini-128k-instruct",
    ApiKey = "Your Nvidia NIM API Key", // placeholder value; replace before running
};

In [9]:
using Microsoft.Extensions.DependencyInjection;

In [10]:
// Register the configured service instance under the key "nvidiaNIMChat", then build the kernel.
IKernelBuilder builder = Kernel.CreateBuilder();
builder.Services.AddKeyedSingleton<IChatCompletionService>(
    "nvidiaNIMChat",
    nvidiaNIMChat);
Kernel kernel = builder.Build();

In [11]:
// Seed a conversation with one user turn, then resolve the chat service from the kernel.
ChatHistory history = new ChatHistory();
history.AddUserMessage("hi, who are you?");
IChatCompletionService chat = kernel.GetRequiredService<IChatCompletionService>();

In [12]:
// Send the conversation to the service and keep the returned messages.
IReadOnlyList<ChatMessageContent> result = await chat.GetChatMessageContentsAsync(history);

In [13]:
// Display the text of the last returned message (trailing expression, so the notebook renders it).
result[result.Count - 1].Content

Hello! I'm Phi, an AI developed by Microsoft. I'm here to help you with questions, provide information, and assist with a wide range of tasks. How can I help you today?