# ML.NET: Model with Intent Classification
This notebook trains an intent-classification model and demonstrates how to use an ML model to parse a user query.

In [1]:
// ML.NET: Intent Classification + Slot Extraction (C#)
// Combined setup (original cells 1–5):
// - Notebook intro (summary)
// - Install NuGet packages
// - Usings
// - Environment sanity prints

#r "nuget: Microsoft.Recognizers.Text, 1.8.13"
#r "nuget: Microsoft.Recognizers.Text.DateTime, 1.8.13"
#r "nuget: Microsoft.ML, 4.0.2"
#r "nuget: Microsoft.ML.FastTree, 4.0.2"
#r "nuget: Microsoft.ML.LightGbm, 4.0.2"

using System;
using System.IO;
using System.Linq;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Text;
using Microsoft.ML.Trainers;
using Microsoft.Recognizers.Text;
using Microsoft.Recognizers.Text.DateTime;
using Microsoft.Recognizers.Text.Number;

// Environment sanity output. The messages are fixed strings printed unconditionally
// (nothing is probed here); the third line is the current OS user name.
string[] statusLines =
{
    "Recognizers OK",
    "DateTime OK",
    Environment.UserName,
    "ML.NET OK",
    "Packages loaded ✔",
};

foreach (var statusLine in statusLines)
{
    Console.WriteLine(statusLine);
}

Recognizers OK
DateTime OK
maneki-neko
ML.NET OK
Packages loaded ✔


In [4]:

using System.IO;
using System.Text.Json;

/// <summary>
/// A single labelled example: the text of a user query plus its intent label.
/// </summary>
public class QueryRecord
{
    /// <summary>Raw user query text. Defaults to the empty string.</summary>
    public string Text { get; set; } = "";

    /// <summary>Intent label as a string, e.g. "GET_CONTACT_INFO". Defaults to the empty string.</summary>
    public string Label { get; set; } = "";
}

/// <summary>
/// Output shape of the intent classifier for one query.
/// </summary>
public class IntentPrediction
{
    /// <summary>
    /// The predicted label as a string, i.e. after mapping the key back to its label value.
    /// Defaults to the empty string.
    /// </summary>
    public string PredictedLabel { get; set; } = "";

    /// <summary>
    /// Raw scores, one per class; useful for debugging. Defaults to an empty array.
    /// </summary>
    public float[] Score { get; set; } = Array.Empty<float>();
}

Console.WriteLine("Models defined ✔");

// The seed data is a JSON file holding an array of QueryRecord objects, e.g.
//   [{"Text": "What is the weather today?", "Label": "GET_WEATHER"}]
var jsonPath = "./Data/intent_seed_v6.json";

// Guard clause: with no seed file there is nothing to load, so report it and stop here.
if (File.Exists(jsonPath) == false)
{
    Console.WriteLine($"Seed file '{jsonPath}' not found. Please ensure it exists in the current directory.");
    return;
}

// JSON property names are matched against QueryRecord properties ignoring case.
var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
var json = File.ReadAllText(jsonPath);

// A null deserialization result is replaced by an empty list so 'seed' is never null.
var seed = JsonSerializer.Deserialize<List<QueryRecord>>(json, jsonOptions)
           ?? new List<QueryRecord>();

// Optional preview of the first rows (left disabled):
// Console.WriteLine(string.Join(", ", seed.Take(5).Select(r => $"[{r.Label}] {r.Text}")));
Console.WriteLine($"Loaded rows: {seed.Count}");

Models defined ✔
Loaded rows: 363


In [5]:
// Fixed seed (42 — the answer to the Ultimate Question of Life, the Universe, and
// Everything) passed to the MLContext.
var ml = new MLContext(seed: 42);

// Load the in-memory list as an IDataView.
// IDataView is ML.NET's representation of tabular data (similar to a table or DataFrame);
// training pipelines consume it rather than plain lists or arrays.
// This converts the in-memory list (seed is a List<QueryRecord>) to an IDataView.
var data = ml.Data.LoadFromEnumerable(seed);

// Train/test split.
// - Training set: used to fit the model.
// - Testing set:  used to evaluate the model on data it was not trained on, which
//                 shows whether the model generalizes instead of overfitting.
// - testFraction: share of rows held out for testing (0.25 = 25%).
var split = ml.Data.TrainTestSplit(data, testFraction: 0.25);

// Pipeline:
//   1. Map the string "Label" column to a key column (same column name).
//   2. Featurize the query text into a "Features" column.
//   3. Train an SDCA maximum-entropy multiclass classifier on Label/Features.
//   4. Map the predicted key back to its string value in "PredictedLabel".
var pipeline = ml.Transforms.Conversion.MapValueToKey(
                        inputColumnName: nameof(QueryRecord.Label),
                        outputColumnName: "Label")
              .Append(ml.Transforms.Text.FeaturizeText(
                        outputColumnName: "Features",
                        inputColumnName: nameof(QueryRecord.Text)))
              .Append(ml.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                        labelColumnName: "Label", featureColumnName: "Features"))
              .Append(ml.Transforms.Conversion.MapKeyToValue(
                        outputColumnName: nameof(IntentPrediction.PredictedLabel),
                        inputColumnName: "PredictedLabel"));

// Fit on the training split only. On failure the message is printed and the original
// exception is rethrown (stack trace preserved), so the cell still fails visibly.
ITransformer model;
try
{
    model = pipeline.Fit(split.TrainSet);

    // Success is reported exactly once, and only after Fit has returned.
    Console.WriteLine("Model trained ✔");
}
catch (Exception ex)
{
    Console.WriteLine("Training failed: " + ex.Message);
    throw;
}

Model trained ✔
Model trained ✔


In [6]:
using Microsoft.ML.Data;

// 1) Score the test set and compute multiclass metrics.
var scored = model.Transform(split.TestSet);
var metrics = ml.MulticlassClassification.Evaluate(
    scored,
    labelColumnName: "Label",
    scoreColumnName: "Score",
    predictedLabelColumnName: "PredictedLabel"
);

// 2) Read the class names from the key-value metadata of the "Label" column.
//    NOTE(review): the reports below assume index i of these names corresponds to
//    index i of the per-class metrics and confusion matrix — confirm.
var labelCol = scored.Schema["Label"];
VBuffer<ReadOnlyMemory<char>> keyValues = default;
labelCol.GetKeyValues(ref keyValues);
var classNames = keyValues.DenseValues().Select(v => v.ToString()).ToArray();

// Single bounds-checked name lookup used by every report below, so a mismatch between
// the number of names and the number of metric entries degrades to a "class_N"
// placeholder instead of throwing IndexOutOfRangeException.
string ClassNameAt(int index) =>
    index >= 0 && index < classNames.Length ? classNames[index] : $"class_{index}";

// 3) Print per-class log-loss with names.
Console.WriteLine("Per-class LogLoss:");
for (int i = 0; i < metrics.PerClassLogLoss.Count; i++)
{
    var name = ClassNameAt(i);
    Console.WriteLine($"{i,2}: {name,-24} -> {metrics.PerClassLogLoss[i]:F3}");
}

// 4) Confusion matrix with row/column headers.
var cm = metrics.ConfusionMatrix;
Console.WriteLine("\nConfusion Matrix (rows=true, cols=pred):");

// Truncates or right-pads s to exactly w characters so the columns line up.
string Pad(string s, int w) => s.Length > w ? s.Substring(0, w) : s.PadRight(w);

int w = 24;
Console.Write(Pad("true\\pred", w));

// The header is driven by the matrix size (not the name count) so it always has the
// same number of columns as the rows printed below.
for (int j = 0; j < cm.Counts.Count; j++)
{
    Console.Write(Pad(ClassNameAt(j), w));
}
Console.WriteLine();

for (int i = 0; i < cm.Counts.Count; i++)
{
    Console.Write(Pad(ClassNameAt(i), w));
    for (int j = 0; j < cm.Counts[i].Count; j++)
    {
        Console.Write(Pad(cm.Counts[i][j].ToString(), w));
    }
    Console.WriteLine();
}

// 5) Highlight the class with the highest log-loss.
//    worstIdx is -1 when there are no per-class values, so First() is never called on
//    an empty sequence.
int worstIdx = metrics.PerClassLogLoss.Count == 0
    ? -1
    : Enumerable.Range(0, metrics.PerClassLogLoss.Count)
                .OrderByDescending(k => metrics.PerClassLogLoss[k])
                .First();
if (worstIdx >= 0)
{
    Console.WriteLine($"\nWorst by LogLoss: [{worstIdx}] {ClassNameAt(worstIdx)} = {metrics.PerClassLogLoss[worstIdx]:F3}");
}


Per-class LogLoss:
 0: GET_CONTACT_ADDRESS      -> 0.060
 1: GET_CONTACT_INFO         -> 0.328
 2: GET_CONTACT_PHONE        -> 0.235
 3: GET_CONTACT_EMAIL        -> 0.042
 4: FILTER_BY_HIRE_DATE      -> 0.605
 5: FILTER_BY_DEPARTMENT     -> 0.226
 6: FILTER_BY_LOCATION       -> 0.439
 7: FILTER_BY_BIRTHDAY       -> 0.251

Confusion Matrix (rows=true, cols=pred):
true\pred               GET_CONTACT_ADDRESS     GET_CONTACT_INFO        GET_CONTACT_PHONE       GET_CONTACT_EMAIL       FILTER_BY_HIRE_DATE     FILTER_BY_DEPARTMENT    FILTER_BY_LOCATION      FILTER_BY_BIRTHDAY      
GET_CONTACT_ADDRESS     8                       0                       0                       0                       0                       0                       0                       0                       
GET_CONTACT_INFO        0                       8                       0                       0                       0                       0                       0                       0        

In [7]:
// Score the test split with the trained model and compute the multiclass metrics
// (accuracy, log-loss, ...) that are printed below.
var testPredictions = model.Transform(split.TestSet);
var metrics = ml.MulticlassClassification.Evaluate(
    testPredictions,
    labelColumnName: "Label",
    scoreColumnName: "Score");

// Per-class log-loss values, each formatted to three decimals and comma-separated.
var perClassLogLossText = string.Join(
    ", ",
    metrics.PerClassLogLoss.Select(loss => loss.ToString("F3")));

Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy:F3}");
Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy:F3}");
Console.WriteLine($"LogLoss:       {metrics.LogLoss:F3}");
Console.WriteLine($"PerClassLogLoss: [{perClassLogLossText}]");


MicroAccuracy: 0.929
MacroAccuracy: 0.924
LogLoss:       0.252
PerClassLogLoss: [0.060, 0.328, 0.235, 0.042, 0.605, 0.226, 0.439, 0.251]


In [8]:
// Save the trained model, together with the training set's schema, to
// "intent_model.zip" in the current working directory.
var modelPath = Path.Combine(Directory.GetCurrentDirectory(), "intent_model.zip");
using (var fs = File.Create(modelPath))
{
    ml.Model.Save(model, split.TrainSet.Schema, fs);
}

Console.WriteLine($"Saved: {modelPath}");

// Reload the model from disk to confirm the saved file is usable.
ITransformer reloadedModel;
using (var fs = File.OpenRead(modelPath))
{
    // The input schema returned by Load is not needed here, so it is discarded.
    reloadedModel = ml.Model.Load(fs, out _);
}

// Smoke test: run one prediction through the reloaded model.
// NOTE(review): engine2 is never disposed in this cell — presumably it is kept alive
// for reuse in later cells; confirm, and dispose it once it is no longer needed.
var engine2 = ml.Model.CreatePredictionEngine<QueryRecord, IntentPrediction>(reloadedModel);
var check = engine2.Predict(new QueryRecord { Text = "list engineers" });
Console.WriteLine($"Reloaded model prediction: {check.PredictedLabel}");


Saved: /Users/maneki-neko/learning/NLP/intent_model.zip
Reloaded model prediction: FILTER_BY_DEPARTMENT
