1. Install need nugets for the demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

2. Add global usings

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Add input data model

In [1]:
public class MallInput
{
	[LoadColumn(0)]
	public float CustId {get;set;}

	[LoadColumn(1)]
	public string Gender {get;set;}

	[LoadColumn(2)]
	public float Age {get;set;}

	[LoadColumn(3)]
	public float Income {get;set;}
	
	[LoadColumn(4)]
	public float Spending {get;set;}
}


4. Add output data model

In [1]:
public class MallOutput
{
	[ColumnName("Score")]
	public float[] Scores {get;set;}

	[ColumnName("PredictedLabel")]
	public UInt32 Label {get;set;}
}

5. Load the dataset and split for test and training splits

In [1]:
var mlContext = new MLContext();
var trainingData = mlContext.Data.LoadFromTextFile<MallInput>("../../../ML.NET.Demo/Assets/Mall_Customers.csv", hasHeader: true, separatorChar: ',');
var dataSplit = mlContext.Data.TrainTestSplit(trainingData, testFraction: 0.2);

6. *Optional* Display dataset

In [1]:
dataSplit.TestSet.ToTabularDataResource().Display();

7. Train the model (Feature Engineering)

In [1]:
var model = mlContext.Clustering.Trainers.KMeans(numberOfClusters: 3);

var pipeline = mlContext.Transforms.Text.FeaturizeText("GenderFeature", nameof(MallInput.Gender))
					.Append(mlContext.Transforms.Concatenate("Features", new[]
													{
														"GenderFeature",
														nameof(MallInput.Age),
														nameof(MallInput.CustId),
														nameof(MallInput.Income),
														nameof(MallInput.Spending),
													}))
					.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
					.Append(model);

var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evalute the model

In [1]:
var testSet = trainedModel.Transform(dataSplit.TestSet);
var metrics = mlContext.Clustering.Evaluate(data: testSet,
                                            labelColumnName: "PredictedLabel",
                                            scoreColumnName: "Score",
                                            featureColumnName: "Features");

metrics

NormalizedMutualInformation,AverageDistance,DaviesBouldinIndex
1,0.2006728172302246,1.2843640074688942


9. Save the model

In [1]:
mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, "./Clustering.mdl");

10. Load the model and test

In [1]:
var newSample = new MallInput
{
	Gender = "Male",
	Age = 19f,
	Income = 15f,
	Spending = 39f

};

using (var stream = new FileStream("./Clustering.mdl", FileMode.Open, FileAccess.Read))
{
    var model = mlContext.Model.Load(stream, out var _);
    var predictionEngine = mlContext.Model.CreatePredictionEngine<MallInput, MallOutput>(model);
    var results = predictionEngine.Predict(newSample);

    results.Display();
}

Scores,Label
"[ 0.45443535, 6.658345, 6.5866833 ]",1
