1. Install nugets for demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`

2. Declare global usings

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Declare input data model

In [1]:
public class DiamondInput
{
	[LoadColumn(0)]
	public float Carat {get;set;}

	[LoadColumn(1)]
	public string Label {get;set;}

	[LoadColumn(2)]
	public string Color {get;set;}

	[LoadColumn(3)]
	public string Clarity {get;set;}

	[LoadColumn(4)]
	public string Polish {get;set;}

	[LoadColumn(5)]
	public string Symmetry {get;set;}

	[LoadColumn(6)]
	public string Report {get;set;}

	[LoadColumn(7)]
	public float Price {get;set;}
}

4. Declare output data model

In [1]:
public class DiamondOutput
{
	[ColumnName("PredictedLabel")]
	public string PredictedLabel {get;set;}
}

5. Load the dataset and split into test and training sets

In [1]:
var mlContext = new MLContext();
var trainingData = mlContext.Data.LoadFromTextFile<DiamondInput>("../../../ML.NET.Demo/Assets/diamond.csv", hasHeader: true, separatorChar: ',');
var dataSplit = mlContext.Data.TrainTestSplit(trainingData, testFraction: 0.3);

6. *Optional* Display dataset

In [1]:
dataSplit.TestSet.ToTabularDataResource().Display();

Carat,Label,Color,Clarity,Polish,Symmetry,Report,Price
1.53,Ideal,E,SI1,ID,ID,AGSL,12791
1.0,Very Good,D,SI1,VG,G,GIA,5747
0.91,Ideal,D,VS2,VG,VG,GIA,6224
1.01,Good,I,SI1,VG,VG,GIA,4238
2.6,Ideal,G,VS2,EX,EX,GIA,37621
2.41,Very Good,H,VS2,EX,EX,GIA,27337
1.01,Ideal,E,VVS2,EX,EX,GIA,9735
2.04,Very Good,D,VVS1,VG,VG,GIA,41867
1.01,Very Good,F,VS2,VG,VG,GIA,6257
1.01,Good,G,SI1,VG,G,GIA,4905


7. Train the model

In [1]:
var model = mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated(labelColumnName: "Label", featureColumnName: "Features");

var pipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: nameof(DiamondInput.Label), outputColumnName: "Label")
				.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(DiamondInput.Clarity), outputColumnName: "ClarityFeature"))
				.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(DiamondInput.Color), outputColumnName: "ColorFeature"))
				.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(DiamondInput.Polish), outputColumnName: "PolishFeature"))
				.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(DiamondInput.Report), outputColumnName: "ReportFeature"))
				.Append(mlContext.Transforms.Text.FeaturizeText(inputColumnName: nameof(DiamondInput.Symmetry), outputColumnName: "ClarityFeature"))
				.Append(mlContext.Transforms.Concatenate("Features", new[]
															{
																"ClarityFeature",
																"ColorFeature",
																"PolishFeature",
																"ReportFeature",
																"ClarityFeature",
																nameof(DiamondInput.Price)
															}))
				.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
				.Append(model)
				.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate the model

In [1]:
var testSet = trainedModel.Transform(dataSplit.TestSet);
var metrics = mlContext.MulticlassClassification.Evaluate(testSet);

metrics

LogLoss,LogLossReduction,MacroAccuracy,MicroAccuracy,TopKAccuracy,TopKPredictionCount,TopKAccuracyForAllK,PerClassLogLoss,ConfusionMatrix
3.862747985690637,-2.182956643887396,0.2796749770610808,0.5604099244875944,0,0,<null>,"[ 3.5163566594743147, 4.330763328129494, 3.7123409862100183, 2.7788407099540557, 11.82147854028524 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.6384803921568627, 0, 0.501953125, 0.36363636363636365, 0 ], PerClassRecall: [ 0.6578282828282829, 0, 0.7229254571026723, 0.01762114537444934, 0 ], Counts: [ [ 521, 0, 268, 0, 3 ], [ 5, 0, 41, 2, 0 ], [ 192, 0, 514, 5, 0 ], [ 22, 0, 201, 4, 0 ], [ 76, 0, 0, 0, 0 ] ], NumberOfClasses: 5 }"


9. Save the model

In [1]:
mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, "./MultiClassification.mdl");

10. Load the model and test

In [1]:
var newSample = new DiamondInput
{
    Carat = 1.1f,
	Color = "H",
	Clarity = "SI1",
	Polish = "VG",
	Symmetry = "EX",
	Report = "GIA",
	Price = 5169
};

using (var stream = new FileStream("./MultiClassification.mdl", FileMode.Open, FileAccess.Read))
{
    var model = mlContext.Model.Load(stream, out _);
    var predictionEngine = mlContext.Model.CreatePredictionEngine<DiamondInput, DiamondOutput>(model);

    var result = predictionEngine.Predict(newSample);

    Console.WriteLine("Prediction: " + result.PredictedLabel);
}

Prediction: Ideal
