1. Install NuGet packages

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`

2. Add using directives

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Add input data model

In [1]:
/// <summary>
/// One row of the input dataset: a piece of review text together with its boolean label.
/// </summary>
public class ReviewInput
{
	/// <summary>Review text, loaded from column 0 of the source file.</summary>
	[LoadColumn(0)]
	public string Text { get; set; }

	/// <summary>
	/// Label loaded from column 1 of the source file
	/// (presumably <c>true</c> for a positive review; confirm against the dataset).
	/// </summary>
	[LoadColumn(1)]
	public bool Label { get; set; }
}

4. Add output data model

In [1]:
/// <summary>
/// Prediction result produced for a single <c>ReviewInput</c>.
/// </summary>
public class ReviewOutput
{
	/// <summary>Value read from the model's "PredictedLabel" output column.</summary>
	[ColumnName("PredictedLabel")]
	public bool IsPositive { get; set; }
}

5. Load the dataset and split it into training and test sets

In [1]:
// Shared ML.NET context used by every following cell.
MLContext mlContext = new MLContext();

// Load the whole dataset, mapping file columns onto ReviewInput via its [LoadColumn] attributes.
IDataView trainingData = mlContext.Data.LoadFromTextFile<ReviewInput>(
	path: "../../../ML.NET.Demo/Assets/movie_reviews.txt");

// Hold back 20% of the rows as a test set; the remainder becomes the training set.
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(
	trainingData,
	testFraction: 0.2);

6. *Optional:* Display the test set

In [1]:
// Render the held-out test split as a table in the notebook output.
dataSplit.TestSet
	.ToTabularDataResource()
	.Display();

7. Train the model (Feature Engineering)

In [1]:
// Note: despite its name, `model` is the (untrained) SDCA logistic-regression trainer;
// the fitted result is `trainedModel` below.
var model = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression();

// Pipeline stages, in order:
//   1. featurize the Text column into "Features"
//   2. copy the Label column to "Label"
//   3. min-max normalize "Features" in place
//   4. the trainer itself
var pipeline = mlContext.Transforms.Text
	.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(ReviewInput.Text))
	.Append(mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(ReviewInput.Label)))
	.Append(mlContext.Transforms.NormalizeMinMax(outputColumnName: "Features", inputColumnName: "Features"))
	.Append(model);

// Fit every stage on the training split only.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate model

In [1]:
// Run the fitted pipeline over the held-out split; `testSet` therefore holds the transformed rows.
IDataView testSet = trainedModel.Transform(dataSplit.TestSet);

// Compute binary-classification metrics using the evaluator's default column names.
CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(data: testSet);

// Trailing expression (no semicolon) so the notebook renders the metrics object.
metrics

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
1.0388958512377804,-0.0394319553795088,0.9994842335383728,0.8379120879120879,0.7593582887700535,0.7446808510638298,0.7692307692307693,0.7741935483870968,0.75,0.7567567567567567,0.8499357270636541,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.7446808510638298, 0.7741935483870968 ], PerClassRecall: [ 0.7692307692307693, 0.75 ], Counts: [ [ 70, 21 ], [ 24, 72 ] ], NumberOfClasses: 2 }"


9. Save the model

In [1]:
// Persist the fitted pipeline together with the schema of the data it was trained on.
mlContext.Model.Save(
	model: trainedModel,
	inputSchema: dataSplit.TrainSet.Schema,
	filePath: "./Sentiment.mdl");

10. Load the saved model and test a prediction

In [1]:
// A single hand-written review to score. Only Text is set; Label keeps its default value.
var newSample = new ReviewInput
{
	Text = "It deserves strong love."
};

// Reload the model from the file written by the save step.
using (var stream = new FileStream("./Sentiment.mdl", FileMode.Open, FileAccess.Read))
{
    // The input schema returned through the out parameter is not needed here, so it is discarded.
    // Named `loadedModel` to avoid confusion with the top-level trainer variable `model`.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine implements IDisposable; scope it with `using` so it is
    // released as soon as the prediction has been displayed instead of leaking.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<ReviewInput, ReviewOutput>(loadedModel))
    {
        var results = predictionEngine.Predict(newSample);

        results.Display();
    }
}

IsPositive
True
