Add Packages

In [None]:
#r "nuget:Microsoft.ML, *-*"
#r "nuget:Microsoft.ML.Recommender, *-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab, *-*"

Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`

Loading extensions from `Microsoft.DotNet.Interactive.ExtensionLab.dll`

In [None]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using Microsoft.ML.Trainers.Recommender;

using System.IO;
using System.Text;

Data Models

In [None]:
/// <summary>
/// One row of the labelled input file: a piece of text and its boolean sentiment flag.
/// </summary>
public class SentimentData
{
    /// <summary>Text of the sample, read from column 0 of the input file.</summary>
    [LoadColumn(0)]
    public string SentimentText;

    /// <summary>
    /// Sentiment flag, read from column 1 of the input file and exposed to the
    /// pipeline under the column name "Label".
    /// </summary>
    [LoadColumn(1)]
    [ColumnName("Label")]
    public bool Sentiment;
}

/// <summary>
/// Output row produced when the trained pipeline scores a <see cref="SentimentData"/> sample.
/// Inherits the input columns so the original text travels with the prediction.
/// </summary>
public class SentimentPrediction : SentimentData
{
    /// <summary>Predicted class for the sample.</summary>
    // ML.NET binary classifiers write their decision to the "PredictedLabel" column
    // (one word, no space). A name that does not match is skipped by the prediction
    // engine's default missing-column handling, which leaves this property at false
    // for every input.
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>Value of the "Probability" output column.</summary>
    [ColumnName("Probability")]
    public float Probability { get; set; }

    /// <summary>Value of the "Score" output column (bound by property name).</summary>
    public float Score { get; set; }
}

Load And Split Data

In [None]:
// Single MLContext reused by the loading, training, evaluation and persistence cells below.
var mlContext = new MLContext();

// Read the labelled text file (no header row) into a data view typed by SentimentData.
IDataView data = mlContext.Data.LoadFromTextFile<SentimentData>(
    path: "./Data/imdb_labelled.txt",
    hasHeader: false);

// Hold out 30% of the rows as the test set; the remainder is the training set.
DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);

Train The Model

In [None]:
// Featurization step: convert the SentimentText column into the "Features" column.
var pipeline = mlContext.Transforms.Text.FeaturizeText(
    outputColumnName: "Features",
    inputColumnName: nameof(SentimentData.SentimentText));

// Learner step: SDCA logistic regression reading "Features" and trained against "Label".
// Note: despite its name, `model` is the untrained estimator; the fitted model is `trainedModel`.
var model = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");

// Chain both steps and fit them on the training portion of the split.
var trainingPipeline = pipeline.Append(model);
var trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

Evaluate The Model

In [None]:
// Score the held-out test rows with the fitted pipeline.
var eval = trainedModel.Transform(dataSplit.TestSet);

// Compute binary-classification metrics by comparing the scored rows against "Label".
var metrics = mlContext.BinaryClassification.Evaluate(
    data: eval,
    labelColumnName: "Label");

In [None]:
// Trailing expression without a semicolon: the notebook renders its value as the cell output.
metrics

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.8240216438011468,0.1735820420374069,0.9971003604914952,0.8182044515261664,0.7394366197183099,0.7153284671532847,0.7368421052631579,0.7619047619047619,0.7417218543046358,0.7259259259259261,0.818909158207526,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.7153284671532847, 0.7619047619047619 ], PerClassRecall: [ 0.7368421052631579, 0.7417218543046358 ], Counts: [ [ 98, 35 ], [ 39, 112 ] ], NumberOfClasses: 2 }"


Save The Model

In [None]:
// Persist the fitted pipeline together with the training set's schema
// so it can be reloaded from "./Sentiment.mdl" later.
mlContext.Model.Save(
    model: trainedModel,
    inputSchema: dataSplit.TrainSet.Schema,
    filePath: "./Sentiment.mdl");

Load The Model And Predict

In [None]:
// Reload the model that was saved to disk and score one hand-written sample with it.
using (var stream = new FileStream("./Sentiment.mdl", FileMode.Open, FileAccess.Read, FileShare.Read))
{
    // The input schema returned alongside the model is not needed here, so it is discarded.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine implements IDisposable; the using block releases it
    // as soon as the sample has been scored instead of leaving it to the GC.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(loadedModel))
    {
        var newSample = new SentimentData
        {
            SentimentText = "This is good"
        };

        var sentiment = predictionEngine.Predict(newSample);
        Console.WriteLine(sentiment.Prediction);
    }
}

False
