1. Add the NuGet packages required for the demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

2. Add the using directives shared by the following cells

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Declare input data model

In [1]:
/// <summary>
/// One row of the breast-cancer CSV file. Each property is bound to a
/// zero-based column index through <c>LoadColumn</c>.
/// </summary>
/// <remarks>
/// The "Fractial" spelling in the property names is kept as-is because the
/// training pipeline refers to these properties through <c>nameof</c>.
/// </remarks>
public class BreastCancerInput
{
    // Column 0: row identifier.
    [LoadColumn(0)] public float Id { get; set; }

    // Column 1: the value the classifier learns to predict.
    [LoadColumn(1)] public bool Label { get; set; }

    // Columns 2-11: the "*Mean" measurements.
    [LoadColumn(2)] public float RadiusMean { get; set; }
    [LoadColumn(3)] public float TextureMean { get; set; }
    [LoadColumn(4)] public float PerimeterMean { get; set; }
    [LoadColumn(5)] public float AreaMean { get; set; }
    [LoadColumn(6)] public float SmoothnessMean { get; set; }
    [LoadColumn(7)] public float CompactnessMean { get; set; }
    [LoadColumn(8)] public float ConcavityMean { get; set; }
    [LoadColumn(9)] public float ConcaveMean { get; set; }
    [LoadColumn(10)] public float SymmetryMean { get; set; }
    [LoadColumn(11)] public float FractialDimensionMean { get; set; }

    // Columns 12-21: the "*Se" measurements.
    [LoadColumn(12)] public float RadiusSe { get; set; }
    [LoadColumn(13)] public float TextureSe { get; set; }
    [LoadColumn(14)] public float PerimeterSe { get; set; }
    [LoadColumn(15)] public float AreaSe { get; set; }
    [LoadColumn(16)] public float SmoothnessSe { get; set; }
    [LoadColumn(17)] public float CompactnessSe { get; set; }
    [LoadColumn(18)] public float ConcavitySe { get; set; }
    [LoadColumn(19)] public float ConcaveSe { get; set; }
    [LoadColumn(20)] public float SymmetrySe { get; set; }
    [LoadColumn(21)] public float FractialDimensionSe { get; set; }

    // Columns 22-31: the "*Worst" measurements.
    [LoadColumn(22)] public float RadiusWorst { get; set; }
    [LoadColumn(23)] public float TextureWorst { get; set; }
    [LoadColumn(24)] public float PerimeterWorst { get; set; }
    [LoadColumn(25)] public float AreaWorst { get; set; }
    [LoadColumn(26)] public float SmoothnessWorst { get; set; }
    [LoadColumn(27)] public float CompactnessWorst { get; set; }
    [LoadColumn(28)] public float ConcavityWorst { get; set; }
    [LoadColumn(29)] public float ConcaveWorst { get; set; }
    [LoadColumn(30)] public float SymmetryWorst { get; set; }
    [LoadColumn(31)] public float FractialDimensionWorst { get; set; }
}

4. Declare the output model

In [1]:
/// <summary>
/// Prediction result. <see cref="IsMalignant"/> is bound to the
/// "PredictedLabel" column through <c>ColumnName</c>.
/// </summary>
public class BreastCancerOutput
{
    [ColumnName("PredictedLabel")] public bool IsMalignant { get; set; }
}

5. Load the dataset and split it into test and training sets

In [1]:
// ML.NET context shared by all of the following cells.
var mlContext = new MLContext();

// Read the complete CSV file: the first row is a header and fields are
// comma-separated. Rows are mapped onto BreastCancerInput.
var trainingData = mlContext.Data.LoadFromTextFile<BreastCancerInput>(
    "../../../ML.NET.Demo/Assets/breast-cancer.csv",
    separatorChar: ',',
    hasHeader: true);

// Reserve 30% of the rows as the test set; the rest becomes the training set.
var dataSplit = mlContext.Data.TrainTestSplit(trainingData, testFraction: 0.3);

6. *Optional:* view the loaded test set

In [1]:
// Convert the test rows to a tabular data resource and display it in the notebook.
dataSplit.TestSet
    .ToTabularDataResource()
    .Display();

7. Build the feature pipeline and train the model (feature engineering)

In [1]:
// Trainer: L-BFGS logistic regression that reads the "Features" vector and
// learns the "Label" column.
var model = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");

// Pipeline, as one flat chain of three steps:
//   1. concatenate the 30 measurement columns into a single "Features" vector,
//   2. min-max normalize that vector in place,
//   3. run the trainer.
// Id and Label are intentionally not part of the feature vector.
// NOTE: the Concave*/Concavity* pairs of the Se and Worst groups are listed in
// the opposite order to BreastCancerInput; the slot order is kept unchanged here.
var pipeline = mlContext.Transforms.Concatenate("Features", new[]
    {
        nameof(BreastCancerInput.RadiusMean),
        nameof(BreastCancerInput.TextureMean),
        nameof(BreastCancerInput.PerimeterMean),
        nameof(BreastCancerInput.AreaMean),
        nameof(BreastCancerInput.SmoothnessMean),
        nameof(BreastCancerInput.CompactnessMean),
        nameof(BreastCancerInput.ConcavityMean),
        nameof(BreastCancerInput.ConcaveMean),
        nameof(BreastCancerInput.SymmetryMean),
        nameof(BreastCancerInput.FractialDimensionMean),
        nameof(BreastCancerInput.RadiusSe),
        nameof(BreastCancerInput.TextureSe),
        nameof(BreastCancerInput.PerimeterSe),
        nameof(BreastCancerInput.AreaSe),
        nameof(BreastCancerInput.SmoothnessSe),
        nameof(BreastCancerInput.CompactnessSe),
        nameof(BreastCancerInput.ConcaveSe),
        nameof(BreastCancerInput.ConcavitySe),
        nameof(BreastCancerInput.SymmetrySe),
        nameof(BreastCancerInput.FractialDimensionSe),
        nameof(BreastCancerInput.RadiusWorst),
        nameof(BreastCancerInput.TextureWorst),
        nameof(BreastCancerInput.PerimeterWorst),
        nameof(BreastCancerInput.AreaWorst),
        nameof(BreastCancerInput.SmoothnessWorst),
        nameof(BreastCancerInput.CompactnessWorst),
        nameof(BreastCancerInput.ConcaveWorst),
        nameof(BreastCancerInput.ConcavityWorst),
        nameof(BreastCancerInput.SymmetryWorst),
        nameof(BreastCancerInput.FractialDimensionWorst)
    })
    // Each Append is applied to the outer chain. Previously the trainer was
    // appended to the normalizer because of a misplaced parenthesis, which
    // nested one estimator chain inside another.
    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
    .Append(model);

// Fit every pipeline step on the training portion of the split.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate the model

In [1]:
// Run the trained pipeline over the test rows; despite its name, `testSet`
// holds the transformed (scored) data.
var testSet = trainedModel.Transform(dataSplit.TestSet);

// Compute the binary-classification metrics against the "Label" column.
var metrics = mlContext.BinaryClassification.Evaluate(
    testSet,
    labelColumnName: "Label");

// Trailing expression without a semicolon: the notebook renders its value.
metrics

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.2429329449396614,0.7324500024329291,0.9079908321761868,0.9908226810881678,0.9580838323353292,0.9607843137254902,0.9074074074074074,0.956896551724138,0.9823008849557522,0.9333333333333332,0.9838390719351008,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9607843137254902, 0.9568965517241379 ], PerClassRecall: [ 0.9074074074074074, 0.9823008849557522 ], Counts: [ [ 49, 5 ], [ 2, 111 ] ], NumberOfClasses: 2 }"


9. Save the model

In [1]:
// Write the fitted pipeline and the training-set schema to a file next to the notebook.
mlContext.Model.Save(
    trainedModel,
    dataSplit.TrainSet.Schema,
    "./BinaryClassification.mdl");

10. Load the saved model and test it on a new sample

In [1]:
// A single hand-written sample to classify. Label is not set here, so it
// keeps its default value.
var newSample = new BreastCancerInput
{
    Id = 842517F,
    RadiusMean = 20.57F,
    TextureMean = 17.77F,
    PerimeterMean = 132.9F,
    AreaMean = 1326F,
    SmoothnessMean = 0.08474F,
    CompactnessMean = 0.07864F,
    ConcavityMean = 0.0869F,
    ConcaveMean = 0.07017F,
    SymmetryMean = 0.1812F,
    FractialDimensionMean = 0.05667F,
    RadiusSe = 0.5435F,
    TextureSe = 0.7339F,
    PerimeterSe = 3.398F,
    AreaSe = 74.08F,
    SmoothnessSe = 0.005225F,
    CompactnessSe = 0.01308F,
    ConcavitySe = 0.0186F,
    ConcaveSe = 0.0134F,
    SymmetrySe = 0.01389F,
    FractialDimensionSe = 0.003532F,
    RadiusWorst = 24.99F,
    TextureWorst = 23.41F,
    PerimeterWorst = 158.8F,
    AreaWorst = 1956F,
    SmoothnessWorst = 0.1238F,
    CompactnessWorst = 0.1866F,
    ConcavityWorst = 0.2416F,
    ConcaveWorst = 0.186F,
    SymmetryWorst = 0.275F,
    FractialDimensionWorst = 0.08902F,
};

// Re-open the model file written by the save step.
using (var stream = new FileStream("./BinaryClassification.mdl", FileMode.Open, FileAccess.Read))
{
    // The schema returned through the out parameter is not needed here.
    // The local is called `loadedModel` so it does not shadow the notebook-level
    // `model` variable, which holds the trainer.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine is IDisposable, so release it once the prediction is done.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<BreastCancerInput, BreastCancerOutput>(loadedModel))
    {
        var results = predictionEngine.Predict(newSample);

        results.Display();
    }
}

IsMalignant
True
