1. Add the NuGet packages required for the demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

2. Add global usings

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Declare input data model

In [1]:
/// <summary>
/// One row of the breast-cancer CSV file. Each property is bound to a
/// zero-based column of the file through <c>LoadColumn</c>.
/// </summary>
public class BreastCancerInput
{
	// --- Identification and target ---

	/// <summary>Sample identifier (column 0); not used as a feature by the training cell.</summary>
	[LoadColumn(0)] public float Id { get; set; }

	/// <summary>Value the model learns to predict (column 1).</summary>
	[LoadColumn(1)] public bool Label { get; set; }

	// --- "Mean" measurements (columns 2-11) ---

	[LoadColumn(2)] public float RadiusMean { get; set; }
	[LoadColumn(3)] public float TextureMean { get; set; }
	[LoadColumn(4)] public float PerimeterMean { get; set; }
	[LoadColumn(5)] public float AreaMean { get; set; }
	[LoadColumn(6)] public float SmoothnessMean { get; set; }
	[LoadColumn(7)] public float CompactnessMean { get; set; }
	[LoadColumn(8)] public float ConcavityMean { get; set; }
	[LoadColumn(9)] public float ConcaveMean { get; set; }
	[LoadColumn(10)] public float SymmetryMean { get; set; }
	[LoadColumn(11)] public float FractialDimensionMean { get; set; }

	// --- "Se" measurements (columns 12-21); "Se" presumably stands for standard error - confirm against the data set ---

	[LoadColumn(12)] public float RadiusSe { get; set; }
	[LoadColumn(13)] public float TextureSe { get; set; }
	[LoadColumn(14)] public float PerimeterSe { get; set; }
	[LoadColumn(15)] public float AreaSe { get; set; }
	[LoadColumn(16)] public float SmoothnessSe { get; set; }
	[LoadColumn(17)] public float CompactnessSe { get; set; }
	[LoadColumn(18)] public float ConcavitySe { get; set; }
	[LoadColumn(19)] public float ConcaveSe { get; set; }
	[LoadColumn(20)] public float SymmetrySe { get; set; }
	[LoadColumn(21)] public float FractialDimensionSe { get; set; }

	// --- "Worst" measurements (columns 22-31) ---

	[LoadColumn(22)] public float RadiusWorst { get; set; }
	[LoadColumn(23)] public float TextureWorst { get; set; }
	[LoadColumn(24)] public float PerimeterWorst { get; set; }
	[LoadColumn(25)] public float AreaWorst { get; set; }
	[LoadColumn(26)] public float SmoothnessWorst { get; set; }
	[LoadColumn(27)] public float CompactnessWorst { get; set; }
	[LoadColumn(28)] public float ConcavityWorst { get; set; }
	[LoadColumn(29)] public float ConcaveWorst { get; set; }
	[LoadColumn(30)] public float SymmetryWorst { get; set; }
	[LoadColumn(31)] public float FractialDimensionWorst { get; set; }
}

4. Declare the output model

In [1]:
/// <summary>
/// Shape of a single prediction returned by the trained model.
/// </summary>
public class BreastCancerOutput
{
	/// <summary>Filled from the model's "PredictedLabel" output column.</summary>
	[ColumnName("PredictedLabel")] public bool IsMalignant { get; set; }
}

5. Load the dataset and split it into test and training sets

In [1]:
// Shared ML.NET entry point; the later cells reuse this instance.
var mlContext = new MLContext();

// Read the complete comma-separated file (first line is a header) as BreastCancerInput rows.
var trainingData = mlContext.Data.LoadFromTextFile<BreastCancerInput>(
	"../../../ML.NET.Demo/Assets/breast-cancer.csv",
	separatorChar: ',',
	hasHeader: true);

// Keep 30% of the rows aside as the test set; the remainder is the training set.
var dataSplit = mlContext.Data.TrainTestSplit(
	trainingData,
	testFraction: 0.3);

6. *Optional:* view the loaded dataset

In [1]:
// Render the held-out test rows as a table in the notebook output.
dataSplit.TestSet.ToTabularDataResource().Display();

Id,Label,RadiusMean,TextureMean,PerimeterMean,AreaMean,SmoothnessMean,CompactnessMean,ConcavityMean,ConcaveMean,SymmetryMean,FractialDimensionMean,RadiusSe,TextureSe,PerimeterSe,AreaSe,SmoothnessSe,CompactnessSe,ConcavitySe,ConcaveSe,SymmetrySe,FractialDimensionSe,RadiusWorst,TextureWorst,PerimeterWorst,AreaWorst,SmoothnessWorst,CompactnessWorst,ConcavityWorst,ConcaveWorst,SymmetryWorst,FractialDimensionWorst
843786,True,12.45,15.7,82.57,477.1,0.1278,0.17,0.1578,0.08089,0.2087,0.07613,0.3345,0.8902,2.217,27.19,0.00751,0.03345,0.03672,0.01137,0.02165,0.005082,15.47,23.75,103.4,741.6,0.1791,0.5249,0.5355,0.1741,0.3985,0.1244
844359,True,18.25,19.98,119.6,1040.0,0.09463,0.109,0.1127,0.074,0.1794,0.05742,0.4467,0.7732,3.18,53.91,0.004314,0.01382,0.02254,0.01039,0.01369,0.002179,22.88,27.66,153.2,1606.0,0.1442,0.2576,0.3784,0.1932,0.3063,0.08368
845636,True,16.02,23.24,102.7,797.8,0.08206,0.06669,0.03299,0.03323,0.1528,0.05697,0.3795,1.187,2.466,40.51,0.004029,0.009269,0.01101,0.007591,0.0146,0.003042,19.19,33.88,123.8,1150.0,0.1181,0.1551,0.1459,0.09975,0.2948,0.08452
849014,True,19.81,22.15,130.0,1260.0,0.09831,0.1027,0.1479,0.09498,0.1582,0.05395,0.7582,1.017,5.865,112.4,0.006494,0.01893,0.03391,0.01521,0.01356,0.001997,27.32,30.88,186.8,2398.0,0.1512,0.315,0.5372,0.2388,0.2768,0.07615
8510653,False,13.08,15.71,85.63,520.0,0.1075,0.127,0.04568,0.0311,0.1967,0.06811,0.1852,0.7477,1.383,14.67,0.004097,0.01898,0.01698,0.00649,0.01678,0.002425,14.5,20.49,96.09,630.5,0.1312,0.2776,0.189,0.07283,0.3184,0.08183
852973,True,15.3,25.27,102.4,732.4,0.1082,0.1697,0.1683,0.08751,0.1926,0.0654,0.439,1.012,3.498,43.5,0.005233,0.03057,0.03576,0.01083,0.01768,0.002967,20.27,36.71,149.3,1269.0,0.1641,0.611,0.6335,0.2024,0.4027,0.09876
853612,True,11.84,18.7,77.93,440.6,0.1109,0.1516,0.1218,0.05182,0.2301,0.07799,0.4825,1.03,3.475,41.0,0.005551,0.03414,0.04205,0.01044,0.02273,0.005667,16.82,28.12,119.4,888.7,0.1637,0.5775,0.6956,0.1546,0.4761,0.1402
85382600,True,17.02,23.98,112.8,899.3,0.1197,0.1496,0.2417,0.1203,0.2248,0.06382,0.6009,1.398,3.999,67.78,0.008268,0.03082,0.05042,0.01112,0.02102,0.003854,20.88,32.09,136.1,1344.0,0.1634,0.3559,0.5588,0.1847,0.353,0.08482
854941,False,13.03,18.42,82.61,523.8,0.08983,0.03766,0.02562,0.02923,0.1467,0.05863,0.1839,2.342,1.17,14.16,0.004352,0.004899,0.01343,0.01164,0.02671,0.001777,13.3,22.81,84.46,545.9,0.09701,0.04619,0.04833,0.05013,0.1987,0.06169
855167,True,13.44,21.58,86.18,563.0,0.08162,0.06031,0.0311,0.02031,0.1784,0.05587,0.2385,0.8265,1.572,20.53,0.00328,0.01102,0.0139,0.006881,0.0138,0.001286,15.93,30.25,102.5,787.9,0.1094,0.2043,0.2085,0.1112,0.2994,0.07146


7. Train the model (feature engineering)

In [1]:
// Trainer: L-BFGS logistic regression that reads the "Label" and "Features" columns.
var model = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: "Label", featureColumnName: "Features");

// Three-stage pipeline:
//   1. concatenate every measurement column into a single "Features" vector
//      (Id and Label are intentionally not part of it),
//   2. min-max normalize that vector in place,
//   3. train the classifier on it.
// The stages are appended as one flat chain. Previously the trainer was appended
// inside the normalizer's Append(...) argument, which built a chain nested in a chain.
var pipeline = mlContext.Transforms.Concatenate("Features", new []{
												nameof(BreastCancerInput.RadiusMean),
												nameof(BreastCancerInput.TextureMean),
												nameof(BreastCancerInput.PerimeterMean),
												nameof(BreastCancerInput.AreaMean),
												nameof(BreastCancerInput.SmoothnessMean),
												nameof(BreastCancerInput.CompactnessMean),
												nameof(BreastCancerInput.ConcavityMean),
												nameof(BreastCancerInput.ConcaveMean),
												nameof(BreastCancerInput.SymmetryMean),
												nameof(BreastCancerInput.FractialDimensionMean),
												nameof(BreastCancerInput.RadiusSe),
												nameof(BreastCancerInput.TextureSe),
												nameof(BreastCancerInput.PerimeterSe),
												nameof(BreastCancerInput.AreaSe),
												nameof(BreastCancerInput.SmoothnessSe),
												nameof(BreastCancerInput.CompactnessSe),
												nameof(BreastCancerInput.ConcaveSe),
												nameof(BreastCancerInput.ConcavitySe),
												nameof(BreastCancerInput.SymmetrySe),
												nameof(BreastCancerInput.FractialDimensionSe),
												nameof(BreastCancerInput.RadiusWorst),
												nameof(BreastCancerInput.TextureWorst),
												nameof(BreastCancerInput.PerimeterWorst),
												nameof(BreastCancerInput.AreaWorst),
												nameof(BreastCancerInput.SmoothnessWorst),
												nameof(BreastCancerInput.CompactnessWorst),
												nameof(BreastCancerInput.ConcaveWorst),
												nameof(BreastCancerInput.ConcavityWorst),
												nameof(BreastCancerInput.SymmetryWorst),
												nameof(BreastCancerInput.FractialDimensionWorst)
												})
					.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
					.Append(model);

// Fit all stages on the training portion of the split only.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate the model

In [1]:
// Run the trained pipeline over the held-out rows; the result carries the prediction columns.
var testSet = trainedModel.Transform(dataSplit.TestSet);

// Compare the predictions against the "Label" column to obtain binary-classification metrics.
var metrics = mlContext.BinaryClassification.Evaluate(testSet, labelColumnName: "Label");

metrics

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.2429329449396614,0.7324500024329291,0.9079908321761868,0.9908226810881678,0.9580838323353292,0.9607843137254902,0.9074074074074074,0.956896551724138,0.9823008849557522,0.9333333333333332,0.9838390719351008,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9607843137254902, 0.9568965517241379 ], PerClassRecall: [ 0.9074074074074074, 0.9823008849557522 ], Counts: [ [ 49, 5 ], [ 2, 111 ] ], NumberOfClasses: 2 }"


9. Save the model

In [1]:
// Persist the fitted pipeline together with the schema of the data it was trained on.
mlContext.Model.Save(
	trainedModel,
	dataSplit.TrainSet.Schema,
	"./BinaryClassification.mdl");

10. Load the model and test

In [1]:
// Sample to classify: the measurements of the first record of the UCI Wisconsin
// Diagnostic Breast Cancer data set (a malignant case).
// The values were previously shifted by one column (RadiusMean held 1, SmoothnessMean held
// the area 1001, RadiusWorst held 0.006193, ...) and the last three were duplicates of
// earlier values, so the sample lay far outside the ranges the model was trained on.
// Id and Label are left at their defaults; the pipeline does not use them as features.
var newSample = new BreastCancerInput
{
	RadiusMean = 17.99f,
	TextureMean = 10.38f,
	PerimeterMean = 122.8f,
	AreaMean = 1001f,
	SmoothnessMean = 0.1184f,
	CompactnessMean = 0.2776f,
	ConcavityMean = 0.3001f,
	ConcaveMean = 0.1471f,
	SymmetryMean = 0.2419f,
	FractialDimensionMean = 0.07871f,
	RadiusSe = 1.095f,
	TextureSe = 0.9053f,
	PerimeterSe = 8.589f,
	AreaSe = 153.4f,
	SmoothnessSe = 0.006399f,
	CompactnessSe = 0.04904f,
	ConcavitySe = 0.05373f,
	ConcaveSe = 0.01587f,
	SymmetrySe = 0.03003f,
	FractialDimensionSe = 0.006193f,
	RadiusWorst = 25.38f,
	TextureWorst = 17.33f,
	PerimeterWorst = 184.6f,
	AreaWorst = 2019f,
	SmoothnessWorst = 0.1622f,
	CompactnessWorst = 0.6656f,
	ConcavityWorst = 0.7119f,
	ConcaveWorst = 0.2654f,
	SymmetryWorst = 0.4601f,
	FractialDimensionWorst = 0.1189f,
};

// Re-open the model file written by the save cell to show the persisted model works on its own.
using (var stream = new FileStream("./BinaryClassification.mdl", FileMode.Open, FileAccess.Read))
{
    // The stored input schema is not needed here, so it is discarded.
    // Named loadedModel so it is not confused with the trainer variable `model` of the training cell.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine is IDisposable; release it once the single prediction is made.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<BreastCancerInput, BreastCancerOutput>(loadedModel))
    {
        var results = predictionEngine.Predict(newSample);

        results.Display();
    }
}

IsMalignant
True
