1. Install needed nugets for demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"
#r "nuget:Microsoft.ML.FastTree, *-*"

2. Add global usings

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Add input data model

In [1]:
public class AutismInput
{
	[LoadColumn(0)]
	public float Id {get;set;}

	[LoadColumn(1)]
	public float A1 {get;set;}

	[LoadColumn(2)]
	public float A2 {get;set;}

	[LoadColumn(3)]
	public float A3 {get;set;}

	[LoadColumn(4)]
	public float A4 {get;set;}

	[LoadColumn(5)]
	public float A5 {get;set;}

	[LoadColumn(6)]
	public float A6 {get;set;}

	[LoadColumn(7)]
	public float A7 {get;set;}

	[LoadColumn(8)]
	public float A8 {get;set;}

	[LoadColumn(9)]
	public float A9 {get;set;}

	[LoadColumn(10)]
	public float A10 {get;set;}

	[LoadColumn(11)]
	public float Age {get;set;}

	[LoadColumn(12)]
	public string Gender {get;set;}

	[LoadColumn(13)]
	public string Ethnicity {get;set;}

	[LoadColumn(14)]
	public string Jundice {get;set;}

	[LoadColumn(15)]
	public string Autism {get;set;}

	[LoadColumn(16)]
	public string Country {get;set;}

	[LoadColumn(17)]
	public string UsedApp {get;set;}

	[LoadColumn(18)]
	public float Result {get;set;}

	[LoadColumn(19)]
	public float AgeDesc {get;set;}

	[LoadColumn(20)]
	public string Relation {get;set;}

	[LoadColumn(21)]
	public bool Class {get;set;}
}

4. Add data model output

In [1]:
public class AutismOutput
{
	[ColumnName("PredictedLabel")]
	public bool ASD {get;set;}
}

5. Load the dataset and split for test and training sets

In [1]:
var mlContext = new MLContext();
var trainingData = mlContext.Data.LoadFromTextFile<AutismInput>("../../../ML.NET.Demo/Assets/csv_result-Autism_Data.csv", hasHeader: true, separatorChar: ',');
var dataSplit = mlContext.Data.TrainTestSplit(trainingData, testFraction: 0.3);

6. *Optional* Display dataset

In [1]:
dataSplit.TestSet.ToTabularDataResource().Display();

7. Train the model (Feature Engineering)

In [1]:
var model = mlContext.BinaryClassification.Trainers.FastTree();


var pipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "GenderFeature", inputColumnName: nameof(AutismInput.Gender))
					.Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "EthinicityFeature", inputColumnName: nameof(AutismInput.Ethnicity)))
					.Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "CountryFeature", inputColumnName: nameof(AutismInput.Country)))
					.Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "JundiceFeature", inputColumnName: nameof(AutismInput.Jundice)))
					.Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "AutismFeature", inputColumnName: nameof(AutismInput.Autism)))
					.Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "UsedAppFeature", inputColumnName: nameof(AutismInput.UsedApp)))
					.Append(mlContext.Transforms.Concatenate("Features", new []{
																nameof(AutismInput.A1),
																nameof(AutismInput.A2),
																nameof(AutismInput.A3),
																nameof(AutismInput.A4),
																nameof(AutismInput.A5),
																nameof(AutismInput.A6),
																nameof(AutismInput.A7),
																nameof(AutismInput.A8),
																nameof(AutismInput.A9),
																nameof(AutismInput.A10),
																nameof(AutismInput.Age),
																"GenderFeature",
																"JundiceFeature",
																"AutismFeature",
																"EthinicityFeature",
																"CountryFeature"
																}))
					.Append(mlContext.Transforms.CopyColumns("Label", nameof(AutismInput.Class)))
					.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features")
					.Append(model));

var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate the model

In [1]:
var testSet = trainedModel.Transform(dataSplit.TestSet);
var metrics = mlContext.BinaryClassification.Evaluate(testSet);

metrics

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.0828897568838629,0.9010993274359176,0.8381111547057962,0.998178269355888,0.9848484848484848,0.9629629629629628,0.981132075471698,0.9930555555555556,0.986206896551724,0.97196261682243,0.9948376807564988,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9629629629629629, 0.9930555555555556 ], PerClassRecall: [ 0.9811320754716981, 0.9862068965517241 ], Counts: [ [ 52, 1 ], [ 2, 143 ] ], NumberOfClasses: 2 }"


9. Save the model

In [1]:
mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, "./DTBinaryClassification.mdl");

10. Load and test

In [1]:
var newSample = new AutismInput
{
	Id = 3f,
	A1 = 1f,
	A2 = 1f,
	A3 = 1f,
	A4 = 1f,
	A5 = 0f,
	A6 = 0f,
	A7 = 1f,
	A8 = 1f,
	A9 = 0f,
	A10 = 0f,
	Age = 26f,
	Gender = "f",
	Ethnicity = "White-European",
	Jundice = "no",
	Autism = "no",
	Country = "United States",
	UsedApp = "no",
	Result = 6f,
	AgeDesc = 18f,
	Relation = "Self"
};

using (var stream = new FileStream("./DTBinaryClassification.mdl", FileMode.Open, FileAccess.Read))
{
    var model = mlContext.Model.Load(stream, out var _);
    var predictionEngine = mlContext.Model.CreatePredictionEngine<AutismInput, AutismOutput>(model);
    var results = predictionEngine.Predict(newSample);

    results.Display();
}

ASD
False
