
### This Interactive Notebook was generated by ML.NET Tooling.

The code below demonstrates how to:

1. Define the model input and output schema
1. Load in data from a text file to an IDataView
1. Set up the training pipeline with data transforms
1. Choose an algorithm and append it to the pipeline
1. Train the model
1. Evaluate the model
1. Consume the model


## Install the necessary NuGet packages for training an ML.NET model and plotting:

In [1]:

/* ML.NET Model Builder generated Notebook file. Notebook files contain both code snippets and rich text elements.
Use the "run" button in the left margin to execute each code snippet and explore ML.NET. */

#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.ML,1.7.0"
#r "nuget:Microsoft.ML.FastTree,1.7.0"
#r "nuget:Microsoft.Data.Analysis,0.19.0"


In [1]:

// Import common usings.
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

## Define the model input and output schemas:

In [1]:
﻿// Define the model input schema (which columns you will be loading in for training)
/// <summary>
/// Shape of one row of the training file. Each property is bound to a
/// zero-based column position in the text file via <c>LoadColumn</c> and is
/// exposed to the ML.NET pipeline under the name given by <c>ColumnName</c>.
/// </summary>
public class ModelInput
{
    /// <summary>Numeric value read from file column 0; pipeline column "col0".</summary>
    [LoadColumn(0)]
    [ColumnName(@"col0")]
    public float Col0 { get; set; }

    /// <summary>Numeric value read from file column 1; pipeline column "col1".</summary>
    [LoadColumn(1)]
    [ColumnName(@"col1")]
    public float Col1 { get; set; }

    /// <summary>Numeric value read from file column 2; pipeline column "col2".</summary>
    [LoadColumn(2)]
    [ColumnName(@"col2")]
    public float Col2 { get; set; }

    /// <summary>Numeric value read from file column 3; pipeline column "col3".</summary>
    [LoadColumn(3)]
    [ColumnName(@"col3")]
    public float Col3 { get; set; }

    /// <summary>
    /// Text value read from file column 4; pipeline column "col4". The training
    /// pipeline in this notebook uses "col4" as its label column.
    /// </summary>
    [LoadColumn(4)]
    [ColumnName(@"col4")]
    public string Col4 { get; set; }
}


// Define the model output schema (what the model will return)
/// <summary>
/// Shape of a single prediction returned by the prediction engine.
/// </summary>
public class ModelOutput
{
    /// <summary>
    /// Value of the pipeline's "PredictedLabel" column, i.e. the predicted class
    /// after it has been mapped back from a key to its original text value.
    /// </summary>
    [ColumnName("PredictedLabel")]
    public string PredictedLabel { get; set; }

    /// <summary>
    /// Score vector emitted by the model (bound by property name to the "Score"
    /// column). NOTE(review): presumably one entry per class - confirm against
    /// the trainer's output schema.
    /// </summary>
    public float[] Score { get; set; }
}




## Create MLContext and load training data:

In [1]:
// MLContext is the entry point for every ML.NET operation used in this notebook.
MLContext mlContext = new MLContext();

// Location of the training file on disk.
string trainDataPath = @"C:\Datasets\iris.data.txt";

// Read the comma-separated, header-less file into an IDataView, using the
// LoadColumn mapping declared on ModelInput to bind file columns to properties.
IDataView trainData = mlContext.Data.LoadFromTextFile<ModelInput>(
    trainDataPath,
    separatorChar: ',',
    hasHeader: false,
    allowQuoting: true,
    allowSparse: false);

// Render the resulting schema in the notebook output.
display(trainData.Schema);



In [1]:
// Heading first, so it is shown even if reading the rows fails.
display(h4("Showing 5 rows from training DataView:"));

// Pull just the first five rows out of the IDataView as ModelInput objects.
// reuseRowObject: false asks for a separate object per row, which is needed
// because the rows are kept in a list after enumeration.
var fewRows = mlContext.Data
    .CreateEnumerable<ModelInput>(trainData, reuseRowObject: false)
    .Take(5)
    .ToList();

display(fewRows);

## Create the training pipeline, choose an algorithm, and train the model:

In [1]:
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers;
using Microsoft.ML;
// Numeric input columns that are cleaned and then packed into the feature vector.
var featureColumns = new[] { @"col0", @"col1", @"col2", @"col3" };

// Each numeric column has its missing values replaced in place (input name == output name).
var missingValueColumns = new[]
{
    new InputOutputColumnPair(@"col0", @"col0"),
    new InputOutputColumnPair(@"col1", @"col1"),
    new InputOutputColumnPair(@"col2", @"col2"),
    new InputOutputColumnPair(@"col3", @"col3"),
};

// Hyper-parameters for the binary FastTree learner that one-versus-all wraps.
var fastTreeOptions = new FastTreeBinaryTrainer.Options()
{
    NumberOfLeaves = 4,
    MinimumExampleCountPerLeaf = 20,
    NumberOfTrees = 4,
    MaximumBinCountPerFeature = 256,
    FeatureFraction = 1,
    LearningRate = 0.1,
    LabelColumnName = @"col4",
    FeatureColumnName = @"Features",
};

// Data transforms followed by the trainer:
//   1. replace missing values in the numeric columns,
//   2. concatenate them into a single "Features" column,
//   3. convert the text label "col4" into a key,
//   4. train a one-versus-all multiclass model on top of binary FastTree,
//   5. convert the predicted key back into its original label value.
var pipeline = mlContext.Transforms.ReplaceMissingValues(missingValueColumns)
    .Append(mlContext.Transforms.Concatenate(@"Features", featureColumns))
    .Append(mlContext.Transforms.Conversion.MapValueToKey(
        outputColumnName: @"col4",
        inputColumnName: @"col4"))
    .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(
        binaryEstimator: mlContext.BinaryClassification.Trainers.FastTree(fastTreeOptions),
        labelColumnName: @"col4"))
    .Append(mlContext.Transforms.Conversion.MapKeyToValue(
        outputColumnName: @"PredictedLabel",
        inputColumnName: @"PredictedLabel"));

// Fit the whole pipeline to the training data to obtain the trained model.
var model = pipeline.Fit(trainData);



## Consume the model

In [1]:
﻿ // Define sample model input
// Sample row used for a single prediction. Col4 holds the actual (known) label
// of this sample so the printout below can show it next to the prediction; the
// label is not one of the feature columns fed to the trainer.
var sampleData = new ModelInput()
{
    Col0 = 4.9F,
    Col1 = 3F,
    Col2 = 1.4F,
    Col3 = 0.2F,
    Col4 = @"Iris-setosa",
};

// Create a Prediction Engine (used to make single predictions)
var predEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);
// Use the model and Prediction Engine to predict on new sample data
var predictionResult = predEngine.Predict(sampleData);
Console.WriteLine("Using model to make single prediction -- Comparing actual Col4 with predicted Col4 from sample data...\n\n");

// Print the values straight from sampleData rather than repeating them as
// literals, so that editing the sample above cannot leave this output stale.
Console.WriteLine($"Col0: {sampleData.Col0}");
Console.WriteLine($"Col1: {sampleData.Col1}");
Console.WriteLine($"Col2: {sampleData.Col2}");
Console.WriteLine($"Col3: {sampleData.Col3}");
Console.WriteLine($"Col4: {sampleData.Col4}");

Console.WriteLine($"\n\nPredicted Col4: {predictionResult.PredictedLabel}\n\n");


## Evaluate the model:

In [1]:
// Evaluate the model using the cross validation method
// Learn more about cross validation at https://aka.ms/mlnet-cross-validation

var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainData, pipeline, numberOfFolds: 5, labelColumnName:"col4");

// Materialize the per-fold metrics (and each metric series below) once, so the
// repeated Average / standard-deviation / interval calls do not re-run the
// deferred LINQ queries every time.
var metricsInMultipleFolds = crossValidationResults.Select(r => r.Metrics).ToList();

var microAccuracyValues = metricsInMultipleFolds.Select(m => m.MicroAccuracy).ToArray();
var microAccuracyAverage = microAccuracyValues.Average();
var microAccuraciesStdDeviation = CalculateStandardDeviation(microAccuracyValues);
var microAccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(microAccuracyValues);

var macroAccuracyValues = metricsInMultipleFolds.Select(m => m.MacroAccuracy).ToArray();
var macroAccuracyAverage = macroAccuracyValues.Average();
var macroAccuraciesStdDeviation = CalculateStandardDeviation(macroAccuracyValues);
var macroAccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(macroAccuracyValues);

var logLossValues = metricsInMultipleFolds.Select(m => m.LogLoss).ToArray();
var logLossAverage = logLossValues.Average();
var logLossStdDeviation = CalculateStandardDeviation(logLossValues);
var logLossConfidenceInterval95 = CalculateConfidenceInterval95(logLossValues);

var logLossReductionValues = metricsInMultipleFolds.Select(m => m.LogLossReduction).ToArray();
var logLossReductionAverage = logLossReductionValues.Average();
var logLossReductionStdDeviation = CalculateStandardDeviation(logLossReductionValues);
var logLossReductionConfidenceInterval95 = CalculateConfidenceInterval95(logLossReductionValues);

// Print out the evaluation metrics.
// The "0.###" format always emits the integer digit: with "#.###" a value of
// exactly 0 rendered as an empty cell and 0.95 rendered as ".95".
var metricNames = new StringDataFrameColumn("Metric Name", new[] {"Average MicroAccuracy", "Average MacroAccuracy", "Average LogLoss", "Average LogLossReduction"});
var metricValues = new StringDataFrameColumn("Value", new[] {$"{microAccuracyAverage:0.###}", $"{macroAccuracyAverage:0.###}", $"{logLossAverage:0.###}", $"{logLossReductionAverage:0.###}"});
var standardDeviationValues = new StringDataFrameColumn("Standard deviation", new[] {$"{microAccuraciesStdDeviation:0.###}", $"{macroAccuraciesStdDeviation:0.###}", $"{logLossStdDeviation:0.###}", $"{logLossReductionStdDeviation:0.###}"});
var intervalValues = new StringDataFrameColumn("Confidence Interval 95%", new[] {$"{microAccuraciesConfidenceInterval95:0.###}", $"{macroAccuraciesConfidenceInterval95:0.###}", $"{logLossConfidenceInterval95:0.###}", $"{logLossReductionConfidenceInterval95:0.###}"});

// One row per metric: name, cross-fold average, standard deviation, 95% interval.
var stats = new DataFrame(metricNames, metricValues, standardDeviationValues, intervalValues);

/// <summary>
/// Computes the sample standard deviation (n - 1 denominator) of a sequence.
/// </summary>
/// <param name="values">Observations to summarize. Enumerated exactly once.</param>
/// <returns>
/// The sample standard deviation. For a single observation the result is
/// <see cref="double.NaN"/>, because the computation is 0 / 0.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="values"/> is empty.</exception>
public static double CalculateStandardDeviation(IEnumerable<double> values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Snapshot the input: callers pass deferred LINQ queries, and averaging,
    // summing and counting would otherwise each re-run the query.
    double[] samples = values.ToArray();

    double average = samples.Average();
    double sumOfSquaresOfDifferences = samples.Sum(val => (val - average) * (val - average));
    return Math.Sqrt(sumOfSquaresOfDifferences / (samples.Length - 1));
}

/// <summary>
/// Computes the half-width of a 95% confidence interval for the mean of a
/// sequence, using the normal-approximation multiplier 1.96.
/// </summary>
/// <param name="values">Observations to summarize. Enumerated exactly once.</param>
/// <returns>
/// 1.96 times the standard error of the mean (sample standard deviation divided
/// by the square root of the number of observations). NaN for a single
/// observation, because the sample standard deviation is then NaN.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="values"/> is empty.</exception>
public static double CalculateConfidenceInterval95(IEnumerable<double> values)
{
    if (values is null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    // Snapshot the input so a deferred query is evaluated only once.
    double[] samples = values.ToArray();

    // The standard error divides by sqrt(n). CalculateStandardDeviation already
    // applies the n - 1 correction, so dividing by sqrt(n - 1) here as well
    // would apply it twice and overstate the interval.
    // NOTE(review): with only a handful of folds a Student-t multiplier would
    // be more accurate than 1.96; left unchanged to keep the reported scale.
    double standardError = CalculateStandardDeviation(samples) / Math.Sqrt(samples.Length);
    return 1.96 * standardError;
}

// Trailing expression with no semicolon: leaves the metrics DataFrame as the
// cell's result. NOTE(review): presumably the notebook renders it as a table - confirm.
stats

