Nuget

In [2]:
#r "nuget:Microsoft.ML"
#r "nuget:MathNet.Numerics"
#r "nuget:System.Data.Common"
#r "nuget:Microsoft.ML.AutoML"
#r "nuget:Microsoft.Data.DataFrame,0.1.1-e190920-1"

Reference

In [13]:
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;
using System.IO;
using System.Collections.Generic;
using System.Data;
using System.Text.RegularExpressions;
using System.Text;
using MathNet.Numerics.LinearAlgebra;
using System.Linq;
using System.Reflection;
using Microsoft.Data;
using XPlot.Plotly;
using Microsoft.AspNetCore.Html;

Formatter<DataFrame>.Register((df, writer) =>
{
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c)));
    var rows = new List<List<IHtmlContent>>();
    var take = 20;
    for (var i = 0; i < Math.Min(take, df.RowCount); i++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(i));
        foreach (var obj in df[i])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }
    
    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));
    
    writer.Write(t);
}, "text/html");

Model Class

In [14]:
public class ModelInput
    {
        [ColumnName("Ride Distance (km)"), LoadColumn(0)]
        public float Ride_Distance__km_ { get; set; }


        [ColumnName("Vehicle Type"), LoadColumn(1)]
        public string Vehicle_Type { get; set; }


        [ColumnName("Promo Value"), LoadColumn(2)]
        public float Promo_Value { get; set; }


        [ColumnName("Fare"), LoadColumn(3)]
        public float Fare { get; set; }


        [ColumnName("Paid"), LoadColumn(4)]
        public float Paid { get; set; }


        [ColumnName("Hour"), LoadColumn(5)]
        public float Hour { get; set; }


        [ColumnName("Minute"), LoadColumn(6)]
        public float Minute { get; set; }


        [ColumnName("Class"), LoadColumn(7)]
        public string Class { get; set; }


        [ColumnName("Day"), LoadColumn(8)]
        public string Day { get; set; }


        [ColumnName("Saving"), LoadColumn(9)]
        public float Saving { get; set; }


    }
    
     public class ModelOutput
    {
        // ColumnName attribute is used to change the column name from
        // its default value, which is the name of the field.
        [ColumnName("PredictedLabel")]
        public String Prediction { get; set; }
        public float[] Score { get; set; }
    }

function helpers

In [8]:
private static string TRAIN_DATA_FILEPATH = @"BusinessProfile4.csv";
private static string MODEL_FILEPATH = @"MLModel.zip";
private static MLContext mlContext = new MLContext(seed: 1);

public static void DoAutoML()
        {
            uint ExpTime = 60;
            Console.WriteLine($"AutoML is starting.. wait for {ExpTime} seconds");
            // Load Data
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                                            path: TRAIN_DATA_FILEPATH,
                                            hasHeader: true,
                                            separatorChar: ',',
                                            allowQuoting: true,
                                            allowSparse: false);
            var experimentSettings = new MulticlassExperimentSettings();
            experimentSettings.MaxExperimentTimeInSeconds = ExpTime;
            
            MulticlassClassificationExperiment experiment = mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

            var dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding(new[] { new InputOutputColumnPair("Vehicle Type", "Vehicle Type"), new InputOutputColumnPair("Day", "Day") })
                                      .Append(mlContext.Transforms.Concatenate("Features", new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" }));

            ExperimentResult<Microsoft.ML.Data.MulticlassClassificationMetrics> experimentResult = experiment.Execute(trainingDataView, labelColumnName:"Saving", preFeaturizer:dataProcessPipeline);
            var metrics = experimentResult.BestRun.ValidationMetrics;
            Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:0.##}");
            Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:0.##}");

            // Save model
            SaveModel(mlContext, experimentResult.BestRun.Model, MODEL_FILEPATH, trainingDataView.Schema);
        }

public static void CreateModel()
        {
            // Load Data
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
                                            path: TRAIN_DATA_FILEPATH,
                                            hasHeader: true,
                                            separatorChar: ',',
                                            allowQuoting: true,
                                            allowSparse: false);

            // Build training pipeline
            IEstimator<ITransformer> trainingPipeline = BuildTrainingPipeline(mlContext);

            // Evaluate quality of Model
            Evaluate(mlContext, trainingDataView, trainingPipeline);

            // Train Model
            ITransformer mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);

            // Save model
            SaveModel(mlContext, mlModel, MODEL_FILEPATH, trainingDataView.Schema);
        }

        public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Data process configuration with pipeline data transformations 
            var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("Class", "Class")
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(new[] { new InputOutputColumnPair("Vehicle Type", "Vehicle Type"), new InputOutputColumnPair("Day", "Day") }))
                                      .Append(mlContext.Transforms.Concatenate("Features", new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" }))
                                      .AppendCacheCheckpoint(mlContext);

            // Set the training algorithm 
            var trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Class", featureColumnName: "Features"), labelColumnName: "Class")
                                      .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            return trainingPipeline;
        }

        public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            Console.WriteLine("=============== Training  model ===============");

            ITransformer model = trainingPipeline.Fit(trainingDataView);

            Console.WriteLine("=============== End of training process ===============");
            return model;
        }

        private static void Evaluate(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            // Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline, numberOfFolds: 5, labelColumnName: "Class");
            PrintMulticlassClassificationFoldsAverageMetrics(crossValidationResults);
        }

        private static void SaveModel(MLContext mlContext, ITransformer mlModel, string modelRelativePath, DataViewSchema modelInputSchema)
        {
            // Save/persist the trained model to a .ZIP file
            Console.WriteLine($"=============== Saving the model  ===============");
            mlContext.Model.Save(mlModel, modelInputSchema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));
        }

        public static string GetAbsolutePath(string relativePath)
        {
            //FileInfo _dataRoot = new FileInfo(this.GetType().Assembly.Location);
            string assemblyFolderPath = System.IO.Directory.GetCurrentDirectory();

            string fullPath = Path.Combine(assemblyFolderPath, relativePath);

            return fullPath;
        }

        public static void PrintMulticlassClassificationMetrics(MulticlassClassificationMetrics metrics)
        {
            Console.WriteLine($"************************************************************");
            Console.WriteLine($"*    Metrics for multi-class classification model   ");
            Console.WriteLine($"*-----------------------------------------------------------");
            Console.WriteLine($"    MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
            Console.WriteLine($"    MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
            Console.WriteLine($"    LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better");
            for (int i = 0; i < metrics.PerClassLogLoss.Count; i++)
            {
                Console.WriteLine($"    LogLoss for class {i + 1} = {metrics.PerClassLogLoss[i]:0.####}, the closer to 0, the better");
            }
            Console.WriteLine($"************************************************************");
        }

        public static void PrintMulticlassClassificationFoldsAverageMetrics(IEnumerable<TrainCatalogBase.CrossValidationResult<MulticlassClassificationMetrics>> crossValResults)
        {
            var metricsInMultipleFolds = crossValResults.Select(r => r.Metrics);

            var microAccuracyValues = metricsInMultipleFolds.Select(m => m.MicroAccuracy);
            var microAccuracyAverage = microAccuracyValues.Average();
            var microAccuraciesStdDeviation = CalculateStandardDeviation(microAccuracyValues);
            var microAccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(microAccuracyValues);

            var macroAccuracyValues = metricsInMultipleFolds.Select(m => m.MacroAccuracy);
            var macroAccuracyAverage = macroAccuracyValues.Average();
            var macroAccuraciesStdDeviation = CalculateStandardDeviation(macroAccuracyValues);
            var macroAccuraciesConfidenceInterval95 = CalculateConfidenceInterval95(macroAccuracyValues);

            var logLossValues = metricsInMultipleFolds.Select(m => m.LogLoss);
            var logLossAverage = logLossValues.Average();
            var logLossStdDeviation = CalculateStandardDeviation(logLossValues);
            var logLossConfidenceInterval95 = CalculateConfidenceInterval95(logLossValues);

            var logLossReductionValues = metricsInMultipleFolds.Select(m => m.LogLossReduction);
            var logLossReductionAverage = logLossReductionValues.Average();
            var logLossReductionStdDeviation = CalculateStandardDeviation(logLossReductionValues);
            var logLossReductionConfidenceInterval95 = CalculateConfidenceInterval95(logLossReductionValues);

            Console.WriteLine($"*************************************************************************************************************");
            Console.WriteLine($"*       Metrics for Multi-class Classification model      ");
            Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"*       Average MicroAccuracy:    {microAccuracyAverage:0.###}  - Standard deviation: ({microAccuraciesStdDeviation:#.###})  - Confidence Interval 95%: ({microAccuraciesConfidenceInterval95:#.###})");
            Console.WriteLine($"*       Average MacroAccuracy:    {macroAccuracyAverage:0.###}  - Standard deviation: ({macroAccuraciesStdDeviation:#.###})  - Confidence Interval 95%: ({macroAccuraciesConfidenceInterval95:#.###})");
            Console.WriteLine($"*       Average LogLoss:          {logLossAverage:#.###}  - Standard deviation: ({logLossStdDeviation:#.###})  - Confidence Interval 95%: ({logLossConfidenceInterval95:#.###})");
            Console.WriteLine($"*       Average LogLossReduction: {logLossReductionAverage:#.###}  - Standard deviation: ({logLossReductionStdDeviation:#.###})  - Confidence Interval 95%: ({logLossReductionConfidenceInterval95:#.###})");
            Console.WriteLine($"*************************************************************************************************************");

        }

        public static double CalculateStandardDeviation(IEnumerable<double> values)
        {
            double average = values.Average();
            double sumOfSquaresOfDifferences = values.Select(val => (val - average) * (val - average)).Sum();
            double standardDeviation = Math.Sqrt(sumOfSquaresOfDifferences / (values.Count() - 1));
            return standardDeviation;
        }

        public static double CalculateConfidenceInterval95(IEnumerable<double> values)
        {
            double confidenceInterval95 = 1.96 * CalculateStandardDeviation(values) / Math.Sqrt((values.Count() - 1));
            return confidenceInterval95;
        }

Start Training

In [11]:
CreateModel();
//DoAutoML();

*************************************************************************************************************
*       Metrics for Multi-class Classification model      
*------------------------------------------------------------------------------------------------------------
*       Average MicroAccuracy:    0.894  - Standard deviation: (.099)  - Confidence Interval 95%: (.097)
*       Average MacroAccuracy:    0.9  - Standard deviation: (.092)  - Confidence Interval 95%: (.091)
*       Average LogLoss:          .796  - Standard deviation: (.756)  - Confidence Interval 95%: (.741)
*       Average LogLossReduction: .254  - Standard deviation: (.702)  - Confidence Interval 95%: (.688)
*************************************************************************************************************
The model is saved to c:\jobs\Hikaton\Hikaton\Model\MLModel.zip


Try to predict

In [12]:
public static ModelOutput Predict(ModelInput input)
        {

            // Create new MLContext
            MLContext mlContext = new MLContext();

            // Load model & create prediction engine
            string modelPath = MODEL_FILEPATH;
            ITransformer mlModel = mlContext.Model.Load(modelPath, out var modelInputSchema);
            var predEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel);

            // Use model to make prediction on input data
            ModelOutput result = predEngine.Predict(input);
            return result;
        }
        
         for (int jam = 5; jam < 24; jam++)
            {
                // Create single instance of sample data from first line of dataset for model input
                ModelInput sampleData = new ModelInput() { Day="Fri", Hour=jam, Ride_Distance__km_=10, Vehicle_Type="GrabBike" };

                // Make a single prediction on the sample data and print results
                ModelOutput predictionResult = Predict(sampleData);

                Console.WriteLine("Using model to make single prediction \n\n");
                Console.WriteLine($"Ride Distance (km): {sampleData.Ride_Distance__km_}");
                Console.WriteLine($"Vehicle Type: {sampleData.Vehicle_Type}");
                Console.WriteLine($"Hour: {sampleData.Hour}");
                Console.WriteLine($"Day: {sampleData.Day}");
                Console.WriteLine($"\n\nPredicted Class value {predictionResult.Prediction} \nPredicted Class scores: [{String.Join(",", predictionResult.Score)}]\n\n");
              
            }
            Console.WriteLine("=============== End of process ===============");

Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 5
Day: Fri


Predicted Class value MAHAL 
Predicted Class scores: [0.49936384,0.00092660566,0.49970955]


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 6
Day: Fri


Predicted Class value MAHAL 
Predicted Class scores: [0.49936384,0.00092660566,0.49970955]


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 7
Day: Fri


Predicted Class value MAHAL 
Predicted Class scores: [0.49936384,0.00092660566,0.49970955]


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 8
Day: Fri


Predicted Class value MAHAL 
Predicted Class scores: [0.49936384,0.00092660566,0.49970955]


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 9
Day: Fri


Predicted Class value MAHAL 
Predicted Class scores: [0.49936384,0.00092660566,0.49970955]


Using