NuGet packages

In [5]:
#r "nuget:Microsoft.ML"
#r "nuget:MathNet.Numerics"
#r "nuget:System.Data.Common"
#r "nuget:Microsoft.ML.AutoML"
#r "nuget:Microsoft.Data.DataFrame,0.1.1-e190920-1"
#r "nuget:Microsoft.ML.LightGbm"


Reference

In [9]:
using System;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;
using System.IO;
using System.Collections.Generic;
using System.Data;
using System.Text.RegularExpressions;
using System.Text;
using MathNet.Numerics.LinearAlgebra;
using System.Linq;
using System.Reflection;
using Microsoft.Data;
using XPlot.Plotly;
using Microsoft.AspNetCore.Html;
using Microsoft.ML.Trainers.LightGbm;

// Register an HTML formatter so that a DataFrame is displayed as a table.
// At most the first 20 rows are rendered, each preceded by its row number.
Formatter<DataFrame>.Register((df, writer) =>
{
    var maxRows = 20;

    // Header row: an "index" cell followed by one cell per DataFrame column.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    headerCells.AddRange(df.Columns.Select(column => (IHtmlContent) th(column)));

    // Body rows: the row number first, then every value of that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowLimit = Math.Min(maxRows, df.RowCount);
    for (var rowIndex = 0; rowIndex < rowLimit; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    writer.Write(
        table(
            thead(headerCells),
            tbody(bodyRows.Select(row => tr(row)))));
}, "text/html");

Model Class

In [18]:
 /// <summary>
 /// One row of the training CSV. <c>LoadColumn</c> gives the column position in the file and
 /// <c>ColumnName</c> the column name used in the data view. The training pipeline in this
 /// notebook uses "Vehicle Type", "Day", "Ride Distance (km)" and "Hour" as features and
 /// "Saving" as the label.
 /// </summary>
 public class ModelInput
    {
        [ColumnName("Ride Distance (km)")]
        [LoadColumn(0)]
        public float Ride_Distance__km_ { get; set; }

        [ColumnName("Vehicle Type")]
        [LoadColumn(1)]
        public string Vehicle_Type { get; set; }

        [ColumnName("Promo Value")]
        [LoadColumn(2)]
        public float Promo_Value { get; set; }

        [ColumnName("Fare")]
        [LoadColumn(3)]
        public float Fare { get; set; }

        [ColumnName("Paid")]
        [LoadColumn(4)]
        public float Paid { get; set; }

        [ColumnName("Hour")]
        [LoadColumn(5)]
        public float Hour { get; set; }

        [ColumnName("Minute")]
        [LoadColumn(6)]
        public float Minute { get; set; }

        [ColumnName("Class")]
        [LoadColumn(7)]
        public string Class { get; set; }

        [ColumnName("Day")]
        [LoadColumn(8)]
        public string Day { get; set; }

        [ColumnName("Saving")]
        [LoadColumn(9)]
        public float Saving { get; set; }
    }
    /// <summary>
    /// Output type used with the prediction engine created in <c>Predict</c>.
    /// </summary>
    public class ModelOutput
    {
        // Value produced by the model; the notebook prints it as the predicted saving.
        public float Score { get; set; }
    }

Helper functions

In [26]:
        // CSV file the training data is loaded from in CreateModel.
        private static string TRAIN_DATA_FILEPATH = @"BusinessProfile4.csv";
        // File name of the saved model; SaveModel resolves it against the current directory.
        private static string MODEL_FILEPATH = @"MLModel2.zip";

        // MLContext shared across the model creation workflow.
        // A fixed seed is set for repeatable/deterministic results across multiple trainings.
        private static MLContext mlContext = new MLContext(seed: 1);

        /// <summary>
        /// Runs the whole training workflow: loads the training CSV, cross-validates the
        /// pipeline, fits it on the full data set and saves the resulting model.
        /// </summary>
        public static void CreateModel()
        {
            // Read the comma-separated training file (with header row) as ModelInput rows.
            var data = mlContext.Data.LoadFromTextFile<ModelInput>(
                path: TRAIN_DATA_FILEPATH,
                separatorChar: ',',
                hasHeader: true,
                allowQuoting: true,
                allowSparse: false);

            var pipeline = BuildTrainingPipeline(mlContext);

            // Print cross-validated metrics before the final fit.
            Evaluate(mlContext, data, pipeline);

            var trainedModel = TrainModel(mlContext, data, pipeline);

            SaveModel(mlContext, trainedModel, MODEL_FILEPATH, data.Schema);
        }

        /// <summary>
        /// Builds the training pipeline: one-hot encodes "Vehicle Type" and "Day", concatenates
        /// them with "Ride Distance (km)" and "Hour" into "Features", and appends a FastTree
        /// regression trainer with "Saving" as the label.
        /// </summary>
        /// <param name="mlContext">Context used to create the transforms and the trainer.</param>
        /// <returns>The estimator chain; it is not fitted here.</returns>
        public static IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Categorical columns are encoded in place (same input and output name).
            var categoricalColumns = new[]
            {
                new InputOutputColumnPair("Vehicle Type", "Vehicle Type"),
                new InputOutputColumnPair("Day", "Day")
            };
            var encoder = mlContext.Transforms.Categorical.OneHotEncoding(categoricalColumns);

            // Collect every model input into the single "Features" column.
            var featureColumns = new[] { "Vehicle Type", "Day", "Ride Distance (km)", "Hour" };
            var featurizer = mlContext.Transforms.Concatenate("Features", featureColumns);
            var dataProcessing = encoder.Append(featurizer);

            var regressor = mlContext.Regression.Trainers.FastTree(
                labelColumnName: "Saving",
                featureColumnName: "Features");

            return dataProcessing.Append(regressor);
        }

        /// <summary>
        /// Fits the pipeline on the given data, writing a start and an end marker to the console.
        /// </summary>
        /// <param name="mlContext">Not used by this method.</param>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="trainingPipeline">Estimator to fit.</param>
        /// <returns>The fitted transformer.</returns>
        public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            Console.WriteLine("=============== Training  model ===============");
            var fittedModel = trainingPipeline.Fit(trainingDataView);
            Console.WriteLine("=============== End of training process ===============");

            return fittedModel;
        }

        /// <summary>
        /// Cross-validates the pipeline on the training data with 5 folds and prints the
        /// fold-averaged regression metrics. Cross-validation is used because there is only
        /// one data set, with no separate evaluation set.
        /// </summary>
        /// <param name="mlContext">Context whose regression catalog runs the cross-validation.</param>
        /// <param name="trainingDataView">Data to split into folds.</param>
        /// <param name="trainingPipeline">Estimator evaluated on each fold.</param>
        private static void Evaluate(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
        {
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");

            var foldResults = mlContext.Regression.CrossValidate(
                trainingDataView,
                trainingPipeline,
                numberOfFolds: 5,
                labelColumnName: "Saving");

            PrintRegressionFoldsAverageMetrics(foldResults);
        }

        /// <summary>
        /// Saves the trained model and its input schema to a file and prints where it was written.
        /// </summary>
        /// <param name="mlContext">Context whose model catalog performs the save.</param>
        /// <param name="mlModel">Trained model to save.</param>
        /// <param name="modelRelativePath">Target path, resolved with <c>GetAbsolutePath</c>.</param>
        /// <param name="modelInputSchema">Input schema stored together with the model.</param>
        private static void SaveModel(MLContext mlContext, ITransformer mlModel, string modelRelativePath, DataViewSchema modelInputSchema)
        {
            Console.WriteLine("=============== Saving the model  ===============");

            // Resolve the target once and use it for both the save and the message.
            var destination = GetAbsolutePath(modelRelativePath);
            mlContext.Model.Save(mlModel, modelInputSchema, destination);

            Console.WriteLine("The model is saved to {0}", destination);
        }

        /// <summary>
        /// Combines the current working directory with the given path.
        /// </summary>
        /// <param name="relativePath">Path to append to the current directory.</param>
        /// <returns>The result of combining the current directory with <paramref name="relativePath"/>.</returns>
        public static string GetAbsolutePath(string relativePath)
            => Path.Combine(Directory.GetCurrentDirectory(), relativePath);

        /// <summary>
        /// Writes the metrics of a single regression evaluation to the console.
        /// </summary>
        /// <param name="metrics">Metrics to print; each value is shown with at most two decimals.</param>
        public static void PrintRegressionMetrics(RegressionMetrics metrics)
        {
            Console.WriteLine("*************************************************");
            Console.WriteLine("*       Metrics for Regression model      ");
            Console.WriteLine("*------------------------------------------------");
            // Every value uses "0.##": with "#.##" a value below 1 loses its leading zero
            // (0.5 prints as ".5") and an exact 0 prints as an empty string.
            Console.WriteLine($"*       LossFn:        {metrics.LossFunction:0.##}");
            Console.WriteLine($"*       R2 Score:      {metrics.RSquared:0.##}");
            Console.WriteLine($"*       Absolute loss: {metrics.MeanAbsoluteError:0.##}");
            Console.WriteLine($"*       Squared loss:  {metrics.MeanSquaredError:0.##}");
            Console.WriteLine($"*       RMS loss:      {metrics.RootMeanSquaredError:0.##}");
            Console.WriteLine("*************************************************");
        }

        /// <summary>
        /// Writes the per-fold averages of the regression metrics from a cross-validation run
        /// to the console.
        /// </summary>
        /// <param name="crossValidationResults">One result per fold; enumerated once per printed metric.</param>
        public static void PrintRegressionFoldsAverageMetrics(IEnumerable<TrainCatalogBase.CrossValidationResult<RegressionMetrics>> crossValidationResults)
        {
            // Mean of one metric over all folds, computed when the line that shows it is printed.
            double Mean(Func<RegressionMetrics, double> metric) =>
                crossValidationResults.Average(fold => metric(fold.Metrics));

            Console.WriteLine("*************************************************************************************************************");
            Console.WriteLine("*       Metrics for Regression model      ");
            Console.WriteLine("*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"*       Average L1 Loss:       {Mean(m => m.MeanAbsoluteError):0.###} ");
            Console.WriteLine($"*       Average L2 Loss:       {Mean(m => m.MeanSquaredError):0.###}  ");
            Console.WriteLine($"*       Average RMS:           {Mean(m => m.RootMeanSquaredError):0.###}  ");
            Console.WriteLine($"*       Average Loss Function: {Mean(m => m.LossFunction):0.###}  ");
            Console.WriteLine($"*       Average R-squared:     {Mean(m => m.RSquared):0.###}  ");
            Console.WriteLine("*************************************************************************************************************");
        }

Start Training

In [27]:
// Run the full workflow: cross-validate, train on all the data and save the model file.
CreateModel();

*************************************************************************************************************
*       Metrics for Regression model      
*------------------------------------------------------------------------------------------------------------
*       Average L1 Loss:       9411.912 
*       Average L2 Loss:       271179160.38  
*       Average RMS:           14617.389  
*       Average Loss Function: 271179172.385  
*       Average R-squared:     0.158  
*************************************************************************************************************
The model is saved to C:\jobs\Hikaton\Model\MLModel2.zip


In [28]:
/// <summary>
/// Loads the saved model from <c>MODEL_FILEPATH</c> and scores a single input with it.
/// </summary>
/// <param name="input">Feature values to score.</param>
/// <returns>The model output for <paramref name="input"/>.</returns>
/// <remarks>
/// The model file is read again on every call, so a model saved by a later training run
/// is used by the next prediction.
/// </remarks>
public static ModelOutput Predict(ModelInput input)
{
    // Create new MLContext
    MLContext mlContext = new MLContext();

    // Load the model; the input schema returned alongside it is not needed here.
    ITransformer mlModel = mlContext.Model.Load(MODEL_FILEPATH, out DataViewSchema _);

    // The prediction engine is disposable: release it as soon as the prediction is made
    // instead of leaving one undisposed engine behind per call.
    using (var predEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel))
    {
        return predEngine.Predict(input);
    }
}
        
        // Predict the saving for the same ride at every hour from 5 through 23.
        for (var hour = 5; hour < 24; hour++)
        {
            // Hand-built sample: only the four feature columns used by the pipeline are set.
            var sample = new ModelInput
            {
                Day = "Fri",
                Hour = hour,
                Ride_Distance__km_ = 10,
                Vehicle_Type = "GrabBike"
            };

            // Score the sample and print its inputs next to the prediction.
            var prediction = Predict(sample);

            Console.WriteLine("Using model to make single prediction \n\n");
            Console.WriteLine($"Ride Distance (km): {sample.Ride_Distance__km_}");
            Console.WriteLine($"Vehicle Type: {sample.Vehicle_Type}");
            Console.WriteLine($"Hour: {sample.Hour}");
            Console.WriteLine($"Day: {sample.Day}");
            Console.WriteLine($"\n\nPredicted Saving: Rp.{prediction.Score}\n\n");
        }

        Console.WriteLine("=============== End of process ===============");

Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 5
Day: Fri


Predicted Saving: Rp.10530.258


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 6
Day: Fri


Predicted Saving: Rp.10530.258


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 7
Day: Fri


Predicted Saving: Rp.10530.258


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 8
Day: Fri


Predicted Saving: Rp.10530.258


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 9
Day: Fri


Predicted Saving: Rp.10530.258


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 10
Day: Fri


Predicted Saving: Rp.22668.65


Using model to make single prediction 


Ride Distance (km): 10
Vehicle Type: GrabBike
Hour: 11
Day: Fri


Predicted Saving: Rp.22668.65


Using model to make single 