In [15]:
#r "nuget:Microsoft.ML,1.5.0-preview"
#r "nuget:Microsoft.ML.AutoML,0.17.0-preview"
#r "nuget:Microsoft.Data.Analysis,0.1.0"
#r "nuget:Microsoft.ML.OnnxConverter,0.17.0-preview"

In [16]:
using Microsoft.Data.Analysis;
using XPlot.Plotly;

In [17]:
using Microsoft.AspNetCore.Html;
// Registers a "text/html" formatter for DataFrame values: renders the first
// rows (at most maxRows) as an HTML table with a leading index column.
Formatter<DataFrame>.Register((df, writer) =>
{
    var maxRows = 20;

    // Header row: an "index" label followed by one <th> per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    headerCells.AddRange(df.Columns.Select(column => (IHtmlContent) th(column.Name)));

    // Body rows: the row number first, then every value of that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowLimit = Math.Min(maxRows, df.RowCount);
    for (var rowIndex = 0; rowIndex < rowLimit; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(rowCells => tr(rowCells))));

    writer.Write(htmlTable);
}, "text/html");

In [18]:
using System.IO;
using System.Net.Http;
// Local file the order-details data set is cached in; downloaded once if missing.
string ordersPath = "order-details.csv";
if (!File.Exists(ordersPath))
{
    const string ordersUrl = "https://raw.githubusercontent.com/rondagdag/onnx-pected/master/GenerateONNX-AutoML-Orders/GenerateONNX-AutoML/Data/order-details.csv";

    // Dispose the client once the download is done instead of leaking it.
    using (var client = new HttpClient())
    {
        // await rather than .Result: no blocked thread, and a failed request
        // surfaces as its own exception instead of an AggregateException.
        var contents = await client.GetStringAsync(ordersUrl);

        // Write to ordersPath (not a repeated literal) so the existence check
        // and the file written can never drift apart.
        File.WriteAllText(ordersPath, contents);
    }
}

In [19]:
// Load the CSV at ordersPath into a DataFrame; the bare trailing expression
// makes the frame the cell's displayed value.
DataFrame ordersData = DataFrame.LoadCsv(ordersPath);
ordersData

index,OrderID,ProductID,UnitPrice,Quantity,Discount
0,10248,11,14.0,12,0.0
1,10248,42,9.8,10,0.0
2,10248,72,34.8,5,0.0
3,10249,14,18.6,9,0.0
4,10249,51,42.4,40,0.0
5,10250,41,7.7,10,0.0
6,10250,51,42.4,35,0.15
7,10250,65,16.8,15,0.15
8,10251,22,16.8,6,0.05
9,10251,57,15.6,15,0.05


In [29]:
// Histogram trace over the "Discount" column with nbinsx set to 20.
var discountHistogram = new Graph.Histogram
{
    nbinsx = 20,
    x = ordersData["Discount"]
};

// Bare trailing expression: the resulting chart is the cell's displayed value.
Chart.Plot(discountHistogram)

In [38]:
// Marker settings: colour each point by its "Discount" value on the "Jet" scale.
var discountMarker = new Graph.Marker
{
    color = ordersData["Discount"],
    colorscale = "Jet"
};

// Scatter trace of "Quantity" (y) against "UnitPrice" (x), drawn as markers only.
var priceQuantityTrace = new Graph.Scattergl
{
    x = ordersData["UnitPrice"],
    y = ordersData["Quantity"],
    mode = "markers",
    marker = discountMarker
};

var chart = Chart.Plot(priceQuantityTrace);

// Fixed 600 x 600 size for the rendered chart.
chart.Height = 600;
chart.Width = 600;
display(chart);

In [39]:
/// <summary>
/// Shuffles <paramref name="array"/> in place with a Fisher-Yates pass and
/// returns the same array instance.
/// </summary>
/// <typeparam name="T">Element type of the array.</typeparam>
/// <param name="array">Array to permute; its elements are reordered in place.</param>
/// <param name="rand">
/// Optional random source. Pass a seeded <see cref="Random"/> to get a
/// reproducible order; when null, a new unseeded <see cref="Random"/> is used.
/// </param>
/// <returns>The same instance that was passed in, now shuffled.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
static T[] Shuffle<T>(T[] array, Random rand = null)
{
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    rand = rand ?? new Random();

    // Stop one short of the end: the last position can only swap with itself.
    for (int i = 0; i < array.Length - 1; i++)
    {
        // Pick a partner uniformly from the not-yet-fixed tail [i, Length).
        int j = i + rand.Next(array.Length - i);
        (array[i], array[j]) = (array[j], array[i]);
    }
    return array;
}

// Shuffle every row index of ordersData, then split the shuffled order:
// the first testSize indices become the test set, the remainder the training set.
int[] randomIndices = Shuffle(
    Enumerable.Range(0, (int)ordersData.RowCount).ToArray());

// Test set size: RowCount * .1, truncated to an int.
int testSize = (int)(ordersData.RowCount * .1);

int[] testRows = randomIndices.Take(testSize).ToArray();
int[] trainRows = randomIndices.Skip(testSize).ToArray();

DataFrame orders_test = ordersData[testRows];
DataFrame orders_train = ordersData[trainRows];

// Show the row counts of both partitions (train first, then test).
display(orders_train.RowCount);
display(orders_test.RowCount);

In [40]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

In [47]:
%%time

// Create the ML.NET context and run an AutoML regression experiment on the
// training frame, with "Discount" as the label and a 60-second time setting.
var mlContext = new MLContext();

var autoCatalog = mlContext.Auto();
var experiment = autoCatalog.CreateRegressionExperiment(
    maxExperimentTimeInSeconds: 60);

var result = experiment.Execute(
    orders_train,
    labelColumnName: "Discount");

Wall time: 69465.52990000001ms

In [48]:
// One scatter trace per trainer: runtime (x) against validation mean absolute
// error (y). Runs without validation metrics are left out.
var scatters = result.RunDetails
    .Where(run => run.ValidationMetrics != null)
    .GroupBy(run => run.TrainerName)
    .Select(trainerRuns => new Graph.Scattergl
    {
        name = trainerRuns.Key,
        x = trainerRuns.Select(run => run.RuntimeInSeconds),
        y = trainerRuns.Select(run => run.ValidationMetrics.MeanAbsoluteError),
        mode = "markers",
        marker = new Graph.Marker { size = 12 }
    });

var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Error");
display(chart);

// Report which trainer produced the best run.
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");

Best Trainer:FastForestRegression


In [50]:
// Score the held-out rows with the best model found by the experiment.
var testResults = result.BestRun.Model.Transform(orders_test);

// GetColumn returns a lazy sequence. Materialise each column once so the
// Max() calls and the plot below do not enumerate the transformed data again.
var trueValues = testResults.GetColumn<float>("Discount").ToArray();
var predictedValues = testResults.GetColumn<float>("Score").ToArray();

// Predicted value (y) against true value (x), one marker per test row.
var predictedVsTrue = new Graph.Scattergl()
{
    x = trueValues,
    y = predictedValues,
    mode = "markers",
};

// Upper end of the y = x reference line: the largest value on either axis.
var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());

// Diagonal from (0, 0) to (max, max); points on it are perfect predictions.
var perfectLine = new Graph.Scattergl()
{
    x = new[] {0, maximumValue},
    y = new[] {0, maximumValue},
    mode = "lines",
};

var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });
chart.WithXTitle("True Values");
chart.WithYTitle("Predicted Values");
chart.WithLegend(false);
chart.Width = 600;
chart.Height = 600;
display(chart);