In [1]:
#r "nuget:Microsoft.Data.Analysis,0.2.0"
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.AutoML,0.16.0"

In [2]:
using Microsoft.Data.Analysis;
using XPlot.Plotly;

### Introduction to DataFrame
https://devblogs.microsoft.com/dotnet/an-introduction-to-dataframe/

In [3]:
using Microsoft.AspNetCore.Html;
Formatter<DataFrame>.Register((df, writer) =>
{
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));
    var rows = new List<List<IHtmlContent>>();
    var take = 20;
    for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(i));
        foreach (var obj in df.Rows[i])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }
    
    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));
    
    writer.Write(t);
}, "text/html");

In [11]:
string fileName = @"D:\softaware\samples-ml\data\tablesoccer-export.csv";

var tablesoccerData = DataFrame.LoadCsv(fileName);

In [7]:
tablesoccerData.Description()

index,Description,Hour,GoalsTeam1,GoalsTeam2,GoalDifference,Result
0,Length (excluding null values),1001,1001.0,1001.0,1001.0,1001.0
1,Max,0,5.0,5.0,5.0,2.0
2,Min,0,0.0,0.0,-5.0,1.0
3,Mean,0,3.856144,3.7452548,0.110889114,1.4565434


In [8]:
display(tablesoccerData["GoalDifference"].Mean());
display(tablesoccerData["GoalDifference"].Median());


tablesoccerData["GoalDifference"].ValueCounts()

index,Values,Counts
0,1,220
1,-1,141
2,-3,94
3,3,84
4,5,66
5,-2,112
6,2,121
7,4,53
8,-4,52
9,-5,58


In [None]:
tablesoccerData

In [9]:
Chart.Plot(
    new Graph.Histogram()
    {
        x = tablesoccerData["Hour"]
    }
)

In [12]:
Chart.Plot(
    new Graph.Histogram() {
        x = tablesoccerData["GoalsTeam1"]
    }
)

In [12]:
tablesoccerData = tablesoccerData.Filter(tablesoccerData["GoalsTeam1"].ElementwiseLessThanOrEqual(5));
tablesoccerData = tablesoccerData.Filter(tablesoccerData["GoalsTeam2"].ElementwiseLessThanOrEqual(5));

display(tablesoccerData.Rows.Count);

In [None]:
tablesoccerData.Columns.Remove("GoalsTeam1");
tablesoccerData.Columns.Remove("GoalsTeam2");
tablesoccerData.Columns.Remove("Result");

In [8]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

In [13]:
static T[] Shuffle<T>(T[] array)
{
    Random rand = new Random();
    for (int i = 0; i < array.Length; i++)
    {
        int r = i + rand.Next(array.Length - i);
        T temp = array[r];
        array[r] = array[i];
        array[i] = temp;
    }
    return array;
}

int[] randomIndices = Shuffle(Enumerable.Range(0, (int)tablesoccerData.Rows.Count).ToArray());
int testSize = (int)(tablesoccerData.Rows.Count * .8);
int[] trainRows = randomIndices[testSize..];
int[] testRows = randomIndices[..testSize];

DataFrame tablesoccerData_train = tablesoccerData[trainRows];
DataFrame tablesoccerData_test = tablesoccerData[testRows];

display(tablesoccerData_train.Rows.Count);
display(tablesoccerData_test.Rows.Count);

In [None]:
tablesoccerData_train

In [14]:
%%time


var mlContext = new MLContext();

var dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding(
    new[] 
    { 
        new InputOutputColumnPair("Team1GoalKeeper", "Team1GoalKeeper"), 
        new InputOutputColumnPair("Team1Striker", "Team1Striker"), 
        new InputOutputColumnPair("Team2GoalKeeper", "Team2GoalKeeper"), 
        new InputOutputColumnPair("Team2Striker", "Team2Striker") 
    })
  .Append(mlContext.Transforms.Concatenate(
      "Features", 
      new[] 
      { 
          "Team1GoalKeeper", "Team1Striker", "Team2GoalKeeper", "Team2Striker"          
      }))
  .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
  .AppendCacheCheckpoint(mlContext);

var trainer = mlContext.Regression.Trainers.Sdca(labelColumnName: "GoalDifference", featureColumnName: "Features");

var trainingPipeline = dataProcessPipeline.Append(trainer);

var model = trainingPipeline.Fit(tablesoccerData_train);

Wall time: 1499,4529ms

In [15]:
static void PrintRegressionFoldsAverageMetrics(IEnumerable<TrainCatalogBase.CrossValidationResult<RegressionMetrics>> crossValidationResults)
{
    var L1 = crossValidationResults.Select(r => r.Metrics.MeanAbsoluteError);
    var L2 = crossValidationResults.Select(r => r.Metrics.MeanSquaredError);
    var RMS = crossValidationResults.Select(r => r.Metrics.RootMeanSquaredError);
    var lossFunction = crossValidationResults.Select(r => r.Metrics.LossFunction);
    var R2 = crossValidationResults.Select(r => r.Metrics.RSquared);

    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"*       Metrics for Regression model      ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"*       Average L1 Loss:       {L1.Average():0.###} ");
    Console.WriteLine($"*       Average L2 Loss:       {L2.Average():0.###}  ");
    Console.WriteLine($"*       Average RMS:           {RMS.Average():0.###}  ");
    Console.WriteLine($"*       Average Loss Function: {lossFunction.Average():0.###}  ");
    Console.WriteLine($"*       Average R-squared:     {R2.Average():0.###}  ");
    Console.WriteLine($"*************************************************************************************************************");
}

In [16]:
var crossValidationResults = mlContext.Regression.CrossValidate(tablesoccerData_test, trainingPipeline, numberOfFolds: 5, labelColumnName: "GoalDifference");

PrintRegressionFoldsAverageMetrics(crossValidationResults);

*************************************************************************************************************
*       Metrics for Regression model      
*------------------------------------------------------------------------------------------------------------
*       Average L1 Loss:       2,077 
*       Average L2 Loss:       6,382  
*       Average RMS:           2,523  
*       Average Loss Function: 6,382  
*       Average R-squared:     0,383  
*************************************************************************************************************


In [17]:
class Game
{
    public string Team1GoalKeeper { get; set; }
    public string Team1Striker { get; set; }
    public string Team2GoalKeeper { get; set; }
    public string Team2Striker { get; set; }
}

class Result
{
    public float Score { get; set; }
}

In [19]:
var predictionEngine = mlContext.Model.CreatePredictionEngine<Game, Result>(model);

In [23]:
var result = predictionEngine.Predict(new Game()
{
    Team1GoalKeeper = "Christoph",
    Team1Striker = "Daniel B.",
    Team2GoalKeeper = "Michael",
    Team2Striker = "Daniel S."
});

result

Score
0.24464674


In [25]:
var predictions = new List<(float, float)>();

var data = tablesoccerData_test;

for (int i=0; i < data.Rows.Count; i++)
{
    var game = new Game()
    {
        Team1GoalKeeper = (string)data["Team1GoalKeeper"][i],
        Team1Striker = (string)data["Team1Striker"][i],
        Team2GoalKeeper = (string)data["Team2GoalKeeper"][i],
        Team2Striker = (string)data["Team2Striker"][i],
    };
    
    var result = (float)data["GoalDifference"][i];
    var prediction = predictionEngine.Predict(game);
    
    predictions.Add((result, prediction.Score));
}

var predictionTrace = new Graph.Scatter()
                        {
                            x = predictions.Select(p => p.Item1).ToList(),
                            y = predictions.Select(p => Math.Round(p.Item2)).ToList(),
                            //z = predictions.Select((p, index) => index).ToList(),
                            mode = "markers",
                            marker = new Graph.Marker()
                            {
                                size = 7,
                                opacity = 0.2
                            }
                        };

var correctTrace = new Graph.Scatter()
    {
        x = new int[] { -5, 5 },
        y = new int[] { -5, 5 }
    };

display(Chart.Plot(new [] { predictionTrace, correctTrace }));


In [None]:
using System.Linq;

Random random = new Random();

var players = new string[]
{
    "Roman",
    "Bill",
    "Julia",
    "Anne",
    "Scott",
    "Sarah"
};


for (int i=0; i<1000; i++)
{
    var mix = players.OrderBy(p => random.Next()).Take(4).ToList();
    display(mix[0]+"\t"+mix[1]+"\t"+mix[2]+"\t"+mix[3]);
}