In [2]:
#r "nuget: Microsoft.ML"
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

This example comes from the ML.NET documentation: https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.extensionscatalog.replacemissingvalues?view=ml-dotnet

In [3]:
/// <summary>
/// Input row for this example: two float-vector feature columns.
/// </summary>
class DataPoint
{
    // Annotated as a vector of length 3; the sample rows below supply 3 floats each.
    [VectorType(3)]
    public float[] Features1 { get; set; }
    // Annotated as a vector of length 2; the sample rows below supply 2 floats each.
    [VectorType(2)]
    public float[] Features2 { get; set; }
}

/// <summary>
/// Row type used to read the transformed data back as objects.
/// Inherits Features1/Features2 from DataPoint and adds one property per
/// output column produced by the ReplaceMissingValues pipelines.
/// </summary>
class SampleDataTransformed : DataPoint
{
    // Property name matches the "MissingReplaced1" output column (derived from Features1).
    [VectorType(3)]
    public float[] MissingReplaced1 { get; set; }
    // Property name matches the "MissingReplaced2" output column (derived from Features2).
    [VectorType(2)]
    public float[] MissingReplaced2 { get; set; }
}

In [4]:
// Single ML.NET context shared by every later cell (data loading and transforms).
MLContext mlContext = new MLContext();

In [5]:
// Four in-memory rows. NaN marks a missing slot; the last row also carries
// a PositiveInfinity so its handling is visible in the outputs.
var samples = new List<DataPoint>
{
    new DataPoint
    {
        Features1 = new float[] { 1f, 1f, 0f },
        Features2 = new float[] { 1f, 1f }
    },
    new DataPoint
    {
        Features1 = new float[] { 0f, float.NaN, 1f },
        Features2 = new float[] { 0f, 1f }
    },
    new DataPoint
    {
        Features1 = new float[] { -1f, float.NaN, -3f },
        Features2 = new float[] { -1f, float.NaN }
    },
    new DataPoint
    {
        Features1 = new float[] { -1f, 6f, -3f },
        Features2 = new float[] { 0f, float.PositiveInfinity }
    },
};

In [6]:
// Wrap the in-memory list as an IDataView so the estimators can consume it.
IDataView data = mlContext.Data.LoadFromEnumerable<DataPoint>(samples);

Here we use the default replacement mode, which replaces each missing (NaN) value with the default value for its type (0 for `float`).

In [7]:
// Estimator that writes MissingReplaced1/2 from Features1/2 using DefaultValue mode.
// Note the pair order: output column name first, input column name second.
MissingValueReplacingEstimator defaultPipeline = mlContext.Transforms.ReplaceMissingValues(
    columns: new InputOutputColumnPair[]
    {
        new InputOutputColumnPair(outputColumnName: "MissingReplaced1", inputColumnName: "Features1"),
        new InputOutputColumnPair(outputColumnName: "MissingReplaced2", inputColumnName: "Features2"),
    },
    replacementMode: MissingValueReplacingEstimator.ReplacementMode.DefaultValue);

In [8]:
// Fit the estimator on the sample data, then apply the resulting transformer to the same data.
MissingValueReplacingTransformer defaultTransformer = defaultPipeline.Fit(data);
IDataView defaultTransformedData = defaultTransformer.Transform(data);

In [9]:
// Trailing expression (no semicolon) so the notebook renders the rows as a table.
mlContext.Data.CreateEnumerable<SampleDataTransformed>(
    data: defaultTransformedData,
    reuseRowObject: false)

index,MissingReplaced1,MissingReplaced2,Features1,Features2
0,"[ 1, 1, 0 ]","[ 1, 1 ]","[ 1, 1, 0 ]","[ 1, 1 ]"
1,"[ 0, 0, 1 ]","[ 0, 1 ]","[ 0, NaN, 1 ]","[ 0, 1 ]"
2,"[ -1, 0, -3 ]","[ -1, 0 ]","[ -1, NaN, -3 ]","[ -1, NaN ]"
3,"[ -1, 6, -3 ]","[ 0, Infinity ]","[ -1, 6, -3 ]","[ 0, Infinity ]"


Here we use the mean replacement mode, which replaces each missing value with the mean of the values that were not missing.

In [10]:
// Estimator that writes MissingReplaced1/2 from Features1/2 using Mean mode.
// Note the pair order: output column name first, input column name second.
MissingValueReplacingEstimator meanPipeline = mlContext.Transforms.ReplaceMissingValues(
    columns: new InputOutputColumnPair[]
    {
        new InputOutputColumnPair(outputColumnName: "MissingReplaced1", inputColumnName: "Features1"),
        new InputOutputColumnPair(outputColumnName: "MissingReplaced2", inputColumnName: "Features2"),
    },
    replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean);

In [11]:
// Fit the mean-mode estimator on the sample data, then apply the resulting transformer to the same data.
MissingValueReplacingTransformer meanTransformer = meanPipeline.Fit(data);
IDataView meanTransformedData = meanTransformer.Transform(data);

In [12]:
// Trailing expression (no semicolon) so the notebook renders the rows as a table.
mlContext.Data.CreateEnumerable<SampleDataTransformed>(
    data: meanTransformedData,
    reuseRowObject: false)

index,MissingReplaced1,MissingReplaced2,Features1,Features2
0,"[ 1, 1, 0 ]","[ 1, 1 ]","[ 1, 1, 0 ]","[ 1, 1 ]"
1,"[ 0, 3.5, 1 ]","[ 0, 1 ]","[ 0, NaN, 1 ]","[ 0, 1 ]"
2,"[ -1, 3.5, -3 ]","[ -1, 1 ]","[ -1, NaN, -3 ]","[ -1, NaN ]"
3,"[ -1, 6, -3 ]","[ 0, Infinity ]","[ -1, 6, -3 ]","[ 0, Infinity ]"
