In [1]:
#r "nuget: Microsoft.ML"
using Microsoft.ML;
using Microsoft.ML.Data;
using System.Linq;
using static Microsoft.ML.Transforms.NormalizingTransformer;

This example comes from the ML.NET documentation for `NormalizeMinMax`: https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.normalizationcatalog.normalizeminmax?view=ml-dotnet

In [2]:
/// <summary>
/// One input row for the normalization example, holding two fixed-size feature vectors.
/// </summary>
class DataPoint
{
    /// <summary>Feature vector declared with four slots.</summary>
    [VectorType(4)] public float[] Features { get; set; }

    /// <summary>Feature vector declared with three slots.</summary>
    [VectorType(3)] public float[] Features2 { get; set; }
}

In [3]:
// Shared context object; the Data and Transforms catalogs used by later cells hang off it.
MLContext mlContext = new MLContext();

In [4]:
// Four in-memory rows; each row supplies a 4-slot Features vector and a 3-slot Features2 vector.
var samples = new List<DataPoint>
{
    new DataPoint { Features = new[] { 1f, 1f, 3f, 0f },    Features2 = new[] { 1f, 2f, 3f } },
    new DataPoint { Features = new[] { 2f, 2f, 2f, 0f },    Features2 = new[] { 3f, 4f, 5f } },
    new DataPoint { Features = new[] { 0f, 0f, 1f, 0f },    Features2 = new[] { 6f, 7f, 8f } },
    new DataPoint { Features = new[] { -1f, -1f, -1f, 1f }, Features2 = new[] { 9f, 0f, 4f } },
};

In [5]:
// Wrap the in-memory sample list as an IDataView so estimators can be fitted on it.
IDataView data = mlContext.Data.LoadFromEnumerable<DataPoint>(samples);

In [6]:
// The two vector columns to normalize; the same array is passed to both estimators below.
InputOutputColumnPair[] columnPair =
{
    new InputOutputColumnPair(outputColumnName: "Features"),
    new InputOutputColumnPair(outputColumnName: "Features2"),
};

`NormalizeMinMax` normalizes the data by finding the minimum and maximum value of each vector slot across all rows, then mapping the minimum to 0, the maximum to 1, and every other value linearly in between.

In [7]:
// Min-max estimator over both columns, with zero-preservation switched off.
var normalize = mlContext.Transforms.NormalizeMinMax(
    columns: columnPair,
    fixZero: false);

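With `fixZero: true`, the data is still normalized per vector slot, but zero values are guaranteed to remain zero after normalization. This helps preserve sparsity, that is, it keeps the number of non-zero elements small. Note that the results are no longer confined to the range 0 to 1: negative inputs produce negative outputs, as the results below show.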

In [8]:
// Same min-max estimator over both columns, this time keeping zero values at zero.
var normalizeFixZero = mlContext.Transforms.NormalizeMinMax(
    columns: columnPair,
    fixZero: true);

In [9]:
// Fit both estimators on the sample data first ...
Microsoft.ML.Transforms.NormalizingTransformer normalizeTransform = normalize.Fit(data);
Microsoft.ML.Transforms.NormalizingTransformer normalizeFixZeroTransform = normalizeFixZero.Fit(data);

// ... then apply each fitted transformer to that same data.
IDataView transformedData = normalizeTransform.Transform(data);
IDataView fixZeroData = normalizeFixZeroTransform.Transform(data);

In [11]:
// Display the "Features" column produced by the fixZero: false normalizer, one vector per row.
transformedData.GetColumn<float[]>("Features")

index,value
0,"[ 0.6666667, 0.6666667, 1, 0 ]"
1,"[ 1, 1, 0.75, 0 ]"
2,"[ 0.33333334, 0.33333334, 0.5, 0 ]"
3,"[ 0, 0, 0, 1 ]"


In [12]:
// Display the "Features2" column produced by the fixZero: false normalizer, one vector per row.
transformedData.GetColumn<float[]>("Features2")

index,value
0,"[ 0, 0.2857143, 0 ]"
1,"[ 0.25, 0.5714286, 0.4 ]"
2,"[ 0.625, 1, 1 ]"
3,"[ 1, 0, 0.2 ]"


In [13]:
// Display the "Features" column produced by the fixZero: true normalizer, one vector per row.
fixZeroData.GetColumn<float[]>("Features")

index,value
0,"[ 0.5, 0.5, 1, 0 ]"
1,"[ 1, 1, 0.6666667, 0 ]"
2,"[ 0, 0, 0.33333334, 0 ]"
3,"[ -0.5, -0.5, -0.33333334, 1 ]"


In [14]:
// Display the "Features2" column produced by the fixZero: true normalizer, one vector per row.
fixZeroData.GetColumn<float[]>("Features2")

index,value
0,"[ 0.11111111, 0.2857143, 0.375 ]"
1,"[ 0.33333334, 0.5714286, 0.625 ]"
2,"[ 0.6666667, 1, 1 ]"
3,"[ 1, 0, 0.5 ]"


In [15]:
// Display the fitted parameters (Scale and Offset, one entry per vector slot) of the
// fixZero: false normalizer for column index 0, i.e. the first entry of columnPair ("Features").
normalizeTransform.GetNormalizerModelParameters(0)

Scale,Offset
"[ 0.33333334, 0.33333334, 0.25, 1 ]","[ -1, -1, -1, 0 ]"


In [16]:
// Display the fitted parameters (Scale and Offset, one entry per vector slot) of the
// fixZero: false normalizer for column index 1, i.e. the second entry of columnPair ("Features2").
normalizeTransform.GetNormalizerModelParameters(1)

Scale,Offset
"[ 0.125, 0.14285715, 0.2 ]","[ 1, 0, 3 ]"
