In [1]:
#r "nuget: Microsoft.ML"
using Microsoft.ML;
using Microsoft.ML.Data;

This example comes from the ML.NET documentation: https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.featureselectioncatalog.selectfeaturesbasedonmutualinformation?view=ml-dotnet

In [2]:
/// <summary>
/// Row shape used to read the pipeline output back into .NET objects
/// (see the <c>CreateEnumerable&lt;TransformedData&gt;</c> call later in the notebook).
/// Only the two vector columns are mapped; there is no <c>Label</c> member here.
/// </summary>
class TransformedData
{
    // No [VectorType] size is declared on these members, unlike NumericData.
    // The rendered output shows the vectors coming back with different lengths
    // (three slots for A, one for B) after feature selection.
    public float[] NumericVectorA { get; set; }

    public float[] NumericVectorB { get; set; }
}

/// <summary>
/// Input row for the feature-selection example: a boolean label plus two
/// numeric feature vectors, each declared as a fixed-size vector of 3 slots.
/// </summary>
class NumericData
{
    // Label column used by the mutual-information selector (labelColumnName: "Label").
    // Declared as a public field, whereas the vector members below are properties.
    public bool Label;

    // Fixed-size vector of 3 floats.
    [VectorType(3)]
    public float[] NumericVectorA { get; set; }

    // Fixed-size vector of 3 floats.
    [VectorType(3)]
    public float[] NumericVectorB { get; set; }
}

In [3]:
/// <summary>
/// Builds the small in-memory sample used throughout the notebook.
/// </summary>
/// <returns>
/// A list of four <c>NumericData</c> rows: rows 1 and 3 are identical and labelled
/// <c>true</c>; rows 2 and 4 are labelled <c>false</c> and differ only in the second
/// slot of <c>NumericVectorB</c> (9 vs 8).
/// </returns>
static IEnumerable<NumericData> GetData()
{
    // Local factory so that each sample row below reads as a single line.
    NumericData Row(bool label, float[] vectorA, float[] vectorB)
    {
        return new NumericData
        {
            Label = label,
            NumericVectorA = vectorA,
            NumericVectorB = vectorB,
        };
    }

    var rows = new List<NumericData>();
    rows.Add(Row(true, new float[] { 4, 0, 6 }, new float[] { 7, 8, 9 }));
    rows.Add(Row(false, new float[] { 0, 5, 7 }, new float[] { 7, 9, 0 }));
    rows.Add(Row(true, new float[] { 4, 0, 6 }, new float[] { 7, 8, 9 }));
    rows.Add(Row(false, new float[] { 0, 5, 7 }, new float[] { 7, 8, 0 }));
    return rows;
}

In [4]:
// Single ML.NET context shared by every data-loading and transform call below.
MLContext mlContext = new MLContext();

In [5]:
// Build the sample rows; the bare trailing expression renders them as a table.
IEnumerable<NumericData> rawData = GetData();
rawData

index,NumericVectorA,NumericVectorB,Label
0,"[ 4, 0, 6 ]","[ 7, 8, 9 ]",True
1,"[ 0, 5, 7 ]","[ 7, 9, 0 ]",False
2,"[ 4, 0, 6 ]","[ 7, 8, 9 ]",True
3,"[ 0, 5, 7 ]","[ 7, 8, 0 ]",False


In [6]:
// Wrap the in-memory rows in an IDataView so the pipeline can consume them.
IDataView data = mlContext.Data.LoadFromEnumerable<NumericData>(rawData);

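We define a `MutualInformationFeatureSelectingEstimator`, which keeps the top k slots of the feature vectors, ranked by the mutual information between each slot and the specified label column. The limit k (`slotsInOutput`) is shared across all input columns rather than applied to each column separately, so with k = 4 one column may keep all of its slots while another keeps only one.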

Multi-column example: this pipeline transforms two columns using the provided parameters.

In [7]:
// Mutual-information feature selection over both vector columns at once.
// Each pair names a single column, so the selected slots are written back
// under the same column name. slotsInOutput is a budget shared by all listed
// columns: with 4, the output further down keeps three slots of
// NumericVectorA and one slot of NumericVectorB.
var pipeline = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(
    new[]
    {
        new InputOutputColumnPair("NumericVectorA"),
        new InputOutputColumnPair("NumericVectorB"),
    },
    labelColumnName: "Label",
    slotsInOutput: 4);

In [8]:
// Fit the selector on the sample data, then apply it to that same data.
IDataView transformedData = pipeline
    .Fit(data)
    .Transform(data);

In [9]:
// Read the transformed rows back as TransformedData objects for display.
// reuseRowObject: false is passed so rows are not served from one reused object.
mlContext.Data.CreateEnumerable<TransformedData>(
    transformedData,
    reuseRowObject: false)

index,NumericVectorA,NumericVectorB
0,"[ 4, 0, 6 ]",[ 9 ]
1,"[ 0, 5, 7 ]",[ 0 ]
2,"[ 4, 0, 6 ]",[ 9 ]
3,"[ 0, 5, 7 ]",[ 0 ]
