In [2]:
#r "nuget: Microsoft.ML"
using Microsoft.ML;
using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;

Create classes for loading and mapping data:

In [3]:
/// <summary>
/// One row of the comma-separated <c>adult.data</c> file. Each property is bound to a
/// zero-based column index through <c>LoadColumn</c>.
/// </summary>
/// <remarks>
/// Columns that hold text in the file (for example <c>Bachelors</c>, <c>Never-married</c>,
/// <c>Adm-clerical</c>, <c>Not-in-family</c>) are declared as <c>string</c>; declaring them
/// as <c>float</c> cannot represent those values.
/// </remarks>
class AdultData{
    [LoadColumn(0)]
    public float Age { get; set; }
    [LoadColumn(1)]
    public string WorkClass { get; set; }
    [LoadColumn(2)]
    public float Fnlwgt { get; set; }
    // Text category such as "Bachelors" or "HS-grad"; the numeric form is EducationNum.
    [LoadColumn(3)]
    public string Education { get; set; }
    [LoadColumn(4)]
    public float EducationNum { get; set; }
    // Text category such as "Never-married" or "Divorced".
    [LoadColumn(5)]
    public string MaritalStatus { get; set; }
    // Text category such as "Adm-clerical" or "Exec-managerial".
    [LoadColumn(6)]
    public string Occupation { get; set; }
    // Text category such as "Not-in-family" or "Husband".
    [LoadColumn(7)]
    public string Relationship { get; set; }
    [LoadColumn(8)]
    public string Race { get; set; }
    [LoadColumn(9)]
    public string Sex { get; set; }
    [LoadColumn(10)]
    public float CapitalGain { get; set; }
    [LoadColumn(11)]
    public float CapitalLoss { get; set; }
    [LoadColumn(12)]
    public float HoursPerWeek { get; set; }
    [LoadColumn(13)]
    public string NativeCountry { get; set; }
    // Last column of the file; exposed to the pipeline under the column name "Label".
    [LoadColumn(14)]
    [ColumnName("Label")]
    public string Target { get; set; }
}

/// <summary>
/// Pairs an original text label with its boolean encoding so both can be
/// displayed side by side.
/// </summary>
class LabelComparer
{
    /// <summary>The label text as read from the data.</summary>
    public string Label { get; set; }

    /// <summary>The boolean value the label was mapped to.</summary>
    public bool EncodedLabel { get; set; }

    /// <summary>Formats the pair as "label => encoded".</summary>
    public override string ToString()
    {
        return string.Format("{0} => {1}", Label, EncodedLabel);
    }
}

Download data from [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/machine-learning-databases/adult):

In [4]:
// Download the dataset only when no local copy exists; later runs reuse the file.
if (!File.Exists("adult.data"))
{
    // WebClient is obsolete (SYSLIB0014); HttpClient is the supported replacement.
    // The type is fully qualified so no extra using directive is required.
    using var client = new System.Net.Http.HttpClient();

    // Write to a temporary file first and rename afterwards, so an interrupted
    // download never leaves a truncated "adult.data" that the existence check
    // above would accept on the next run.
    var tempPath = "adult.data.tmp";
    using (var response = await client.GetAsync(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
        System.Net.Http.HttpCompletionOption.ResponseHeadersRead))
    {
        // Fail loudly on HTTP errors instead of saving an error page as data.
        response.EnsureSuccessStatusCode();
        using var source = await response.Content.ReadAsStreamAsync();
        using var target = File.Create(tempPath);
        await source.CopyToAsync(target);
    }
    File.Move(tempPath, "adult.data");
}
// Trailing expression (no semicolon) so the notebook displays the first five lines.
File.ReadLines("adult.data").Take(5)

index,value
0,"39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States, <=50K"
1,"50, Self-emp-not-inc, 83311, Bachelors, 13, Married-civ-spouse, Exec-managerial, Husband, White, Male, 0, 0, 13, United-States, <=50K"
2,"38, Private, 215646, HS-grad, 9, Divorced, Handlers-cleaners, Not-in-family, White, Male, 0, 0, 40, United-States, <=50K"
3,"53, Private, 234721, 11th, 7, Married-civ-spouse, Handlers-cleaners, Husband, Black, Male, 0, 0, 40, United-States, <=50K"
4,"28, Private, 338409, Bachelors, 13, Married-civ-spouse, Prof-specialty, Wife, Black, Female, 0, 0, 40, Cuba, <=50K"


Instantiate a new MLContext:

In [5]:
// Shared ML.NET entry point used by the cells below for loading and transforming data.
MLContext context = new MLContext();

Load data from file:

In [6]:
// Load the comma-separated file (no header row) using the AdultData column bindings.
IDataView dataView = context.Data.LoadFromTextFile<AdultData>(
    path: "adult.data",
    separatorChar: ',',
    hasHeader: false);

Create our mapping from labels to values:

In [7]:
// Maps each label spelling (with and without a trailing period) to its boolean value:
// "<=50K" variants become false, ">50K" variants become true.
var labelLookup = new Dictionary<string, bool>
{
    { "<=50K", false },
    { "<=50K.", false },
    { ">50K", true },
    { ">50K.", true },
};
// Trailing expression (no semicolon) so the notebook displays the dictionary.
labelLookup

key,value
<=50K,False
<=50K.,False
>50K,True
>50K.,True


Create an encoder passing in the label-value mappings:

In [8]:
// Estimator that reads the "Label" column and writes the mapped boolean to "EncodedLabel".
var encoder = context.Transforms.Conversion.MapValue(
    outputColumnName: "EncodedLabel",
    keyValuePairs: labelLookup,
    inputColumnName: "Label");

Fit the encoder to the data:

In [9]:
// Fit the value-mapping estimator against the loaded data to obtain a transformer.
var transformer = encoder.Fit(dataView);

Transform the data using the encoder:

In [10]:
// Apply the fitted transformer; the result carries the added "EncodedLabel" column.
IDataView transformedDataView = transformer.Transform(dataView);

Print our transformed data:

In [11]:
// Project rows onto LabelComparer (Label + EncodedLabel) and show the first ten.
// Trailing expression (no semicolon) so the notebook displays the result.
context.Data
    .CreateEnumerable<LabelComparer>(data: transformedDataView, reuseRowObject: false)
    .Take(10)

index,Label,EncodedLabel
0,<=50K,False
1,<=50K,False
2,<=50K,False
3,<=50K,False
4,<=50K,False
5,<=50K,False
6,<=50K,False
7,>50K,True
8,>50K,True
9,>50K,True
