In [1]:
#r "nuget: Microsoft.ML"
open Microsoft.ML
open Microsoft.ML.Data
open System.IO
open System.Net

Create a type that represents our data:

In [2]:
/// One row of the abalone data file.
/// Each field is bound to a zero-based column position in the text file
/// through its LoadColumn attribute.
[<CLIMutable>]
type AbaloneData =
    {
        /// Column 0: categorical code (the sample rows contain "M", "F" and "I").
        [<LoadColumn(0)>] Sex : string

        /// Column 1.
        [<LoadColumn(1)>] Length : float32

        /// Column 2.
        [<LoadColumn(2)>] Diameter : float32

        /// Column 3.
        [<LoadColumn(3)>] Height : float32

        /// Column 4.
        [<LoadColumn(4)>] WholeWeight : float32

        /// Column 5.
        [<LoadColumn(5)>] ShuckedWeight : float32

        /// Column 6.
        [<LoadColumn(6)>] VisceraWeight : float32

        /// Column 7.
        [<LoadColumn(7)>] ShellWeight : float32

        /// Column 8: whole numbers in the sample rows, loaded as a 32-bit float.
        [<LoadColumn(8)>] Rings : float32
    }

Download the data from [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/machine-learning-databases/abalone):

In [3]:
// Fetch the data set only when there is no local copy yet.
if not (File.Exists "abalone.data") then
    // Download under a scratch name and rename only after the transfer has
    // completed, so an interrupted download cannot leave a truncated
    // "abalone.data" that the existence check would accept on the next run.
    let partialPath = "abalone.data.part"
    use client = new WebClient()
    client.DownloadFile("https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data", partialPath)
    File.Move(partialPath, "abalone.data")

// Preview the first lines of the file. Seq.truncate yields at most five
// lines and, unlike Seq.take, does not throw when the file is shorter.
File.ReadLines("abalone.data")
|> Seq.truncate 5

index,value
0,"M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15"
1,"M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7"
2,"F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9"
3,"M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10"
4,"I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7"


Create a new MLContext:

In [4]:
// Single MLContext instance; the later cells reach data loading and
// transforms through it.
let context = MLContext()

Read the data in the file:

In [5]:
// Describe the comma-separated, header-less file as a data view whose
// columns follow the LoadColumn mapping declared on AbaloneData.
let dataView =
    context.Data.LoadFromTextFile<AbaloneData>(
        "abalone.data",
        separatorChar = ',',
        hasHeader = false)

Create a One-Hot Encoder:

In [6]:
// Estimator that reads the "Sex" column and writes its one-hot encoding
// to a new "EncodedSex" column.
let encoder =
    context.Transforms.Categorical.OneHotEncoding(
        outputColumnName = "EncodedSex",
        inputColumnName = "Sex")

Fit the encoder to the data to obtain a transformer, then apply it to the data:

In [7]:
// Fitting the estimator on the data yields the transformer ...
let transformer = dataView |> encoder.Fit
// ... which is then applied to the same data to produce the encoded view.
let transformedDataView = dataView |> transformer.Transform

Create a type to hold the value and its encoding:

In [8]:
/// Pairs a row's original "Sex" value with its "EncodedSex" vector.
/// The field names match the column names of the transformed data view.
[<CLIMutable>]
type SexEncoding =
    {
        /// Original categorical value.
        Sex : string

        /// Encoded vector written by the one-hot encoder.
        EncodedSex : float32[]
    }

Read the encoded rows back and randomly select 10 examples:

In [9]:
// Materialise the transformed view as a sequence of SexEncoding records.
// reuseRowObject = false asks for a separate object per row, which matters
// here because the sort below holds on to every row at once.
let encodedLabels =
    context.Data.CreateEnumerable<SexEncoding>(transformedDataView, reuseRowObject = false)

// Fully qualified so the cell does not depend on `System` having been
// opened implicitly by the notebook host.
let rand = System.Random()

// Sorting on a random key puts the rows in random order; the first rows of
// that order form the sample. Seq.truncate yields at most ten rows and,
// unlike Seq.take, does not throw when fewer are available.
encodedLabels
|> Seq.sortBy (fun _ -> rand.Next())
|> Seq.truncate 10

index,Sex,EncodedSex
0,M,"[ 1, 0, 0 ]"
1,I,"[ 0, 0, 1 ]"
2,I,"[ 0, 0, 1 ]"
3,F,"[ 0, 1, 0 ]"
4,I,"[ 0, 0, 1 ]"
5,I,"[ 0, 0, 1 ]"
6,M,"[ 1, 0, 0 ]"
7,M,"[ 1, 0, 0 ]"
8,M,"[ 1, 0, 0 ]"
9,I,"[ 0, 0, 1 ]"
