In [None]:
#r "nuget:Microsoft.ML"
#r "nuget:XPlot.Plotly"

In [None]:
using System;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;

In [None]:
/// <summary>
/// A single housing block record. Each property is bound to one column of the
/// input text file through the column index given in its LoadColumn attribute.
/// </summary>
public class HouseBlockData
{
    /// <summary>Longitude of the housing block (file column 0).</summary>
    [LoadColumn(0)]
    public float Longitude { get; set; }

    /// <summary>Latitude of the housing block (file column 1).</summary>
    [LoadColumn(1)]
    public float Latitude { get; set; }

    /// <summary>Median age of the housing in the block (file column 2).</summary>
    [LoadColumn(2)]
    public float HousingMedianAge { get; set; }

    /// <summary>Total number of rooms in the block (file column 3).</summary>
    [LoadColumn(3)]
    public float TotalRooms { get; set; }

    /// <summary>Total number of bedrooms in the block (file column 4).</summary>
    [LoadColumn(4)]
    public float TotalBedrooms { get; set; }

    /// <summary>Population of the block (file column 5).</summary>
    [LoadColumn(5)]
    public float Population { get; set; }

    /// <summary>Number of households in the block (file column 6).</summary>
    [LoadColumn(6)]
    public float Households { get; set; }

    /// <summary>Median income of the block (file column 7).</summary>
    [LoadColumn(7)]
    public float MedianIncome { get; set; }

    /// <summary>Median house value of the block (file column 8).</summary>
    [LoadColumn(8)]
    public float MedianHouseValue { get; set; }
}

In [None]:
// the machine learning context is the entry point for all ML.NET operations below
var context = new MLContext();

// the data set is expected next to the notebook, in the current working directory
var dataPath = Path.Combine(Environment.CurrentDirectory, "california_housing.csv");

// read the comma-separated file, skipping its header row, as HouseBlockData records
var data = context.Data.LoadFromTextFile<HouseBlockData>(
    dataPath,
    separatorChar: ',',
    hasHeader: true);

In [None]:
// materialize every row of the data view into an array of HouseBlockData objects
var houses = context.Data
    .CreateEnumerable<HouseBlockData>(data, reuseRowObject: false)
    .ToArray();

In [None]:
// show the first 10 housing block records
display(houses.Take(10));

index,Longitude,Latitude,HousingMedianAge,TotalRooms,TotalBedrooms,Population,Households,MedianIncome,MedianHouseValue
0,-114.31,34.19,15,5612,1283,1015,472,1.4936,66900
1,-114.47,34.4,19,7650,1901,1129,463,1.82,80100
2,-114.56,33.69,17,720,174,333,117,1.6509,85700
3,-114.57,33.64,14,1501,337,515,226,3.1917,73400
4,-114.57,33.57,20,1454,326,624,262,1.925,65500
5,-114.58,33.63,29,1387,236,671,239,3.3438,74000
6,-114.58,33.61,25,2907,680,1841,633,2.6768,82400
7,-114.59,34.83,41,812,168,375,158,1.7083,48500
8,-114.59,33.61,34,4789,1175,3134,1056,2.1782,58400
9,-114.6,34.83,46,1497,309,787,271,2.1908,48100


In [None]:
// scatter the housing blocks by location and color every marker
// by the median house value of its block
var chart = Chart.Plot(
    new Scattergl
    {
        mode = "markers",
        x = houses.Select(h => h.Longitude),
        y = houses.Select(h => h.Latitude),
        marker = new Marker
        {
            colorscale = "Jet",
            color = houses.Select(h => h.MedianHouseValue)
        }
    }
);

// label the chart, fix its size to 600 x 600 and render it
chart.WithTitle("Median house value by location");
chart.WithXTitle("Longitude");
chart.WithYTitle("Latitude");
chart.Height = 600;
chart.Width = 600;
display(chart);

Height,Id,PlotlySrc,Width
600,1d9b8dda-a7ae-4221-86d3-c1f30ec11b0e,https://cdn.plot.ly/plotly-latest.min.js,600


In [None]:
// scatter the median house value (y) against the median income (x) of every block
chart = Chart.Plot(
    new Scattergl
    {
        mode = "markers",
        x = houses.Select(h => h.MedianIncome),
        y = houses.Select(h => h.MedianHouseValue)
    }
);

// label the chart, fix its size to 600 x 600 and render it
chart.WithTitle("Median house value by income");
chart.WithXTitle("Median Income");
chart.WithYTitle("Median House Value");
chart.Height = 600;
chart.Width = 600;
display(chart);

Height,Id,PlotlySrc,Width
600,aac0a253-a9af-4671-9b28-fba556e3ca72,https://cdn.plot.ly/plotly-latest.min.js,600


In [None]:
// keep only records with a median house value below 500,000
// (ML.NET documents upperBound as exclusive; no lower bound is set here)
data = context.Data.FilterRowsByColumn(
    input: data,
    columnName: "MedianHouseValue",
    upperBound: 500_000);

In [None]:
// re-materialize the housing records from the current (filtered) data view
houses = context.Data
    .CreateEnumerable<HouseBlockData>(data, reuseRowObject: false)
    .ToArray();

// scatter the median house value (y) against the median income (x) once more
chart = Chart.Plot(
    new Scattergl
    {
        mode = "markers",
        x = houses.Select(h => h.MedianIncome),
        y = houses.Select(h => h.MedianHouseValue)
    }
);

// label the chart, fix its size to 600 x 600 and render it
chart.WithTitle("Median house value by income");
chart.WithXTitle("Median Income");
chart.WithYTitle("Median House Value");
chart.Height = 600;
chart.Width = 600;
display(chart);

Height,Id,PlotlySrc,Width
600,1bc14f08-eae8-4577-bcba-31020336ec51,https://cdn.plot.ly/plotly-latest.min.js,600


In [None]:
/// <summary>
/// The ToMedianHouseValue class is used in a column data conversion:
/// it is the output type of the custom mapping that rescales the median house value.
/// </summary>
public class ToMedianHouseValue
{
    /// <summary>The median house value after rescaling by the custom mapping.</summary>
    public float NormalizedMedianHouseValue { get; set; }
}

In [None]:
// build a data loading pipeline
// step 1: express the median house value in thousands by dividing it by 1000;
// the result is written to the NormalizedMedianHouseValue column
var pipeline = context.Transforms.CustomMapping<HouseBlockData, ToMedianHouseValue>(
    mapAction: (input, output) => output.NormalizedMedianHouseValue = input.MedianHouseValue / 1000,
    contractName: "MedianHouseValue");

In [None]:
using Microsoft.AspNetCore.Html;
// register an HTML formatter that renders a DataDebuggerPreview as a table:
// one header cell per column and one table row per previewed record,
// each prefixed with the record's position in the preview
Formatter<DataDebuggerPreview>.Register((preview, writer) =>
{
    // header row: an "index" cell followed by the name of every column
    var headerCells = new List<IHtmlContent> { (IHtmlContent) th(i("index")) };
    foreach (var column in preview.ColumnView)
    {
        headerCells.Add((IHtmlContent) th(column.Column.Name));
    }

    // body rows: the running row number, then one cell per column value
    var bodyRows = new List<List<IHtmlContent>>();
    foreach (var row in preview.RowView)
    {
        // bodyRows.Count is the zero-based position of the row being built
        var cells = new List<IHtmlContent> { (IHtmlContent) td(bodyRows.Count) };
        cells.AddRange(row.Values.Select(pair => (IHtmlContent) td(pair.Value)));
        bodyRows.Add(cells);
    }

    writer.Write(
        table(
            thead(
                headerCells),
            tbody(
                bodyRows.Select(
                    r => tr(r)))));
}, "text/html");

In [None]:
// run the pipeline and show the first 10 records:
// fit the pipeline on the data, apply the fitted model to the same data,
// then take a preview of at most 10 rows and display it
var model = pipeline.Fit(data);
var transformedData = model.Transform(data);
var preview = transformedData.Preview(maxRows: 10);
display(preview);

In [None]:
/// <summary>
/// The FromLocation class is used in a column data conversion:
/// it is the input type of the custom mapping that crosses the two encoded location columns.
/// </summary>
public class FromLocation
{
    /// <summary>The encoded longitude vector read by the custom mapping.</summary>
    public float[] EncodedLongitude { get; set; }
    /// <summary>The encoded latitude vector read by the custom mapping.</summary>
    public float[] EncodedLatitude { get; set; }
}

/// <summary>
/// The ToLocation class is used in a column data conversion:
/// it is the output type of the custom mapping that crosses the two encoded location columns.
/// </summary>
public class ToLocation
{
    /// <summary>The crossed location vector written by the custom mapping.</summary>
    public float[] Location { get; set; }
}

In [None]:
// extend the pipeline with two binning steps, each limited to at most 10 bins
// step 2: bin the longitude into the BinnedLongitude column
var pipeline2 = pipeline
    .Append(context.Transforms.NormalizeBinning(
        outputColumnName: "BinnedLongitude",
        inputColumnName: "Longitude",
        maximumBinCount: 10))

    // step 3: bin the latitude into the BinnedLatitude column
    .Append(context.Transforms.NormalizeBinning(
        outputColumnName: "BinnedLatitude",
        inputColumnName: "Latitude",
        maximumBinCount: 10));

In [None]:
// run the pipeline and get the results:
// fit the binning pipeline on the data, apply the fitted model to the same data,
// then take a preview of at most 10 rows and display it
var model = pipeline2.Fit(data);
var transformedData = model.Transform(data);
var preview = transformedData.Preview(maxRows: 10);
display(preview);

In [None]:
// a helper class to access the new binned columns;
// it exposes only the columns needed to plot the binned locations
public class BinnedHouseBlockData
{
    // the longitude after binning
    public float BinnedLongitude { get; set; }
    // the latitude after binning
    public float BinnedLatitude { get; set; }
    // the median house value, used to color the plot markers
    public float MedianHouseValue { get; set; }
}

// get an array of binned housing data from the transformed data view
var binnedHhouses = context.Data.CreateEnumerable<BinnedHouseBlockData>(transformedData, reuseRowObject: false).ToArray();

// plot median house value by binned latitude and longitude;
// the trace and marker types are referenced as Scattergl / Marker, exactly as in
// the other chart cells of this notebook, so every cell compiles against the
// same XPlot.Plotly package version
var chart = Chart.Plot(
    new Scattergl()
    {
        x = binnedHhouses.Select(v => v.BinnedLongitude),
        y = binnedHhouses.Select(v => v.BinnedLatitude),
        mode = "markers",
        marker = new Marker()
        {
            // large square markers so that each bin shows up as a tile
            symbol = "square",
            size = 32,
            color = binnedHhouses.Select(v => v.MedianHouseValue),
            colorscale = "Jet"
        }
    }
);
chart.WithXTitle("Binned Longitude");
chart.WithYTitle("Binned Latitude");
chart.WithTitle("Median house value by binned location");
chart.Width = 600;
chart.Height = 600;
display(chart);

In [None]:
// extend the binning pipeline with the encoding, crossing and clean-up steps
// step 4: one-hot encode the binned longitude
var pipeline3 = pipeline2
    .Append(context.Transforms.Categorical.OneHotEncoding(
        outputColumnName: "EncodedLongitude",
        inputColumnName: "BinnedLongitude"))

    // step 5: one-hot encode the binned latitude
    .Append(context.Transforms.Categorical.OneHotEncoding(
        outputColumnName: "EncodedLatitude",
        inputColumnName: "BinnedLatitude"))

    // step 6: cross the two one-hot encoded columns; the Location vector holds
    // the product of every (longitude slot, latitude slot) pair, with all
    // latitude slots of one longitude slot stored next to each other
    .Append(context.Transforms.CustomMapping<FromLocation, ToLocation>(
        (input, output) =>
        {
            var longitudeSlots = input.EncodedLongitude;
            var latitudeSlots = input.EncodedLatitude;
            var crossed = new float[longitudeSlots.Length * latitudeSlots.Length];
            for (var lon = 0; lon < longitudeSlots.Length; lon++)
            {
                for (var lat = 0; lat < latitudeSlots.Length; lat++)
                {
                    crossed[lon * latitudeSlots.Length + lat] = longitudeSlots[lon] * latitudeSlots[lat];
                }
            }
            output.Location = crossed;
        },
        contractName: "Location"))

    // step 7: remove all the columns we don't need anymore
    .Append(context.Transforms.DropColumns(
        "MedianHouseValue",
        "Longitude",
        "Latitude",
        "BinnedLongitude",
        "BinnedLatitude",
        "EncodedLongitude",
        "EncodedLatitude"));

In [None]:
// run the pipeline and get the results:
// fit the full pipeline on the data, apply the fitted model to the same data,
// then take a preview of at most 10 rows and display it
var model = pipeline3.Fit(data);
var transformedData = model.Transform(data);
var preview = transformedData.Preview(maxRows: 10);
display(preview);

In [None]:
// pull the "Location" value out of every previewed row and expand
// each vector buffer into its dense sequence of values
var vectors = preview.RowView
    .SelectMany(r => r.Values)
    .Where(v => v.Key == "Location")
    .Select(v => ((VBuffer<Single>)v.Value).DenseValues());
display(vectors);