1. NuGet Packages

In [None]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"

In [None]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;

using System.IO;
using System.Text;

2. Data Models

In [None]:
/// <summary>
/// One row of the housing dataset. Each property is bound to a zero-based
/// column index of the input text file through <c>LoadColumn</c>.
/// </summary>
public class HousingData
{
    // Columns 0-10 are used as model inputs elsewhere in this notebook.
    [LoadColumn(0)] public float CrimeRate { get; set; }
    [LoadColumn(1)] public float Zoned { get; set; }
    [LoadColumn(2)] public float Proportion { get; set; }
    [LoadColumn(3)] public float RiverCoast { get; set; }
    [LoadColumn(4)] public float NOConcentration { get; set; }
    [LoadColumn(5)] public float NumOfRoomsPerDwelling { get; set; }
    [LoadColumn(6)] public float Age { get; set; }
    [LoadColumn(7)] public float EmployCenterDistance { get; set; }
    [LoadColumn(8)] public float HighwayAccessabilityRadius { get; set; }
    [LoadColumn(9)] public float TaxRate { get; set; }
    [LoadColumn(10)] public float PTRatio { get; set; }

    // Column 11 is the value the model is trained to predict (copied to "Label" in the pipeline).
    [LoadColumn(11)] public float MedianPrice { get; set; }
}

/// <summary>
/// Output type for predictions: binds the model's "Score" output column
/// to <see cref="MedianPrice"/>.
/// </summary>
public class PricePredictions
{
    // ML.NET places the estimated continuous value of a regression in the "Score" column.
    [ColumnName("Score")] public float MedianPrice { get; set; }
}

3. Load and split the data

In [None]:
// A single MLContext is reused by the later cells of this notebook.
var mlContext = new MLContext();

// Read the CSV into HousingData rows: the first line is a header and fields are comma-separated.
var data = mlContext.Data.LoadFromTextFile<HousingData>(
    path: "../../ML.Net.Training/Datasets/boston_housing.csv",
    hasHeader: true,
    separatorChar: ',');

// Hold out 20% of the rows for testing; the remaining 80% is used for training.
// Adjusting the fraction trades training data for evaluation data.
var dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);

In [None]:
//You can visually display your data to check that it's loaded correctly
//dataSplit.TrainSet.ToTabularDataResource().Display();

4. Train the Model (Feature Engineering)

In [None]:
// SDCA is a linear regression trainer; it reads its inputs from "Features" and its target from "Label".
// Other regression trainers can be substituted here to compare results.
var model = mlContext.Regression.Trainers.Sdca(featureColumnName: "Features", labelColumnName: "Label");

// Pipeline steps, in order:
//  1. Copy MedianPrice into the "Label" column expected by the trainer.
//  2. One-hot encode RiverCoast: its numeric value is a category marker, not a magnitude,
//     so encoding keeps the model from treating it as an ordered quantity.
//  3. Concatenate the input columns into a single "Features" vector.
//     MedianPrice is deliberately NOT included: it is the label, and feeding it to the model
//     as a feature leaks the answer into training, inflates the evaluation metrics, and makes
//     predictions meaningless for new samples where the price is unknown.
//  4. Normalize the "Features" vector in place.
//  5. Train the regression model.
var pipeline = mlContext.Transforms.CopyColumns("Label", nameof(HousingData.MedianPrice))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(HousingData.RiverCoast)))
                .Append(mlContext.Transforms.Concatenate("Features",
                    nameof(HousingData.CrimeRate),
                    nameof(HousingData.Zoned),
                    nameof(HousingData.Proportion),
                    nameof(HousingData.RiverCoast),
                    nameof(HousingData.NOConcentration),
                    nameof(HousingData.NumOfRoomsPerDwelling),
                    nameof(HousingData.Age),
                    nameof(HousingData.EmployCenterDistance),
                    nameof(HousingData.HighwayAccessabilityRadius),
                    nameof(HousingData.TaxRate),
                    nameof(HousingData.PTRatio)))
                .Append(mlContext.Transforms.NormalizeLpNorm("Features", "Features"))
                .Append(model);

// Fit every transform and the trainer on the training portion only.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

5. Evaluate the Model

In [None]:
// Run the held-out test rows through the trained pipeline to produce predictions.
var testSetTransform = trainedModel.Transform(dataSplit.TestSet);

// Compare the predicted "Score" column against the true "Label" column.
var metrics = mlContext.Regression.Evaluate(
    data: testSetTransform,
    labelColumnName: "Label",
    scoreColumnName: "Score");

In [None]:
//Ending the cell with a bare expression makes the notebook display its value (here, the evaluation metrics)
metrics

6. Save the Model

In [None]:
// Persist the trained pipeline together with the schema of its input data.
// The ".mdl" extension is this notebook's choice; the same path is read back in the prediction step.
mlContext.Model.Save(
    model: trainedModel,
    inputSchema: dataSplit.TrainSet.Schema,
    filePath: "./LinearRegressionModel.mdl");

7. Load the Model and Predict

In [None]:
// A single unseen observation to score. MedianPrice is left unset because it is the value being predicted.
var newSample = new HousingData
{
    Age = 65.2f,
    CrimeRate = 0.00632f,
    EmployCenterDistance = 4.0900f,
    // NOTE(review): 15.3 is identical to PTRatio below - possibly a copy/paste slip; confirm against the dataset.
    HighwayAccessabilityRadius = 15.3f,
    NOConcentration = 0.538f,
    NumOfRoomsPerDwelling = 6.575f,
    Proportion = 2.31f,
    PTRatio = 15.3f,
    RiverCoast = 0,
    TaxRate = 296f,
    Zoned = 18f
};

// Reload the model saved earlier; the input schema returned by Load is not needed here, so it is discarded.
using (var stream = new FileStream("./LinearRegressionModel.mdl", FileMode.Open, FileAccess.Read, FileShare.Read))
{
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine implements IDisposable, so release it once the prediction has been made.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<HousingData, PricePredictions>(loadedModel))
    {
        var results = predictionEngine.Predict(newSample);

        Console.WriteLine(results.MedianPrice);
    }
}
