Add Packages

In [None]:
#r "nuget:Microsoft.ML, *-*"
#r "nuget:Microsoft.ML.FastTree, *-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab, *-*"

Loading extensions from `Microsoft.DotNet.Interactive.ExtensionLab.dll`

Loading extensions from `Microsoft.Data.Analysis.Interactive.dll`

In [None]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;

using System.IO;
using System.Text;

Data Models

In [None]:
/// <summary>
/// One input row of the housing data set. Every property is bound by position to a
/// column of the source text file through <c>LoadColumn</c>.
/// </summary>
/// <remarks>
/// The declaration order of the properties is kept as-is because it mirrors the
/// column order of the file.
/// NOTE(review): what each column means is only implied by the property names here;
/// confirm against the header row of the CSV.
/// </remarks>
public class BostonHousingData
{
    /// <summary>Value loaded from column 0.</summary>
    [LoadColumn(0)] public float CrimeRate { get; set; }

    /// <summary>Value loaded from column 1.</summary>
    [LoadColumn(1)] public float Zoned { get; set; }

    /// <summary>Value loaded from column 2.</summary>
    [LoadColumn(2)] public float Proportion { get; set; }

    /// <summary>
    /// Value loaded from column 3 as an integer; the training pipeline one-hot encodes it.
    /// </summary>
    [LoadColumn(3)] public int RiverCoast { get; set; }

    /// <summary>Value loaded from column 4.</summary>
    [LoadColumn(4)] public float NOConcentration { get; set; }

    /// <summary>Value loaded from column 5.</summary>
    [LoadColumn(5)] public float NumOfRoomsPerDwelling { get; set; }

    /// <summary>Value loaded from column 6.</summary>
    [LoadColumn(6)] public float Age { get; set; }

    /// <summary>Value loaded from column 7.</summary>
    [LoadColumn(7)] public float EmployeeCenterDistance { get; set; }

    /// <summary>Value loaded from column 8.</summary>
    [LoadColumn(8)] public float HighwayAccesabilityRadius { get; set; }

    /// <summary>Value loaded from column 9.</summary>
    [LoadColumn(9)] public float TaxRate { get; set; }

    /// <summary>Value loaded from column 10.</summary>
    [LoadColumn(10)] public float PTRatio { get; set; }

    /// <summary>
    /// Value loaded from column 11; the training pipeline copies it into the "Label" column.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the rows rendered in this notebook show values such as 396.9 and
    /// 392.83 for this property, which do not look like median prices — confirm that
    /// index 11 is really the price column of the CSV.
    /// </remarks>
    [LoadColumn(11)] public float MedianPrice { get; set; }
}

/// <summary>
/// Output type used by the prediction engine: receives the value the model writes
/// to its "Score" column.
/// </summary>
public class BostonHousingPricePredictions
{
    /// <summary>
    /// Predicted value, mapped from the "Score" output column.
    /// Exposed as an auto-property (rather than a public field) to match the
    /// property-based style of <see cref="BostonHousingData"/>.
    /// </summary>
    [ColumnName("Score")]
    public float MedianPrice { get; set; }
}

Load And Split Data

In [None]:
// ML.NET context shared by every later cell of the notebook.
MLContext mlContext = new MLContext();

// Read the comma-separated file (first line is a header) into a data view whose
// columns are described by BostonHousingData.
IDataView data = mlContext.Data.LoadFromTextFile<BostonHousingData>(
    path: "./Data/boston_housing.csv",
    separatorChar: ',',
    hasHeader: true);

// Hold out 20% of the rows as the test set; the remainder is the training set.
DataOperationsCatalog.TrainTestData dataSplit =
    mlContext.Data.TrainTestSplit(data, testFraction: 0.2);

In [None]:
// Display the training partition as a table. The expression is intentionally left
// without a trailing semicolon so that its value becomes the cell's output.
dataSplit.TrainSet.ToTabularDataResource()

CrimeRate,Zoned,Proportion,RiverCoast,NOConcentration,NumOfRoomsPerDwelling,Age,EmployeeCenterDistance,HighwayAccesabilityRadius,TaxRate,PTRatio,MedianPrice
0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9
0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9
0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83
0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63
0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9
0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9
0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311,15.2,386.63
0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71
0.22489,12.5,7.87,0,0.524,6.377,94.3,6.3467,5,311,15.2,392.52
0.11747,12.5,7.87,0,0.524,6.009,82.9,6.2267,5,311,15.2,396.9


In [None]:
// Display the held-out test partition as a table. The expression is intentionally
// left without a trailing semicolon so that its value becomes the cell's output.
dataSplit.TestSet.ToTabularDataResource()

CrimeRate,Zoned,Proportion,RiverCoast,NOConcentration,NumOfRoomsPerDwelling,Age,EmployeeCenterDistance,HighwayAccesabilityRadius,TaxRate,PTRatio,MedianPrice
0.02985,0.0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12
0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6
0.80271,0.0,8.14,0,0.538,5.456,36.6,3.7965,4,307,21.0,288.99
0.77299,0.0,8.14,0,0.538,6.495,94.4,4.4547,4,307,21.0,387.94
0.08014,0.0,5.96,0,0.499,5.85,41.5,3.9342,5,279,19.2,396.9
0.12269,0.0,6.91,0,0.448,6.069,40.0,5.7209,3,233,17.9,389.39
0.17142,0.0,6.91,0,0.448,5.682,33.8,5.1004,3,233,17.9,396.9
0.25387,0.0,6.91,0,0.448,5.399,95.3,5.87,3,233,17.9,396.9
0.13554,12.5,6.07,0,0.409,5.594,36.8,6.498,4,345,18.9,396.9
0.15876,0.0,10.81,0,0.413,5.961,17.5,5.2873,4,305,19.2,376.94


Train The Model

In [None]:
// Fast-forest (random forest) regression trainer: 100 trees, up to 30 leaves per tree.
// It uses the default "Label" and "Features" columns, which the pipeline below produces.
var model = mlContext.Regression.Trainers.FastForest(numberOfLeaves: 30, numberOfTrees: 100);

// Pipeline steps:
//   1. copy MedianPrice into "Label" (the value to predict),
//   2. one-hot encode the integer RiverCoast column in place,
//   3. concatenate the input columns into the "Features" vector,
//   4. normalize "Features" in place,
//   5. train the regressor.
// MedianPrice is deliberately NOT part of "Features": feeding the label to the model
// as an input leaks the answer, which inflates the evaluation metrics and makes the
// model useless for new rows where the price is unknown.
var pipeline = mlContext.Transforms.CopyColumns("Label", nameof(BostonHousingData.MedianPrice))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(BostonHousingData.RiverCoast)))
                .Append(mlContext.Transforms.Concatenate("Features",
                    nameof(BostonHousingData.CrimeRate),
                    nameof(BostonHousingData.Zoned),
                    nameof(BostonHousingData.Proportion),
                    nameof(BostonHousingData.RiverCoast),
                    nameof(BostonHousingData.NOConcentration),
                    nameof(BostonHousingData.NumOfRoomsPerDwelling),
                    nameof(BostonHousingData.Age),
                    nameof(BostonHousingData.EmployeeCenterDistance),
                    nameof(BostonHousingData.HighwayAccesabilityRadius),
                    nameof(BostonHousingData.TaxRate),
                    nameof(BostonHousingData.PTRatio)))
                .Append(mlContext.Transforms.NormalizeLogMeanVariance("Features", "Features"))
                .Append(model);

// Fit every step of the pipeline on the training partition only.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

Evaluate The Model

In [None]:
// Run the fitted pipeline over the held-out test rows.
IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

// Compute regression metrics by comparing the "Score" column with the "Label" column
// (both names are the evaluator's defaults, spelled out here for readability).
RegressionMetrics metrics = mlContext.Regression.Evaluate(
    data: testSetTransform,
    labelColumnName: "Label",
    scoreColumnName: "Score");

In [None]:
// Display the regression metrics computed on the test set. The bare expression
// (no trailing semicolon) is what makes the notebook render the value.
metrics

MeanAbsoluteError,MeanSquaredError,RootMeanSquaredError,LossFunction,RSquared
15.69784154680868,863.6044318926652,29.38714739291082,863.6044448889637,0.9247598838100471


Save The Model

In [None]:
// Write the fitted pipeline to disk together with the schema of the training data,
// so that it can be loaded again in the prediction cell.
mlContext.Model.Save(
    trainedModel,
    dataSplit.TrainSet.Schema,
    "./RandomForestRegressionModel.mdl");

Load The Model And Predict

In [None]:
// Hand-written input row to score. MedianPrice is not set and therefore keeps its
// default value of 0.
var newSample = new BostonHousingData
{
    Age = 65.2f,
    CrimeRate = 0.00632f,
    EmployeeCenterDistance = 4.0900f,
    HighwayAccesabilityRadius = 15.3f,
    NOConcentration = 0.538f,
    NumOfRoomsPerDwelling = 6.575f,
    Proportion = 1f,
    PTRatio = 15.3f,
    RiverCoast = 0,
    TaxRate = 296f,
    Zoned = 23f
};

// Open the saved model read-only; the stream is closed when the block exits.
using (var stream = new FileStream("./RandomForestRegressionModel.mdl", FileMode.Open, FileAccess.Read, FileShare.Read))
{
    // The input schema stored alongside the model is not needed here, hence the discard.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // The prediction engine is IDisposable, so it is wrapped in a using statement
    // instead of being left for the garbage collector.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<BostonHousingData, BostonHousingPricePredictions>(loadedModel))
    {
        var prediction = predictionEngine.Predict(newSample);

        Console.WriteLine("-----------------------");
        Console.WriteLine($"Prediction : {prediction.MedianPrice: #.##}");
        Console.WriteLine("-----------------------");
    }
}

-----------------------
Prediction :  161.97
-----------------------
