1. Install the NuGet packages for the demo

In [1]:
#r "nuget:Microsoft.ML,*-*"
#r "nuget:Microsoft.Data.Analysis, *-*"
#r "nuget:Microsoft.DotNet.Interactive.ExtensionLab,*-*"
#r "nuget:Microsoft.ML.FastTree, *-*"

2. Add global usings

In [1]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.Data.Analysis;
using System.IO;
using System.Text;

3. Declare input data model

In [1]:
/// <summary>
/// One row of the energy-efficiency dataset as read by the text loader.
/// Each property is bound to a zero-based column index of the source file
/// through <c>LoadColumn</c>.
/// </summary>
public class EnergyEfficiencyInput
{
	/// <summary>Value read from column 0.</summary>
	[LoadColumn(0)] public float Compactness { get; set; }

	/// <summary>Value read from column 1.</summary>
	[LoadColumn(1)] public float SurfaceArea { get; set; }

	/// <summary>Value read from column 2.</summary>
	[LoadColumn(2)] public float WallArea { get; set; }

	/// <summary>Value read from column 3.</summary>
	[LoadColumn(3)] public float RoofArea { get; set; }

	/// <summary>Value read from column 4.</summary>
	[LoadColumn(4)] public float Height { get; set; }

	/// <summary>Value read from column 5.</summary>
	[LoadColumn(5)] public float Orientation { get; set; }

	/// <summary>Value read from column 6.</summary>
	[LoadColumn(6)] public float GlazingArea { get; set; }

	/// <summary>Value read from column 7.</summary>
	[LoadColumn(7)] public float GlazingAreaDistribution { get; set; }

	/// <summary>
	/// Value read from column 8. The training pipeline copies this column to
	/// "Label", so it is the regression target rather than a feature.
	/// </summary>
	[LoadColumn(8)] public float HeatingLoad { get; set; }
}

4. Declare output data model

In [1]:
/// <summary>
/// Prediction result produced by the prediction engine for a single input row.
/// </summary>
public class EnergyEfficiencyOutput
{
	/// <summary>
	/// Predicted heating load. Bound to the "Score" column of the transformed
	/// data via <c>ColumnName</c>.
	/// </summary>
	[ColumnName("Score")] public float HeatingLoad { get; set; }
}

5. Load the dataset and split into test and training sets

In [1]:
// Single ML.NET context shared by the cells that follow.
var mlContext = new MLContext();

// Read the comma-separated file (first row is a header) into rows shaped
// like EnergyEfficiencyInput.
var trainingData = mlContext.Data.LoadFromTextFile<EnergyEfficiencyInput>(
	path: "../../../ML.NET.Demo/Assets/energy_efficiency_data.csv",
	separatorChar: ',',
	hasHeader: true);

// Hold out 30% of the rows as the test set; the rest becomes the training set.
var dataSplit = mlContext.Data.TrainTestSplit(
	trainingData,
	testFraction: 0.3);

6. *Optional* Display data split

In [1]:
// Render the held-out test rows as a table in the notebook output.
dataSplit.TestSet
	.ToTabularDataResource()
	.Display();

7. Train the model (feature engineering)

In [1]:
// Input columns that are packed into the single "Features" vector.
// HeatingLoad is deliberately absent: it is the label.
var featureColumns = new[]
{
	nameof(EnergyEfficiencyInput.Compactness),
	nameof(EnergyEfficiencyInput.SurfaceArea),
	nameof(EnergyEfficiencyInput.WallArea),
	nameof(EnergyEfficiencyInput.RoofArea),
	nameof(EnergyEfficiencyInput.Height),
	nameof(EnergyEfficiencyInput.Orientation),
	nameof(EnergyEfficiencyInput.GlazingArea),
	nameof(EnergyEfficiencyInput.GlazingAreaDistribution),
};

// FastForest regression trainer: 10 trees with at most 5 leaves each.
var model = mlContext.Regression.Trainers.FastForest(numberOfLeaves: 5, numberOfTrees: 10);

// Pipeline steps, in order:
//   1. copy HeatingLoad into "Label",
//   2. concatenate the feature columns into "Features",
//   3. min-max normalize "Features" in place,
//   4. run the trainer.
var pipeline = mlContext.Transforms
	.CopyColumns("Label", nameof(EnergyEfficiencyInput.HeatingLoad))
	.Append(mlContext.Transforms.Concatenate("Features", featureColumns))
	.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
	.Append(model);

// Fit the whole pipeline on the training portion of the split.
var trainedModel = pipeline.Fit(dataSplit.TrainSet);

8. Evaluate the model

In [1]:
// Run the fitted pipeline over the held-out rows; despite its name, testSet
// holds the transformed (scored) test data.
var testSet = trainedModel.Transform(dataSplit.TestSet);

// Compute regression metrics. "Label" and "Score" are the defaults of
// Evaluate, written out here to make the compared columns explicit.
var metrics = mlContext.Regression.Evaluate(
	testSet,
	labelColumnName: "Label",
	scoreColumnName: "Score");

// Trailing expression: the notebook renders it as the cell output.
metrics

MeanAbsoluteError,MeanSquaredError,RootMeanSquaredError,LossFunction,RSquared
2.3277383072431697,8.950440543380186,2.9917286881300225,8.95044053196474,0.9100971443333749


9. Save the model

In [1]:
// Persist the fitted pipeline together with the training data's schema so it
// can be reloaded later without retraining.
mlContext.Model.Save(
	trainedModel,
	dataSplit.TrainSet.Schema,
	"./DecisionTreeRegression.mdl");

10. Load the model and test

In [1]:
// Sample row to score. HeatingLoad is left at its default because it is the
// value the model is asked to predict.
var newSample = new EnergyEfficiencyInput
{
	Compactness = 0.98f,
	SurfaceArea = 514.5f,
	WallArea = 294f,
	RoofArea = 110.25f,
	Height = 7f,
	Orientation = 2f,
	GlazingArea = 0f,
	GlazingAreaDistribution = 0f
};

// Re-open the model file written by the save step (read-only).
using (var stream = new FileStream("./DecisionTreeRegression.mdl", FileMode.Open, FileAccess.Read))
{
    // Named loadedModel so it does not shadow the notebook-level `model`
    // (the FastForest trainer declared in the training cell). The input
    // schema returned through the out parameter is not needed here.
    var loadedModel = mlContext.Model.Load(stream, out _);

    // PredictionEngine implements IDisposable; the original cell never
    // disposed it. Wrapping it in a using statement releases it
    // deterministically once the prediction has been displayed.
    using (var predictionEngine = mlContext.Model.CreatePredictionEngine<EnergyEfficiencyInput, EnergyEfficiencyOutput>(loadedModel))
    {
        var results = predictionEngine.Predict(newSample);

        results.Display();
    }
}

HeatingLoad
26.81036
