## Cross-Validation (CV)

In [1]:
#r "nuget: Microsoft.ML, 5.0.0"
#r "nuget: Microsoft.ML.FastTree, 5.0.0"
#r "nuget: Microsoft.ML.LightGbm, 5.0.0"

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

In [2]:
/// <summary>
/// Input row type for the housing data file. Every property is bound to a
/// column index of the loaded text file through <see cref="LoadColumnAttribute"/>;
/// text columns are declared as <c>string</c>, numeric columns as <c>float</c>.
/// </summary>
public class HousingData
{
    /// <summary>Text value loaded from column 0.</summary>
    [LoadColumn(0)] public string City { get; set; }

    /// <summary>Text value loaded from column 1.</summary>
    [LoadColumn(1)] public string Locality { get; set; }

    /// <summary>Numeric value loaded from column 2.</summary>
    [LoadColumn(2)] public float Bedrooms { get; set; }

    /// <summary>Numeric value loaded from column 3.</summary>
    [LoadColumn(3)] public float Bathrooms { get; set; }

    /// <summary>Numeric value loaded from column 4.</summary>
    [LoadColumn(4)] public float SqftLiving { get; set; }

    /// <summary>Numeric value loaded from column 5.</summary>
    [LoadColumn(5)] public float SqftLot { get; set; }

    /// <summary>Numeric value loaded from column 6.</summary>
    [LoadColumn(6)] public float Floors { get; set; }

    /// <summary>Numeric value loaded from column 7.</summary>
    [LoadColumn(7)] public float YearBuilt { get; set; }

    /// <summary>Numeric value loaded from column 8.</summary>
    [LoadColumn(8)] public float YearRenovated { get; set; }

    /// <summary>Text value loaded from column 9.</summary>
    [LoadColumn(9)] public string PropertyType { get; set; }

    /// <summary>Numeric value loaded from column 10.</summary>
    [LoadColumn(10)] public float Parking { get; set; }

    /// <summary>Text value loaded from column 11.</summary>
    [LoadColumn(11)] public string Facing { get; set; }

    /// <summary>Text value loaded from column 12.</summary>
    [LoadColumn(12)] public string Lift { get; set; }

    /// <summary>Text value loaded from column 13.</summary>
    [LoadColumn(13)] public string Furnishing { get; set; }

    /// <summary>Numeric value loaded from column 14.</summary>
    [LoadColumn(14)] public float NearbySchools { get; set; }

    /// <summary>Numeric value loaded from column 15.</summary>
    [LoadColumn(15)] public float NearbyHospitals { get; set; }

    /// <summary>Text value loaded from column 16.</summary>
    [LoadColumn(16)] public string Society { get; set; }

    /// <summary>Numeric value loaded from column 17.</summary>
    [LoadColumn(17)] public float Balcony { get; set; }

    /// <summary>Text value loaded from column 18.</summary>
    [LoadColumn(18)] public string Garden { get; set; }

    /// <summary>
    /// Numeric value loaded from column 19; the pipelines in this notebook
    /// pass "Price" as the label column name.
    /// </summary>
    [LoadColumn(19)] public float Price { get; set; }
}
/// <summary>
/// Output row type that maps the data view column named "Score" onto a
/// <c>float</c> property.
/// </summary>
public class Prediction
{
    /// <summary>Value read from the "Score" column.</summary>
    [ColumnName("Score")] public float Score { get; set; }
}

In [3]:
// Shared ML.NET context; the later cells use it for loading, transforms, training and evaluation.
var mlContext = new MLContext();

// Load HousePrices.csv as HousingData rows: comma-separated, first line is a header.
var data = mlContext.Data.LoadFromTextFile<HousingData>(
    path: "HousePrices.csv",
    separatorChar: ',',
    hasHeader: true);

In [4]:
// Names of the float columns that are combined into the "Features" vector.
string[] columns =
{
    "Bedrooms", "Bathrooms", "SqftLiving", "SqftLot", "Floors",
    "YearBuilt", "YearRenovated", "Parking",
    "NearbySchools", "NearbyHospitals", "Balcony"
};

// Model pipeline: concatenate the columns into "Features", apply mean-variance
// normalization to "Features", then fit an SDCA regressor with "Price" as the label.
var pipeline = mlContext.Transforms
    .Concatenate("Features", columns)
    .Append(mlContext.Transforms.NormalizeMeanVariance("Features"))
    .Append(mlContext.Regression.Trainers.Sdca(
        labelColumnName: "Price",
        featureColumnName: "Features"));

In [5]:
// Run 5-fold cross-validation of the SDCA pipeline with "Price" as the label.
var cvResults = mlContext.Regression.CrossValidate(
    data: data,
    estimator: pipeline,
    numberOfFolds: 5,
    labelColumnName: "Price");

In [6]:
// Print R-squared and RMSE for each cross-validation fold.
foreach (var foldResult in cvResults)
{
    var metrics = foldResult.Metrics;
    Console.WriteLine($"RSquared: {metrics.RSquared}, RMSE: {metrics.RootMeanSquaredError}");
}

RSquared: 0.9095771854476522, RMSE: 4170722.7226472567
RSquared: 0.9061861659785091, RMSE: 4246397.499211685
RSquared: 0.8882223546511782, RMSE: 4550445.638276457
RSquared: 0.9116214538009507, RMSE: 4112865.5067793387
RSquared: 0.9001027016240472, RMSE: 3830020.171065115


In [7]:
// Feature-assembly step only: no normalizer and no trainer are attached here.
// The comparison loop further down appends one trainer at a time to this step.
var newpipeline = mlContext.Transforms.Concatenate(
    outputColumnName: "Features",
    inputColumnNames: columns);

In [8]:
// Hold out 20% of the rows as a test set.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);

// NOTE: newpipeline consists solely of the Concatenate step, so this Fit call
// produces a feature-assembling transformer, not a trained regression model.
var model = newpipeline.Fit(split.TrainSet);

// Apply that transformer to the held-out rows; the output is the transformed
// data view returned by the Concatenate step.
IDataView prediction = model.Transform(split.TestSet);

In [11]:
// Candidate regression trainers to compare. Each one uses "Price" as the label
// column and "Features" as the feature column.
IEstimator<ITransformer>[] trainers =
{
    mlContext.Regression.Trainers.Sdca(
        labelColumnName: "Price", featureColumnName: "Features"),
    mlContext.Regression.Trainers.FastTree(
        labelColumnName: "Price", featureColumnName: "Features"),
    mlContext.Regression.Trainers.LightGbm(
        labelColumnName: "Price", featureColumnName: "Features"),
    mlContext.Regression.Trainers.LbfgsPoissonRegression(
        labelColumnName: "Price", featureColumnName: "Features")
};

In [12]:
// Perform 5-fold cross-validation for each candidate trainer and print the
// fold-averaged R-squared and RMSE, one line per trainer.
//
// A fixed seed is passed to CrossValidate so that the fold partition is not
// re-randomized on every call: without it each trainer may be scored on a
// different split of the data, which makes the comparison noisy and
// non-repeatable. (Trainer-internal randomness is not controlled here.)
foreach (var trainer in trainers)
{
    // Append the trainer to the shared feature-assembly step for this run only.
    var candidate = newpipeline.Append(trainer);

    var foldResults = mlContext.Regression.CrossValidate(
        data,
        candidate,
        numberOfFolds: 5,
        labelColumnName: "Price",
        seed: 42);

    var avgRSquared = foldResults.Average(f => f.Metrics.RSquared);
    var avgRMSE = foldResults.Average(f => f.Metrics.RootMeanSquaredError);

    // trainer.ToString() prints the trainer's type name, left-aligned to 60 characters.
    Console.WriteLine($"{trainer.ToString(),-60} | RSquared: {avgRSquared:F4} | RMSE: {avgRMSE:F4}");
}

Microsoft.ML.Trainers.SdcaRegressionTrainer                  | RSquared: 0.9039 | RMSE: 4167562.4992
Microsoft.ML.Trainers.FastTree.FastTreeRegressionTrainer     | RSquared: 0.8785 | RMSE: 4670779.3317
Microsoft.ML.Trainers.LightGbm.LightGbmRegressionTrainer     | RSquared: 0.8783 | RMSE: 4683225.1552
Microsoft.ML.Trainers.LbfgsPoissonRegressionTrainer          | RSquared: 0.7068 | RMSE: 7253932.4669
