In [None]:
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.Data.Analysis"
#r "nuget:XPlot.Plotly"
#r "nuget:XPlot.Plotly.Interactive"
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;

In [None]:
/// <summary>
/// One row of the cars data set with every field already encoded as a number.
/// Instances are produced by <c>MapToCarInfo</c>; the <c>LoadColumn</c> indices
/// describe the column layout for ML.NET text loading.
/// </summary>
internal class CarInfo
{
    /// <summary>Numeric id assigned to the car model name.</summary>
    [LoadColumn(0)]
    public float Model { get; set; }

    /// <summary>Kilometres driven, as parsed from the input.</summary>
    [LoadColumn(1)]
    public float KilometersDriven { get; set; }

    /// <summary>Current year minus the year read from the input (set in <c>MapToCarInfo</c>).</summary>
    [LoadColumn(2)]
    public float Year { get; set; }

    /// <summary>Owner rank: 1, 2 or 3 for first/second/third owner, 0 for anything else.</summary>
    [LoadColumn(3)]
    public float Owner { get; set; }

    /// <summary>Numeric id assigned to the fuel type name.</summary>
    [LoadColumn(4)]
    public float FuelType { get; set; }

    /// <summary>Numeric id assigned to the transmission name.</summary>
    [LoadColumn(5)]
    public float Transmission { get; set; }

    /// <summary>Car condition value, as parsed from the input.</summary>
    [LoadColumn(6)]
    public float CarCondition { get; set; }

    /// <summary>Insurance date stored as an OLE Automation date number (0 when it could not be parsed).</summary>
    [LoadColumn(7)]
    public float Insurance { get; set; }

    /// <summary>Selling price, as parsed from the input.</summary>
    [LoadColumn(8)]
    public float SellingPrice { get; set; }

    /// <summary>
    /// Returns the values as a comma-separated line in the same field order that
    /// <c>MapToCarInfo</c> reads them (selling price second, insurance before condition).
    /// </summary>
    public override string ToString() =>
        string.Join(
            ",",
            Model,
            SellingPrice,
            KilometersDriven,
            Year,
            Owner,
            FuelType,
            Transmission,
            Insurance,
            CarCondition);
}

/// <summary>
/// Output type holding a single predicted value.
/// </summary>
/// <remarks>
/// NOTE(review): presumably bound by property name to the model's "Score" output
/// column when predictions are created; the prediction code is not visible in this
/// part of the notebook, so confirm before renaming the property.
/// </remarks>
internal class CarSellingPricePrediction
{
    /// <summary>The predicted value (presumably the selling price; verify against the training label).</summary>
    public float Score { get; set; }
}

In [None]:
// Lookup tables that give every distinct (lower-cased) category name a stable numeric id.
// They start empty and are filled by MapToCarInfo while the file is being read;
// ids are handed out in order of first appearance, starting at 1.
var carModels = new Dictionary<string, int>();
var fuelTypes = new Dictionary<string, int>();
var transmissions = new Dictionary<string, int>();

/// <summary>
/// Converts one comma-separated line of the cars file into a <see cref="CarInfo"/>.
/// </summary>
/// <param name="carInfoString">
/// A line with at least nine fields in this order: model, selling price, kilometres
/// driven, year, owner, fuel type, transmission, insurance date, car condition.
/// </param>
/// <returns>
/// The encoded row. Numeric fields that cannot be parsed become 0; model, fuel type
/// and transmission are replaced by ids taken from (and added to) the
/// <c>carModels</c>, <c>fuelTypes</c> and <c>transmissions</c> tables.
/// </returns>
/// <remarks>
/// All parsing uses the invariant culture so the result does not depend on the
/// regional settings of the machine running the notebook.
/// </remarks>
CarInfo MapToCarInfo(string carInfoString)
{
    // Lower-casing makes the category look-ups and the owner match case-insensitive.
    var carInfo = carInfoString.ToLowerInvariant().Split(',', StringSplitOptions.TrimEntries);

    var carModel = GetOrAddCategoryId(carModels, carInfo[0]);
    var sellingPrice = ParseFloatOrZero(carInfo[1]);
    var kilometersDriven = ParseFloatOrZero(carInfo[2]);

    // The year is turned into "years before now"; an unparsable year counts as 0.
    var year = DateTime.Now.Year - ParseFloatOrZero(carInfo[3]);

    var owner = carInfo[4] switch
    {
        "first owner" => 1,
        "second owner" => 2,
        "third owner" => 3,
        _ => 0
    };

    var fuelType = GetOrAddCategoryId(fuelTypes, carInfo[5]);
    var transmission = GetOrAddCategoryId(transmissions, carInfo[6]);

    // Try the explicit day-first format BEFORE the general parser: a general parse
    // would read "05-03-2021" month-first, while "25-03-2021" can only be day-first,
    // leaving the column with mixed interpretations.
    double insuranceDate = 0;
    if (DateTime.TryParseExact(carInfo[7], "dd-MM-yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None,
            out var dayFirstDate))
    {
        insuranceDate = dayFirstDate.ToOADate();
    }
    else if (DateTime.TryParse(carInfo[7], CultureInfo.InvariantCulture, DateTimeStyles.None, out var otherDate))
    {
        insuranceDate = otherDate.ToOADate();
    }

    var insurance = (float)insuranceDate;
    var carCondition = ParseFloatOrZero(carInfo[8]);

    return new CarInfo()
    {
        Model = carModel,
        KilometersDriven = kilometersDriven,
        Year = year,
        Owner = owner,
        FuelType = fuelType,
        Transmission = transmission,
        Insurance = insurance,
        CarCondition = carCondition,
        SellingPrice = sellingPrice
    };

    // Parses a float with the invariant culture; yields 0 when the text is not a number.
    static float ParseFloatOrZero(string text)
    {
        float.TryParse(text, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture,
            out var value);
        return value;
    }

    // Returns the id already stored for the key, or stores and returns the next free id (count + 1).
    static int GetOrAddCategoryId(Dictionary<string, int> ids, string key)
    {
        if (!ids.TryGetValue(key, out var id))
        {
            id = ids.Count + 1;
            ids.Add(key, id);
        }

        return id;
    }
}

// File that holds the raw car listings, relative to the working directory.
string path = "cars.csv";

// Read the file line by line, drop the first line (presumably the header row)
// and turn every remaining line into an encoded CarInfo.
CarInfo[] carInfos =
    (from line in File.ReadLines(path).Skip(1)
     select MapToCarInfo(line))
    .ToArray();

In [None]:
// The same fixed seed is used for the context and the shuffle so runs are repeatable.
MLContext mlContext = new(20210922);

var dataView = mlContext.Data.LoadFromEnumerable(carInfos);
var shuffledData = mlContext.Data.ShuffleRows(dataView, 20210922);

// 0.2 is the fraction of rows that goes into the test set.
var trainTestData = mlContext.Data.TrainTestSplit(shuffledData, 0.2);
var trainData = trainTestData.TrainSet;
var testData = trainTestData.TestSet;

// CreateEnumerable is evaluated lazily, and the following cells enumerate `features`
// many times (one projection per feature for the box plot and again for the
// correlation matrix). Materialise it once so the shuffle/split pipeline is not
// re-run on every enumeration and all per-feature series come from the same
// snapshot of rows. The second argument is false so each row gets its own object,
// which makes keeping them in a list safe.
IEnumerable<CarInfo> features = mlContext.Data.CreateEnumerable<CarInfo>(trainData, false).ToList();
display(features.Take(10));

In [None]:
// Distribution of the selling price over the training rows.
var prices = trainData.GetColumn<float>("SellingPrice");

var pricesTrace = new Histogram { x = prices };
var pricesHistogram = Chart.Plot(pricesTrace);

display(pricesHistogram);

In [None]:
// One projection per feature column of the training rows.
// These are deferred LINQ queries: each is evaluated whenever it is enumerated.
var featuresModel = from f in features select f.Model;
var featuresKilometersDriven = from f in features select f.KilometersDriven;
var featuresYear = from f in features select f.Year;
var featuresOwner = from f in features select f.Owner;
var featuresFuelType = from f in features select f.FuelType;
var featuresTransmission = from f in features select f.Transmission;
var featuresCarCondition = from f in features select f.CarCondition;
var featuresInsurance = from f in features select f.Insurance;

// One box trace per feature, all drawn in a single chart.
var featuresDiagram = Chart.Plot(new Box[]
{
    new() { y = featuresModel, name = "Model" },
    new() { y = featuresKilometersDriven, name = "KilometersDriven" },
    new() { y = featuresYear, name = "Year" },
    new() { y = featuresOwner, name = "Owner" },
    new() { y = featuresFuelType, name = "FuelType" },
    new() { y = featuresTransmission, name = "Transmission" },
    new() { y = featuresCarCondition, name = "CarCondition" },
    new() { y = featuresInsurance, name = "Insurance" }
});

display(featuresDiagram);

In [None]:
#r "nuget:MathNet.Numerics, 4.9.0"
using MathNet.Numerics.Statistics;

In [None]:
// Feature names in the order used for the series list below and for the heatmap's x axis.
var featureColumns = new string[] { "Model", "KilometersDriven", "Year", "Owner", "FuelType", "Transmission", "CarCondition", "Insurance" };

// One series per feature, in the same order as featureColumns, converted to double
// for the Pearson computation. Despite its name, this list holds the raw series,
// not the correlation values (those go into z).
var correlationMatrix = new List<List<double>>
{
    featuresModel.Select(x => (double)x).ToList(),
    featuresKilometersDriven.Select(x => (double)x).ToList(),
    featuresYear.Select(x => (double)x).ToList(),
    featuresOwner.Select(x => (double)x).ToList(),
    featuresFuelType.Select(x => (double)x).ToList(),
    featuresTransmission.Select(x => (double)x).ToList(),
    featuresCarCondition.Select(x => (double)x).ToList(),
    featuresInsurance.Select(x => (double)x).ToList()
};

var length = featureColumns.Length;

// Heatmap orientation: z[row, col] is drawn at y-label number `row` and x-label
// number `col`. The x axis lists the features in order and the y axis lists them
// reversed, so feature i sits in column i and in row (length - 1 - i).
// Every value is therefore stored at [row of one feature, column of the other],
// which keeps each cell under the labels of the pair it was computed from.
var z = new double[length, length];
for (int first = 0; first < length; ++first)
{
    var firstRow = length - 1 - first;

    // A feature against itself: fixed to 1 without computing it.
    z[firstRow, first] = 1;

    for (int second = first + 1; second < length; ++second)
    {
        var value = Correlation.Pearson(correlationMatrix[first], correlationMatrix[second]);

        // Pearson correlation is symmetric, so fill both cells of the pair.
        z[firstRow, second] = value;             // y = first feature,  x = second feature
        z[length - 1 - second, first] = value;   // y = second feature, x = first feature
    }
}

var correlationMatrixHeatmap = Chart.Plot(
    new Heatmap
    {
        x = featureColumns,
        y = featureColumns.Reverse(),
        z = z,
        // Fix the colour scale to the full range a correlation can take.
        zmin = -1,
        zmax = 1
    }
);
display(correlationMatrixHeatmap);