### Binary Classification SDCA Logistic Trainer
https://learn.microsoft.com/en-us/dotnet/machine-learning/resources/tasks#binary-classification

In [2]:
#r "nuget: Microsoft.ML, 5.0.0"
#r "nuget: Microsoft.ML.FastTree, 5.0.0"
#r "nuget: Microsoft.ML.LightGbm, 5.0.0"

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

In [3]:
// Input and Output Models

public class DiabetesInput
{
    // Demographics
    [LoadColumn(0)]
    public float Age { get; set; }
    [LoadColumn(1)]     
    public string Gender { get; set; }
    [LoadColumn(2)]
    public string Ethnicity { get; set; }
    [LoadColumn(3)]     
    public string Education_Level { get; set; }
    [LoadColumn(4)]
    public string Income_Level { get; set; }
    [LoadColumn(5)]
    public string Employment_Status { get; set; }

    // Lifestyle
    [LoadColumn(6)]
    public string Smoking_Status { get; set; }
    [LoadColumn(7)]
    public float Alcohol_Consumption_Per_Week { get; set; }
    [LoadColumn(8)]
    public float Physical_Activity_Minutes_Per_Week { get; set; }
    [LoadColumn(9)]
    public float Diet_Score { get; set; }
    [LoadColumn(10)]
    public float Sleep_Hours_Per_Day { get; set; }
    [LoadColumn(11)]
    public float Screen_Time_Hours_Per_Day { get; set; }

    // Medical History
    [LoadColumn(12)]
    public float Family_History_Diabetes { get; set; }
    [LoadColumn(13)]
    public float Hypertension_History { get; set; }
    [LoadColumn(14)]
    public float Cardiovascular_History { get; set; }

    // Body Measurements
    [LoadColumn(15)]
    public float BMI { get; set; }
    [LoadColumn(16)]
    public float Waist_To_Hip_Ratio { get; set; }

    // Vitals
    [LoadColumn(17)]
    public float Systolic_BP { get; set; }
    [LoadColumn(18)]
    public float Diastolic_BP { get; set; }
    [LoadColumn(19)]
    public float Heart_Rate { get; set; }

    // Lipid Profile
    [LoadColumn(20)]
    public float Cholesterol_Total { get; set; }
    [LoadColumn(21)]
    public float HDL_Cholesterol { get; set; }
    [LoadColumn(22)]
    public float LDL_Cholesterol { get; set; }
    [LoadColumn(23)]
    public float Triglycerides { get; set; }

    // Glucose & Insulin
    [LoadColumn(24)]
    public float Glucose_Fasting { get; set; }
    [LoadColumn(25)]
    public float Glucose_Postprandial { get; set; }
    [LoadColumn(26)]
    public float Insulin_Level { get; set; }
    [LoadColumn(27)]
    public float HbA1c { get; set; }

    // Risk Indicators
    [LoadColumn(28)]
    public float Diabetes_Risk_Score { get; set; }
    [LoadColumn(29)]
    public string Diabetes_Stage { get; set; }    
    [LoadColumn(30)]   
    public bool Diagnosed_Diabetes { get; set; }
}


public class DiabetesPrediction
{
    [ColumnName("PredictedLabel")]
    public bool Predicted_Diabetes { get; set; }

    public float Probability { get; set; }
    public float Score { get; set; }
}


In [4]:
//load the data from csv file
MLContext mlContext = new MLContext();
IDataView data = mlContext.Data.LoadFromTextFile<DiabetesInput>(path: "Diabetes_dataset.csv", hasHeader: true, separatorChar: ',');

In [5]:
//Apply Onhotencoding for categorical columns and concatenate all features
//Then normalize and train the model
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("GenderEncoded", "Gender")
.Append(mlContext.Transforms.Categorical.OneHotEncoding("EthnicityEncoded", "Ethnicity"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Education_LevelEncoded", "Education_Level"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Income_LevelEncoded", "Income_Level"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Employment_StatusEncoded", "Employment_Status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Smoking_StatusEncoded", "Smoking_Status"))
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Diabetes_StageEncoded", "Diabetes_Stage"))
.Append(mlContext.Transforms.Concatenate("Features",
    "Age",
    "GenderEncoded",
    "EthnicityEncoded",
    "Education_LevelEncoded",
    "Income_LevelEncoded",
    "Employment_StatusEncoded",
    "Smoking_StatusEncoded",
    "Diabetes_StageEncoded",
    "Alcohol_Consumption_Per_Week",
    "Physical_Activity_Minutes_Per_Week",
    "Diet_Score",
    "Sleep_Hours_Per_Day",
    "Screen_Time_Hours_Per_Day",
    "Family_History_Diabetes",
    "Hypertension_History",
    "Cardiovascular_History",
    "BMI",
    "Waist_To_Hip_Ratio",
    "Systolic_BP",
    "Diastolic_BP",
    "Heart_Rate",
    "Cholesterol_Total",
    "HDL_Cholesterol",
    "LDL_Cholesterol",
    "Triglycerides"
))
.Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Diagnosed_Diabetes", featureColumnName: "Features"));

In [6]:
// Split and train the model
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
// Train the model
var model=pipeline.Fit(split.TrainSet);
// Test the model
var prediction=model.Transform(split.TestSet);

In [7]:
// Evualuate the model
var metrics=mlContext.BinaryClassification.Evaluate(prediction,labelColumnName:"Diagnosed_Diabetes",scoreColumnName:"Score");

In [20]:
// Print the metrics
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:P2}");

Accuracy: 99.71%
F1 Score: 99.76%


In [22]:
// confusion matrix
var confusionMatrix = metrics.ConfusionMatrix;
Console.WriteLine();
Console.WriteLine("===== Confusion Matrix =====");

Console.WriteLine($"TP: {confusionMatrix.Counts[1][1]}");
Console.WriteLine($"FP: {confusionMatrix.Counts[0][1]}");
Console.WriteLine($"FN: {confusionMatrix.Counts[1][0]}");
Console.WriteLine($"TN: {confusionMatrix.Counts[0][0]}");


===== Confusion Matrix =====
TP: 8018
FP: 40
FN: 17
TN: 11789
