# Load packages

In [1]:
# skip reinstalling packages we already have
using Pkg

pkgs = [
    "MLJ", "MLJBase", "MLJModels", "MLJEnsembles", "MLJLinearModels",
    "DecisionTree", "MLJDecisionTreeInterface", "NaiveBayes", 
    "MLJNaiveBayesInterface", "EvoTrees", "CategoricalArrays", "Random",
    "LIBSVM", "MLJLIBSVMInterface", "Plots", "MLJModelInterface",
    "CSV", "DataFrames", "UrlDownload", "XGBoost", "NNlib"
]

# Filter out packages already installed
missing_pkgs = filter(pkg -> !(pkg in keys(Pkg.project().dependencies)), pkgs)

if !isempty(missing_pkgs)
    println("Installing missing packages: ", missing_pkgs)
    Pkg.add(missing_pkgs)
else
    println(" All required packages are already installed.")
end


 All required packages are already installed.


In [2]:
using MLJ
using MLJBase
using LIBSVM
using NNlib
using Flux
using Flux.Losses
using Statistics

In [3]:
# Load PCA
PCA_model = MLJ.@load PCA pkg="MultivariateStats"

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mFor silent loading, specify `verbosity=0`. 


import MLJMultivariateStatsInterface ✔


MLJMultivariateStatsInterface.PCA

In [4]:
#Load your library of functions
include("utils.2.2.jl")
# Set a global random seed for reproducibility
using Random
Random.seed!(42)

TaskLocalRNG()

# Load Data

In [None]:
using CSV, DataFrames, Random
using CategoricalArrays

df = CSV.read("./data/updated_pollution_dataset.csv", DataFrame)

# Some log
println("First 5 rows of df:")
show(df[1:5, :], allcols=true)


# Convert last column to categorical (in-place!)
df[!, end] = categorical(df[!, end])

# Extract the integer codes of the categories
targets = Float32.(levelcode.(df[!, end]))

# Use all columns except the last one as inputs
inputs = Matrix{Float32}(df[:, 1:end-1])

println("First 5 inputs::")
for i in 1:5
    println(inputs[i, :])
end

println("\n\nFirst 5 targets:")
println(targets[1:5])

# Extract labels (categories) as strings
label_names = levels(df[!, 10])
println("Labels: ", label_names)

First 5 rows of df:
[1m5×10 DataFrame[0m
[1m Row [0m│[1m Temperature [0m[1m Humidity [0m[1m PM2.5   [0m[1m PM10    [0m[1m NO2     [0m[1m SO2     [0m[1m CO      [0m[1m Proximity_to_Industrial_Areas [0m[1m Population_Density [0m[1m Air Quality [0m
     │[90m Float64     [0m[90m Float64  [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64                       [0m[90m Int64              [0m[90m String15    [0m
─────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   1 │        29.8      59.1      5.2     17.9     18.9      9.2     1.72                            6.3                 319  Moderate
   2 │        28.3      75.6      2.3     12.2     30.8      9.7     1.64                            6.0                 611  Moderate
   3 │        23.1      74.7     26.7     33.8     24.4     12.6     1.63                   

In [6]:
results = Dict()
crossValidationIndices = crossvalidation(targets, 5)

5000-element Vector{Int64}:
 1
 3
 5
 4
 1
 3
 3
 5
 1
 1
 5
 1
 2
 ⋮
 3
 4
 4
 4
 5
 1
 2
 4
 4
 3
 5
 1

In [None]:
function printExperimentResult(model, hyperparams, results, class_labels)
    (
        (accuracy_mean, accuracy_std),
        (error_rate_mean, error_rate_std),
        (sensitivity_mean, sensitivity_std),
        (specificity_mean, specificity_std),
        (ppv_mean, ppv_std),
        (npv_mean, npv_std),
        (f1_mean, f1_std),
        cm
    ) = results

    println("\n=====================================================")
    println(" Model: $model")
    println(" Hyperparameters: $hyperparams")
    println("=====================================================")

    println(" Accuracy (mean)               : ", round(accuracy_mean, digits=4))
    println(" Accuracy (std)                : ", round(accuracy_std, digits=4))

    println(" Error Rate (mean)             : ", round(error_rate_mean, digits=4))
    println(" Error Rate (std)              : ", round(error_rate_std, digits=4))

    println(" Sensitivity/Recall (mean)     : ", round(sensitivity_mean, digits=4))
    println(" Sensitivity/Recall (std)      : ", round(sensitivity_std,  digits=4))

    println(" Specificity (mean)            : ", round(specificity_mean, digits=4))
    println(" Specificity (std)             : ", round(specificity_std,  digits=4))

    println(" PPV (mean)                    : ", round(ppv_mean,         digits=4))
    println(" PPV (std)                     : ", round(ppv_std,          digits=4))

    println(" NPV (mean)                    : ", round(npv_mean,         digits=4))
    println(" NPV (std)                     : ", round(npv_std,          digits=4))

    println(" F1 Score (mean)               : ", round(f1_mean,          digits=4))
    println(" F1 Score (std)                : ", round(f1_std,           digits=4))

    println("\nConfusion Matrix:")
    println(cm)

    PrettyTables.pretty_table(DataFrame(cm, :auto); header=class_labels, row_labels=class_labels)

    println("=====================================================\n")
end


printExperimentResult (generic function with 1 method)

# Artificial Neural Networks

In [None]:
############# 1. ARTIFICIAL NEURAL NETWORKS (8+ topologies) #############
default_ann = Dict(      
    "numExecutions" => 5,
    "transferFunctions" => [σ, σ, σ],
    "maxEpochs" => 200,
    "minLoss" => 0.0,
    "learningRate" => 0.01,
    "validationRatio" => 0.1,
    "maxEpochsVal" => 20
)

ann_search_space = [
    Dict("topology"=>[128, 128]),
    Dict("topology"=>[256,256]),
    Dict("topology"=>[512, 512]),
    Dict("topology"=>[128, 128,128]),
    Dict("topology"=>[256,256,256]),
    Dict("topology"=>[512, 512,512]),
    Dict("topology"=>[128, 128,128,128]),
    Dict("topology"=>[256,256,256,256])
]

8-element Vector{Dict{String, Vector{Int64}}}:
 Dict("topology" => [8, 1])
 Dict("topology" => [10, 5, 2])
 Dict("topology" => [12, 6, 3])
 Dict("topology" => [6, 3])
 Dict("topology" => [20, 10, 5])
 Dict("topology" => [5, 4, 3])
 Dict("topology" => [16, 8, 4])
 Dict("topology" => [30, 15, 7])

In [9]:
########################
# 1. ANN GRID SEARCH
########################
ann_results = []

for hp in ann_search_space
    println("\n=== ANN experiment: topology = $(hp["topology"]) ===")
    full_hp = merge(default_ann, hp)
    res = modelCrossValidationPCA(:ANN, full_hp, (inputs, targets), crossValidationIndices)
    push!(ann_results, (model=:ANN, hyperparams=hp, results=res))
end

results[:ANN] = ann_results


=== ANN experiment: topology = [8, 1] ===

=== ANN experiment: topology = [10, 5, 2] ===

=== ANN experiment: topology = [12, 6, 3] ===

=== ANN experiment: topology = [6, 3] ===

=== ANN experiment: topology = [20, 10, 5] ===

=== ANN experiment: topology = [5, 4, 3] ===

=== ANN experiment: topology = [16, 8, 4] ===

=== ANN experiment: topology = [30, 15, 7] ===


8-element Vector{Any}:
 (model = :ANN, hyperparams = Dict("topology" => [8, 1]), results = ((0.74352f0, 0.021126548f0), (0.25647998f0, 0.021126574f0), (0.74352f0, 0.021126548f0), (0.89420384f0, 0.010610055f0), (0.6188444f0, 0.030510025f0), (0.9548685f0, 0.0036827882f0), (0.6631487f0, 0.027983412f0), Float32[336.08002 2.44 0.76000005 0.71999997; 16.28 188.36002 29.320002 26.039999; 98.36 8.36 81.28001 12.0; 29.8 8.559999 23.84 137.8]))
 (model = :ANN, hyperparams = Dict("topology" => [10, 5, 2]), results = ((0.89427996f0, 0.0104848305f0), (0.10572f0, 0.0104848435f0), (0.89427996f0, 0.0104848305f0), (0.9689684f0, 0.0022970175f0), (0.85154504f0, 0.021394065f0), (0.98180246f0, 0.0016922741f0), (0.8692206f0, 0.016148983f0), Float32[334.28 0.96000004 4.3599997 0.4; 0.96000004 233.88 20.16 5.0; 14.159999 12.68 172.31999 0.84000003; 0.44 17.92 27.84 153.8]))
 (model = :ANN, hyperparams = Dict("topology" => [12, 6, 3]), results = ((0.93564f0, 0.009133862f0), (0.06436f0, 0.009133894f0), (0.93564

In [None]:
for entry in results[:ANN]
    printExperimentResult(entry.model, entry.hyperparams, entry.results, label_names)
end


 Model: ANN
 Hyperparameters: Dict("topology" => [8, 1])
 Accuracy (mean)               : 0.7435
 Accuracy (std)                : 0.0211
 Error Rate (mean)             : 0.2565
 Error Rate (std)              : 0.0211
 Sensitivity/Recall (mean)     : 0.7435
 Sensitivity/Recall (std)      : 0.0211
 Specificity (mean)            : 0.8942
 Specificity (std)             : 0.0106
 PPV (mean)                    : 0.6188
 PPV (std)                     : 0.0305
 NPV (mean)                    : 0.9549
 NPV (std)                     : 0.0037
 F1 Score (mean)               : 0.6631
 F1 Score (std)                : 0.028

Confusion Matrix:


4×4 Matrix{Float32}:
 336.08    2.44   0.76    0.72
  16.28  188.36  29.32   26.04
  98.36    8.36  81.28   12.0
  29.8     8.56  23.84  137.8



 Model: ANN
 Hyperparameters: Dict("topology" => [10, 5, 2])
 Accuracy (mean)               : 0.8943
 Accuracy (std)                : 0.0105
 Error Rate (mean)             : 0.1057
 Error Rate (std)              : 0.0105
 Sensitivity/Recall (mean)     : 0.8943
 Sensitivity/Recall (std)      : 0.0105
 Specificity (mean)            : 0.969
 Specificity (std)             : 0.0023
 PPV (mean)                    : 0.8515
 PPV (std)                     : 0.0214
 NPV (mean)                    : 0.9818
 NPV (std)                     : 0.0017
 F1 Score (mean)               : 0.8692
 F1 Score (std)                : 0.0161

Confusion Matrix:


4×4 Matrix{Float32}:
 334.28    0.96    4.36    0.4
   0.96  233.88   20.16    5.0
  14.16   12.68  172.32    0.84
   0.44   17.92   27.84  153.8



 Model: ANN
 Hyperparameters: Dict("topology" => [12, 6, 3])
 Accuracy (mean)               : 0.9356
 Accuracy (std)                : 0.0091
 Error Rate (mean)             : 0.0644
 Error Rate (std)              : 0.0091
 Sensitivity/Recall (mean)     : 0.9356
 Sensitivity/Recall (std)      : 0.0091
 Specificity (mean)            : 0.9809
 Specificity (std)             : 0.0028
 PPV (mean)                    : 0.9297
 PPV (std)                     : 0.0154
 NPV (mean)                    : 0.985
 NPV (std)                     : 0.0018
 F1 Score (mean)               : 0.9321
 F1 Score (std)                : 0.0125

Confusion Matrix:


4×4 Matrix{Float32}:
 333.32    0.84    5.44    0.4
   0.76  244.6     8.2     6.44
  13.08    7.6   176.04    3.28
   0.36    5.04   12.92  181.68



 Model: ANN
 Hyperparameters: Dict("topology" => [6, 3])
 Accuracy (mean)               : 0.9296
 Accuracy (std)                : 0.0122
 Error Rate (mean)             : 0.0704
 Error Rate (std)              : 0.0122
 Sensitivity/Recall (mean)     : 0.9296
 Sensitivity/Recall (std)      : 0.0122
 Specificity (mean)            : 0.9788
 Specificity (std)             : 0.0038
 PPV (mean)                    : 0.925
 PPV (std)                     : 0.0171
 NPV (mean)                    : 0.9843
 NPV (std)                     : 0.0015
 F1 Score (mean)               : 0.925
 F1 Score (std)                : 0.017

Confusion Matrix:


4×4 Matrix{Float32}:
 333.56    0.8     5.24    0.4
   0.64  243.0     7.68    8.68
  13.96   13.44  169.44    3.16
   0.4     4.08   11.88  183.64



 Model: ANN
 Hyperparameters: Dict("topology" => [20, 10, 5])
 Accuracy (mean)               : 0.9443
 Accuracy (std)                : 0.0071
 Error Rate (mean)             : 0.0557
 Error Rate (std)              : 0.0071
 Sensitivity/Recall (mean)     : 0.9443
 Sensitivity/Recall (std)      : 0.0071
 Specificity (mean)            : 0.9838
 Specificity (std)             : 0.0014
 PPV (mean)                    : 0.9439
 PPV (std)                     : 0.0069
 NPV (mean)                    : 0.9862
 NPV (std)                     : 0.0019
 F1 Score (mean)               : 0.9439
 F1 Score (std)                : 0.007

Confusion Matrix:


4×4 Matrix{Float32}:
 333.08    0.56    5.92    0.44
   0.72  243.96    8.36    6.96
  10.96    7.12  177.84    4.08
   0.24    4.44    5.92  189.4



 Model: ANN
 Hyperparameters: Dict("topology" => [5, 4, 3])
 Accuracy (mean)               : 0.9319
 Accuracy (std)                : 0.0073
 Error Rate (mean)             : 0.0681
 Error Rate (std)              : 0.0073
 Sensitivity/Recall (mean)     : 0.9319
 Sensitivity/Recall (std)      : 0.0073
 Specificity (mean)            : 0.9795
 Specificity (std)             : 0.0026
 PPV (mean)                    : 0.9266
 PPV (std)                     : 0.0123
 NPV (mean)                    : 0.9846
 NPV (std)                     : 0.0014
 F1 Score (mean)               : 0.9279
 F1 Score (std)                : 0.0101

Confusion Matrix:


4×4 Matrix{Float32}:
 333.72    0.84    5.04    0.4
   0.8   243.6     8.16    7.44
  14.12   11.56  171.44    2.88
   0.4     4.28   12.2   183.12



 Model: ANN
 Hyperparameters: Dict("topology" => [16, 8, 4])
 Accuracy (mean)               : 0.9433
 Accuracy (std)                : 0.0071
 Error Rate (mean)             : 0.0567
 Error Rate (std)              : 0.0071
 Sensitivity/Recall (mean)     : 0.9433
 Sensitivity/Recall (std)      : 0.0071
 Specificity (mean)            : 0.9836
 Specificity (std)             : 0.0016
 PPV (mean)                    : 0.943
 PPV (std)                     : 0.0069
 NPV (mean)                    : 0.9859
 NPV (std)                     : 0.0019
 F1 Score (mean)               : 0.9429
 F1 Score (std)                : 0.007

Confusion Matrix:


4×4 Matrix{Float32}:
 333.16    0.4     6.0     0.44
   0.84  244.12    8.24    6.8
  11.12    7.36  176.96    4.56
   0.24    4.68    6.0   189.08



 Model: ANN
 Hyperparameters: Dict("topology" => [30, 15, 7])
 Accuracy (mean)               : 0.9451
 Accuracy (std)                : 0.0071
 Error Rate (mean)             : 0.0549
 Error Rate (std)              : 0.0071
 Sensitivity/Recall (mean)     : 0.9451
 Sensitivity/Recall (std)      : 0.0071
 Specificity (mean)            : 0.9839
 Specificity (std)             : 0.0018
 PPV (mean)                    : 0.9448
 PPV (std)                     : 0.007
 NPV (mean)                    : 0.9864
 NPV (std)                     : 0.0017
 F1 Score (mean)               : 0.9447
 F1 Score (std)                : 0.0071

Confusion Matrix:


4×4 Matrix{Float32}:
 333.12    0.52    5.84    0.52
   0.8   244.12    8.32    6.76
  10.36    7.28  178.4     3.96
   0.32    4.52    5.68  189.48




# Support Vector Machines

In [11]:
SVMClassifier = MLJ.@load SVC pkg=LIBSVM verbosity=0

MLJLIBSVMInterface.SVC

In [12]:
############# 2. SVM (8+ configs: kernels × C) #############
default_svm = Dict(
    "gamma" => 1.0,
    "degree" => 3,
    "coef0" => 0.0
)
svm_search_space = [
    Dict("kernel"=>"linear", "C"=>0.1),
    Dict("kernel"=>"linear", "C"=>1.0),
    Dict("kernel"=>"linear", "C"=>10.0),

    Dict("kernel"=>"rbf", "C"=>1.0),
    Dict("kernel"=>"rbf", "C"=>10.0),

    Dict("kernel"=>"sigmoid", "C"=>1.0),
    Dict("kernel"=>"poly", "C"=>1.0),
    Dict("kernel"=>"poly", "C"=>5.0),
]

8-element Vector{Dict{String, Any}}:
 Dict("C" => 0.1, "kernel" => "linear")
 Dict("C" => 1.0, "kernel" => "linear")
 Dict("C" => 10.0, "kernel" => "linear")
 Dict("C" => 1.0, "kernel" => "rbf")
 Dict("C" => 10.0, "kernel" => "rbf")
 Dict("C" => 1.0, "kernel" => "sigmoid")
 Dict("C" => 1.0, "kernel" => "poly")
 Dict("C" => 5.0, "kernel" => "poly")

In [13]:
########################
# 2. SVM GRID SEARCH
########################
svm_results = []

for hp in svm_search_space
    println("\n=== SVM experiment: kernel=$(hp["kernel"]) C=$(get(hp,"C","-")) ===")
    full_hp = merge(default_svm, hp)
    res = modelCrossValidationPCA(:SVC, full_hp, (inputs, targets), crossValidationIndices)
    push!(svm_results, (model=:SVC, hyperparams=hp, results=res))
end

results[:SVC] = svm_results


=== SVM experiment: kernel=linear C=0.1 ===

=== SVM experiment: kernel=linear C=1.0 ===

=== SVM experiment: kernel=linear C=10.0 ===

=== SVM experiment: kernel=rbf C=1.0 ===

=== SVM experiment: kernel=rbf C=10.0 ===

=== SVM experiment: kernel=sigmoid C=1.0 ===

=== SVM experiment: kernel=poly C=1.0 ===

=== SVM experiment: kernel=poly C=5.0 ===


8-element Vector{Any}:
 (model = :SVC, hyperparams = Dict{String, Any}("C" => 0.1, "kernel" => "linear"), results = ((0.92080003f0, 0.010686428f0), (0.0792f0, 0.01068644f0), (0.92080003f0, 0.010686428f0), (0.97122383f0, 0.0043843295f0), (0.9211836f0, 0.010112118f0), (0.98120815f0, 0.0025284868f0), (0.91903895f0, 0.011139641f0), Float32[330.2 4.0 4.2 1.6; 0.2 241.0 9.8 9.0; 19.8 8.8 169.6 1.8; 2.6 6.8 10.6 180.0]))
 (model = :SVC, hyperparams = Dict{String, Any}("C" => 1.0, "kernel" => "linear"), results = ((0.9332f0, 0.009093944f0), (0.0668f0, 0.009093953f0), (0.9332f0, 0.009093944f0), (0.9777444f0, 0.0031013635f0), (0.932876f0, 0.008638319f0), (0.9834383f0, 0.0021604728f0), (0.93236893f0, 0.009197279f0), Float32[331.0 2.0 6.2 0.8; 0.4 242.4 8.0 9.2; 16.0 7.6 173.2 3.2; 1.2 5.2 7.0 186.6]))
 (model = :SVC, hyperparams = Dict{String, Any}("C" => 10.0, "kernel" => "linear"), results = ((0.93439996f0, 0.009044345f0), (0.0656f0, 0.009044334f0), (0.93439996f0, 0.009044345f0), (0.97957623f0,

In [None]:
for entry in results[:SVC]
    printExperimentResult(entry.model, entry.hyperparams, entry.results, label_names)
end


 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 0.1, "kernel" => "linear")
 Accuracy (mean)               : 0.9208
 Accuracy (std)                : 0.0107
 Error Rate (mean)             : 0.0792
 Error Rate (std)              : 0.0107
 Sensitivity/Recall (mean)     : 0.9208
 Sensitivity/Recall (std)      : 0.0107
 Specificity (mean)            : 0.9712
 Specificity (std)             : 0.0044
 PPV (mean)                    : 0.9212
 PPV (std)                     : 0.0101
 NPV (mean)                    : 0.9812
 NPV (std)                     : 0.0025
 F1 Score (mean)               : 0.919
 F1 Score (std)                : 0.0111

Confusion Matrix:


4×4 Matrix{Float32}:
 330.2    4.0    4.2    1.6
   0.2  241.0    9.8    9.0
  19.8    8.8  169.6    1.8
   2.6    6.8   10.6  180.0



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 1.0, "kernel" => "linear")
 Accuracy (mean)               : 0.9332
 Accuracy (std)                : 0.0091
 Error Rate (mean)             : 0.0668
 Error Rate (std)              : 0.0091
 Sensitivity/Recall (mean)     : 0.9332
 Sensitivity/Recall (std)      : 0.0091
 Specificity (mean)            : 0.9777
 Specificity (std)             : 0.0031
 PPV (mean)                    : 0.9329
 PPV (std)                     : 0.0086
 NPV (mean)                    : 0.9834
 NPV (std)                     : 0.0022
 F1 Score (mean)               : 0.9324
 F1 Score (std)                : 0.0092

Confusion Matrix:


4×4 Matrix{Float32}:
 331.0    2.0    6.2    0.8
   0.4  242.4    8.0    9.2
  16.0    7.6  173.2    3.2
   1.2    5.2    7.0  186.6



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 10.0, "kernel" => "linear")
 Accuracy (mean)               : 0.9344
 Accuracy (std)                : 0.009
 Error Rate (mean)             : 0.0656
 Error Rate (std)              : 0.009
 Sensitivity/Recall (mean)     : 0.9344
 Sensitivity/Recall (std)      : 0.009
 Specificity (mean)            : 0.9796
 Specificity (std)             : 0.0034
 PPV (mean)                    : 0.934
 PPV (std)                     : 0.0088
 NPV (mean)                    : 0.9833
 NPV (std)                     : 0.0022
 F1 Score (mean)               : 0.9338
 F1 Score (std)                : 0.0091

Confusion Matrix:


4×4 Matrix{Float32}:
 331.0    1.2    7.6    0.2
   0.4  243.6    7.2    8.8
  14.4    8.4  173.2    4.0
   1.0    6.0    6.4  186.6



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 1.0, "kernel" => "rbf")
 Accuracy (mean)               : 0.9344
 Accuracy (std)                : 0.0085
 Error Rate (mean)             : 0.0656
 Error Rate (std)              : 0.0085
 Sensitivity/Recall (mean)     : 0.9344
 Sensitivity/Recall (std)      : 0.0085
 Specificity (mean)            : 0.9789
 Specificity (std)             : 0.0026
 PPV (mean)                    : 0.9339
 PPV (std)                     : 0.0083
 NPV (mean)                    : 0.9838
 NPV (std)                     : 0.0022
 F1 Score (mean)               : 0.9336
 F1 Score (std)                : 0.0086

Confusion Matrix:


4×4 Matrix{Float32}:
 331.2    1.6    6.6    0.6
   0.4  244.2    7.4    8.0
  15.4    8.2  172.8    3.6
   1.0    5.2    7.6  186.2



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 10.0, "kernel" => "rbf")
 Accuracy (mean)               : 0.9418
 Accuracy (std)                : 0.0063
 Error Rate (mean)             : 0.0582
 Error Rate (std)              : 0.0063
 Sensitivity/Recall (mean)     : 0.9418
 Sensitivity/Recall (std)      : 0.0063
 Specificity (mean)            : 0.9822
 Specificity (std)             : 0.0014
 PPV (mean)                    : 0.9415
 PPV (std)                     : 0.0059
 NPV (mean)                    : 0.9855
 NPV (std)                     : 0.0019
 F1 Score (mean)               : 0.9414
 F1 Score (std)                : 0.0061

Confusion Matrix:


4×4 Matrix{Float32}:
 331.4    1.4    6.8    0.4
   0.4  245.0    7.6    7.0
  11.6    7.8  176.6    4.0
   0.8    4.4    6.0  188.8



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 1.0, "kernel" => "sigmoid")
 Accuracy (mean)               : 0.933
 Accuracy (std)                : 0.0085
 Error Rate (mean)             : 0.067
 Error Rate (std)              : 0.0085
 Sensitivity/Recall (mean)     : 0.933
 Sensitivity/Recall (std)      : 0.0085
 Specificity (mean)            : 0.978
 Specificity (std)             : 0.003
 PPV (mean)                    : 0.9325
 PPV (std)                     : 0.0083
 NPV (mean)                    : 0.9831
 NPV (std)                     : 0.002
 F1 Score (mean)               : 0.9322
 F1 Score (std)                : 0.0086

Confusion Matrix:


4×4 Matrix{Float32}:
 330.8    2.0    6.4    0.8
   0.4  243.0    7.4    9.2
  15.8    8.4  172.0    3.8
   1.2    5.0    6.6  187.2



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 1.0, "kernel" => "poly")
 Accuracy (mean)               : 0.7728
 Accuracy (std)                : 0.0161
 Error Rate (mean)             : 0.2272
 Error Rate (std)              : 0.0161
 Sensitivity/Recall (mean)     : 0.7728
 Sensitivity/Recall (std)      : 0.0161
 Specificity (mean)            : 0.909
 Specificity (std)             : 0.0074
 PPV (mean)                    : 0.8302
 PPV (std)                     : 0.0097
 NPV (mean)                    : 0.9197
 NPV (std)                     : 0.0061
 F1 Score (mean)               : 0.7709
 F1 Score (std)                : 0.0161

Confusion Matrix:


4×4 Matrix{Float32}:
 303.2    0.4   17.4   19.0
  37.6  174.6   24.8   23.0
  63.0    7.6  126.4    3.0
  17.2    5.0    9.2  168.6



 Model: SVC
 Hyperparameters: Dict{String, Any}("C" => 5.0, "kernel" => "poly")
 Accuracy (mean)               : 0.878
 Accuracy (std)                : 0.01
 Error Rate (mean)             : 0.122
 Error Rate (std)              : 0.01
 Sensitivity/Recall (mean)     : 0.878
 Sensitivity/Recall (std)      : 0.01
 Specificity (mean)            : 0.9539
 Specificity (std)             : 0.0044
 PPV (mean)                    : 0.8925
 PPV (std)                     : 0.0077
 NPV (mean)                    : 0.9594
 NPV (std)                     : 0.0041
 F1 Score (mean)               : 0.8771
 F1 Score (std)                : 0.01

Confusion Matrix:


4×4 Matrix{Float32}:
 327.6    0.4    6.4    5.6
  16.4  214.4   15.0   14.2
  33.8    7.6  155.2    3.4
   8.0    4.2    7.0  180.8




# Decission Trees

In [15]:
DTClassifier = MLJ.@load DecisionTreeClassifier pkg=DecisionTree verbosity=0

MLJDecisionTreeInterface.DecisionTreeClassifier

In [16]:
############# 3. DECISION TREES (6 depths) #############
default_dt = Dict(
    "rng" => Random.MersenneTwister(1)
)

dt_search_space = [
    Dict("max_depth"=>2),
    Dict("max_depth"=>3),
    Dict("max_depth"=>4),
    Dict("max_depth"=>5),
    Dict("max_depth"=>6),
    Dict("max_depth"=>8)
]

6-element Vector{Dict{String, Int64}}:
 Dict("max_depth" => 2)
 Dict("max_depth" => 3)
 Dict("max_depth" => 4)
 Dict("max_depth" => 5)
 Dict("max_depth" => 6)
 Dict("max_depth" => 8)

In [17]:
########################
# 3. DECISION TREE GRID SEARCH
########################
dt_results = []

for hp in dt_search_space
    println("\n=== Decision Tree experiment: max_depth=$(hp["max_depth"]) ===")
    full_hp = merge(default_dt, hp) 
    res = modelCrossValidationPCA(:DecisionTreeClassifier, full_hp, (inputs, targets), crossValidationIndices)
    push!(dt_results, (model=:DT, hyperparams=hp, results=res))
end

results[:DT] = dt_results


=== Decision Tree experiment: max_depth=2 ===

=== Decision Tree experiment: max_depth=3 ===

=== Decision Tree experiment: max_depth=4 ===

=== Decision Tree experiment: max_depth=5 ===

=== Decision Tree experiment: max_depth=6 ===

=== Decision Tree experiment: max_depth=8 ===


6-element Vector{Any}:
 (model = :DT, hyperparams = Dict("max_depth" => 2), results = ((0.83219993f0, 0.0054497677f0), (0.16780001f0, 0.0054497714f0), (0.83219993f0, 0.0054497677f0), (0.94234765f0, 0.0018211625f0), (0.7638066f0, 0.0061601675f0), (0.96546876f0, 0.0011020584f0), (0.7940912f0, 0.005148579f0), Float32[319.4 8.2 7.2 5.2; 0.8 226.8 24.6 7.8; 27.4 21.0 151.6 0.0; 3.8 21.8 40.0 134.4]))
 (model = :DT, hyperparams = Dict("max_depth" => 3), results = ((0.9103999f0, 0.004827013f0), (0.089600004f0, 0.0048270086f0), (0.9103999f0, 0.004827013f0), (0.96983016f0, 0.0025741593f0), (0.9102656f0, 0.004390337f0), (0.97503775f0, 0.0018103148f0), (0.90898335f0, 0.0050847237f0), Float32[327.2 2.4 7.6 2.8; 3.0 235.8 9.6 11.6; 23.8 10.2 160.0 6.0; 1.0 6.0 5.6 187.4]))
 (model = :DT, hyperparams = Dict("max_depth" => 4), results = ((0.91380006f0, 0.00653453f0), (0.0862f0, 0.006534524f0), (0.91380006f0, 0.00653453f0), (0.97390634f0, 0.002015421f0), (0.9154504f0, 0.004706984f0), (0.9742853f0, 0.0

In [None]:
for entry in results[:DT]
    printExperimentResult(entry.model, entry.hyperparams, entry.results, label_names)
end


 Model: DT
 Hyperparameters: Dict("max_depth" => 2)
 Accuracy (mean)               : 0.8322
 Accuracy (std)                : 0.0054
 Error Rate (mean)             : 0.1678
 Error Rate (std)              : 0.0054
 Sensitivity/Recall (mean)     : 0.8322
 Sensitivity/Recall (std)      : 0.0054
 Specificity (mean)            : 0.9423
 Specificity (std)             : 0.0018
 PPV (mean)                    : 0.7638
 PPV (std)                     : 0.0062
 NPV (mean)                    : 0.9655
 NPV (std)                     : 0.0011
 F1 Score (mean)               : 0.7941
 F1 Score (std)                : 0.0051

Confusion Matrix:


4×4 Matrix{Float32}:
 319.4    8.2    7.2    5.2
   0.8  226.8   24.6    7.8
  27.4   21.0  151.6    0.0
   3.8   21.8   40.0  134.4



 Model: DT
 Hyperparameters: Dict("max_depth" => 3)
 Accuracy (mean)               : 0.9104
 Accuracy (std)                : 0.0048
 Error Rate (mean)             : 0.0896
 Error Rate (std)              : 0.0048
 Sensitivity/Recall (mean)     : 0.9104
 Sensitivity/Recall (std)      : 0.0048
 Specificity (mean)            : 0.9698
 Specificity (std)             : 0.0026
 PPV (mean)                    : 0.9103
 PPV (std)                     : 0.0044
 NPV (mean)                    : 0.975
 NPV (std)                     : 0.0018
 F1 Score (mean)               : 0.909
 F1 Score (std)                : 0.0051

Confusion Matrix:


4×4 Matrix{Float32}:
 327.2    2.4    7.6    2.8
   3.0  235.8    9.6   11.6
  23.8   10.2  160.0    6.0
   1.0    6.0    5.6  187.4



 Model: DT
 Hyperparameters: Dict("max_depth" => 4)
 Accuracy (mean)               : 0.9138
 Accuracy (std)                : 0.0065
 Error Rate (mean)             : 0.0862
 Error Rate (std)              : 0.0065
 Sensitivity/Recall (mean)     : 0.9138
 Sensitivity/Recall (std)      : 0.0065
 Specificity (mean)            : 0.9739
 Specificity (std)             : 0.002
 PPV (mean)                    : 0.9155
 PPV (std)                     : 0.0047
 NPV (mean)                    : 0.9743
 NPV (std)                     : 0.0025
 F1 Score (mean)               : 0.9139
 F1 Score (std)                : 0.0062

Confusion Matrix:


4×4 Matrix{Float32}:
 318.6    2.6   16.8    2.0
   1.4  243.4    7.6    7.6
  13.4   11.8  168.8    6.0
   1.2   10.2    5.6  183.0



 Model: DT
 Hyperparameters: Dict("max_depth" => 5)
 Accuracy (mean)               : 0.9192
 Accuracy (std)                : 0.0066
 Error Rate (mean)             : 0.0808
 Error Rate (std)              : 0.0066
 Sensitivity/Recall (mean)     : 0.9192
 Sensitivity/Recall (std)      : 0.0066
 Specificity (mean)            : 0.9746
 Specificity (std)             : 0.0022
 PPV (mean)                    : 0.9183
 PPV (std)                     : 0.007
 NPV (mean)                    : 0.9778
 NPV (std)                     : 0.0015
 F1 Score (mean)               : 0.9184
 F1 Score (std)                : 0.0067

Confusion Matrix:


4×4 Matrix{Float32}:
 326.6    2.2    9.2    2.0
   1.2  239.4    9.6    9.8
  17.2   10.2  166.6    6.0
   1.2    6.6    5.6  186.6



 Model: DT
 Hyperparameters: Dict("max_depth" => 6)
 Accuracy (mean)               : 0.9166
 Accuracy (std)                : 0.0072
 Error Rate (mean)             : 0.0834
 Error Rate (std)              : 0.0072
 Sensitivity/Recall (mean)     : 0.9166
 Sensitivity/Recall (std)      : 0.0072
 Specificity (mean)            : 0.9737
 Specificity (std)             : 0.0013
 PPV (mean)                    : 0.917
 PPV (std)                     : 0.007
 NPV (mean)                    : 0.9758
 NPV (std)                     : 0.0027
 F1 Score (mean)               : 0.9162
 F1 Score (std)                : 0.0068

Confusion Matrix:


4×4 Matrix{Float32}:
 322.0    2.4   13.8    1.8
   1.6  241.0   10.4    7.0
  14.8   10.0  170.0    5.2
   1.6    8.6    6.2  183.6



 Model: DT
 Hyperparameters: Dict("max_depth" => 8)
 Accuracy (mean)               : 0.9166
 Accuracy (std)                : 0.0046
 Error Rate (mean)             : 0.0834
 Error Rate (std)              : 0.0046
 Sensitivity/Recall (mean)     : 0.9166
 Sensitivity/Recall (std)      : 0.0046
 Specificity (mean)            : 0.9732
 Specificity (std)             : 0.0014
 PPV (mean)                    : 0.9168
 PPV (std)                     : 0.0036
 NPV (mean)                    : 0.977
 NPV (std)                     : 0.0024
 F1 Score (mean)               : 0.916
 F1 Score (std)                : 0.0043

Confusion Matrix:


4×4 Matrix{Float32}:
 325.8    2.6   10.2    1.4
   1.4  240.8    9.6    8.2
  17.2   11.0  166.8    5.0
   1.2    8.2    7.4  183.2




# K-Nearest Neighbors

In [19]:
kNNClassifier = MLJ.@load KNNClassifier pkg=NearestNeighborModels verbosity=0

NearestNeighborModels.KNNClassifier

In [20]:
############# 4. kNN (6 values) #############
knn_search_space = [
    Dict("K"=>1),
    Dict("K"=>3),
    Dict("K"=>5),
    Dict("K"=>7),
    Dict("K"=>9),
    Dict("K"=>11)
]

6-element Vector{Dict{String, Int64}}:
 Dict("K" => 1)
 Dict("K" => 3)
 Dict("K" => 5)
 Dict("K" => 7)
 Dict("K" => 9)
 Dict("K" => 11)

In [21]:
########################
# 4. KNN GRID SEARCH
########################
knn_results = []

for hp in knn_search_space
    println("\n=== kNN experiment: K=$(hp["K"]) ===")
    res = modelCrossValidationPCA(:KNeighborsClassifier, hp, (inputs, targets), crossValidationIndices)
    push!(knn_results, (model=:KNN, hyperparams=hp, results=res))
end

results[:KNN] = knn_results


=== kNN experiment: K=1 ===

=== kNN experiment: K=3 ===

=== kNN experiment: K=5 ===

=== kNN experiment: K=7 ===

=== kNN experiment: K=9 ===

=== kNN experiment: K=11 ===


6-element Vector{Any}:
 (model = :KNN, hyperparams = Dict("K" => 1), results = ((0.9118f0, 0.00554077f0), (0.0882f0, 0.005540759f0), (0.9118f0, 0.00554077f0), (0.9731444f0, 0.0025371849f0), (0.91048753f0, 0.0060021915f0), (0.9779686f0, 0.0012446409f0), (0.9105797f0, 0.006166984f0), Float32[329.6 1.2 8.2 1.0; 0.8 236.8 12.6 9.8; 18.4 13.2 162.4 6.0; 1.2 6.4 9.4 183.0]))
 (model = :KNN, hyperparams = Dict("K" => 3), results = ((0.926f0, 0.0058736657f0), (0.074f0, 0.0058736703f0), (0.926f0, 0.0058736657f0), (0.9761468f0, 0.002312451f0), (0.9251598f0, 0.005393791f0), (0.98217213f0, 0.0011734694f0), (0.9248446f0, 0.0061802254f0), Float32[331.6 1.8 5.4 1.2; 0.2 241.4 10.0 8.4; 16.6 9.4 169.8 4.2; 1.4 6.6 8.8 183.2]))
 (model = :KNN, hyperparams = Dict("K" => 5), results = ((0.92960006f0, 0.0069137425f0), (0.0704f0, 0.006913755f0), (0.92960006f0, 0.0069137425f0), (0.97638416f0, 0.002669647f0), (0.92906666f0, 0.0064019514f0), (0.98381835f0, 0.00093439093f0), (0.928181f0, 0.0073966393f0), Float

In [None]:
for entry in results[:KNN]
    printExperimentResult(entry.model, entry.hyperparams, entry.results, label_names)
end


 Model: KNN
 Hyperparameters: Dict("K" => 1)
 Accuracy (mean)               : 0.9118
 Accuracy (std)                : 0.0055
 Error Rate (mean)             : 0.0882
 Error Rate (std)              : 0.0055
 Sensitivity/Recall (mean)     : 0.9118
 Sensitivity/Recall (std)      : 0.0055
 Specificity (mean)            : 0.9731
 Specificity (std)             : 0.0025
 PPV (mean)                    : 0.9105
 PPV (std)                     : 0.006
 NPV (mean)                    : 0.978
 NPV (std)                     : 0.0012
 F1 Score (mean)               : 0.9106
 F1 Score (std)                : 0.0062

Confusion Matrix:


4×4 Matrix{Float32}:
 329.6    1.2    8.2    1.0
   0.8  236.8   12.6    9.8
  18.4   13.2  162.4    6.0
   1.2    6.4    9.4  183.0



 Model: KNN
 Hyperparameters: Dict("K" => 3)
 Accuracy (mean)               : 0.926
 Accuracy (std)                : 0.0059
 Error Rate (mean)             : 0.074
 Error Rate (std)              : 0.0059
 Sensitivity/Recall (mean)     : 0.926
 Sensitivity/Recall (std)      : 0.0059
 Specificity (mean)            : 0.9761
 Specificity (std)             : 0.0023
 PPV (mean)                    : 0.9252
 PPV (std)                     : 0.0054
 NPV (mean)                    : 0.9822
 NPV (std)                     : 0.0012
 F1 Score (mean)               : 0.9248
 F1 Score (std)                : 0.0062

Confusion Matrix:


4×4 Matrix{Float32}:
 331.6    1.8    5.4    1.2
   0.2  241.4   10.0    8.4
  16.6    9.4  169.8    4.2
   1.4    6.6    8.8  183.2



 Model: KNN
 Hyperparameters: Dict("K" => 5)
 Accuracy (mean)               : 0.9296
 Accuracy (std)                : 0.0069
 Error Rate (mean)             : 0.0704
 Error Rate (std)              : 0.0069
 Sensitivity/Recall (mean)     : 0.9296
 Sensitivity/Recall (std)      : 0.0069
 Specificity (mean)            : 0.9764
 Specificity (std)             : 0.0027
 PPV (mean)                    : 0.9291
 PPV (std)                     : 0.0064
 NPV (mean)                    : 0.9838
 NPV (std)                     : 0.0009
 F1 Score (mean)               : 0.9282
 F1 Score (std)                : 0.0074

Confusion Matrix:


4×4 Matrix{Float32}:
 333.8    2.2    3.0    1.0
   0.2  242.8    8.8    8.2
  17.6    9.2  169.6    3.6
   1.0    5.4   10.2  183.4



 Model: KNN
 Hyperparameters: Dict("K" => 7)
 Accuracy (mean)               : 0.9296
 Accuracy (std)                : 0.0063
 Error Rate (mean)             : 0.0704
 Error Rate (std)              : 0.0063
 Sensitivity/Recall (mean)     : 0.9296
 Sensitivity/Recall (std)      : 0.0063
 Specificity (mean)            : 0.9761
 Specificity (std)             : 0.0022
 PPV (mean)                    : 0.9289
 PPV (std)                     : 0.0062
 NPV (mean)                    : 0.9839
 NPV (std)                     : 0.0009
 F1 Score (mean)               : 0.9281
 F1 Score (std)                : 0.0067

Confusion Matrix:


4×4 Matrix{Float32}:
 333.4    2.4    3.0    1.2
   0.2  242.0    9.0    8.8
  17.8    8.2  170.6    3.4
   0.8    5.6   10.0  183.6



 Model: KNN
 Hyperparameters: Dict("K" => 9)
 Accuracy (mean)               : 0.929
 Accuracy (std)                : 0.0068
 Error Rate (mean)             : 0.071
 Error Rate (std)              : 0.0068
 Sensitivity/Recall (mean)     : 0.929
 Sensitivity/Recall (std)      : 0.0068
 Specificity (mean)            : 0.9755
 Specificity (std)             : 0.0024
 PPV (mean)                    : 0.9283
 PPV (std)                     : 0.0068
 NPV (mean)                    : 0.9837
 NPV (std)                     : 0.0012
 F1 Score (mean)               : 0.9275
 F1 Score (std)                : 0.0071

Confusion Matrix:


4×4 Matrix{Float32}:
 333.2    2.6    3.0    1.2
   0.0  242.6    9.6    7.8
  19.0    8.8  169.4    2.8
   1.4    6.0    8.8  183.8



 Model: KNN
 Hyperparameters: Dict("K" => 11)
 Accuracy (mean)               : 0.9296
 Accuracy (std)                : 0.0058
 Error Rate (mean)             : 0.0704
 Error Rate (std)              : 0.0058
 Sensitivity/Recall (mean)     : 0.9296
 Sensitivity/Recall (std)      : 0.0058
 Specificity (mean)            : 0.9755
 Specificity (std)             : 0.0024
 PPV (mean)                    : 0.9294
 PPV (std)                     : 0.0052
 NPV (mean)                    : 0.984
 NPV (std)                     : 0.0011
 F1 Score (mean)               : 0.928
 F1 Score (std)                : 0.006

Confusion Matrix:


4×4 Matrix{Float32}:
 333.6    2.8    2.4    1.2
   0.0  243.8    8.2    8.0
  18.8    8.8  169.8    2.6
   1.2    6.6    9.8  182.4


