In [1]:
# Define the packages
using Pkg
using CategoricalArrays
using MAT
using DataFrames
using MLJ
using LinearAlgebra
using Statistics
using Plots
using StatsPlots
using CSV

In [2]:
# Outcome labels for the 26 subjects below. The values are assumed to follow this
# subject order -- TODO confirm against the clinical score sheet:
#   sub-MOA101, sub-MOA102, sub-MOA104, sub-MOA105, sub-MOA107, sub-MOA108, sub-MOA109,
#   sub-MOA110, sub-MOA111, sub-MOA112, sub-MOA114, sub-MOA115, sub-MOA116, sub-MOA118,
#   sub-MOA121, sub-MOA122, sub-MOA123, sub-MOA124, sub-MOA126, sub-MOA127, sub-MOA128,
#   sub-MOA130, sub-MOA131, sub-MOA133, sub-MOA134, sub-MOA135

# Absolute MADRS scores at session d2
y_absolute = (
    24, 22, 17, 34, 21, 41, 18, 29, 0, 5, 31, 22, 22,
    5, 26, 37, 27, 36, 9, 20, 25, 29, 29, 32, 17, 37,
)

# Delta change in MADRS from session b0 to session d2
y_delta = (
    11, 9, 24, 2, 17, -8, 7, 2, 30, 30, 6, 4, 12,
    28, -1, -7, 3, -1, 15, 12, 8, 9, -1, 2, 18, 3,
)

(11, 9, 24, 2, 17, -8, 7, 2, 30, 30, 6, 4, 12, 28, -1, -7, 3, -1, 15, 12, 8, 9, -1, 2, 18, 3)

In [3]:
# Subjects that have some degree of depression
target_subjects = [
    "sub-MOA101", "sub-MOA102", "sub-MOA104", "sub-MOA105", "sub-MOA107",
    "sub-MOA108", "sub-MOA109", "sub-MOA110", "sub-MOA111", "sub-MOA112",
    "sub-MOA114", "sub-MOA115", "sub-MOA116", "sub-MOA118", "sub-MOA121",
    "sub-MOA122", "sub-MOA123", "sub-MOA124", "sub-MOA126", "sub-MOA127",
    "sub-MOA128", "sub-MOA130", "sub-MOA131", "sub-MOA133", "sub-MOA134",
    "sub-MOA135",
]

# Root directory holding one folder per subject
base_path = "Spectral_DCM_Collection_Diag_4x4"

# Paths of the DCM result files that exist on disk, in subject order
valid_files = String[]

for subj in target_subjects
    # Expected layout: <base_path>/<subject>/ses-b0/glm/spDCM_DMN.mat
    ses_path = joinpath(base_path, subj, "ses-b0")

    # Guard 1: the baseline session folder must exist
    if !isdir(ses_path)
        @warn "Missing session folder: $ses_path"
        continue
    end

    # Guard 2: the DCM result file must exist inside the session's glm folder
    candidate = joinpath(ses_path, "glm", "spDCM_DMN.mat")
    if !isfile(candidate)
        @warn "Missing spDCM_DMN.mat for $subj"
        continue
    end

    push!(valid_files, candidate)
end

println("✅ Found spDCM_DMN.mat for $(length(valid_files)) out of $(length(target_subjects)) subjects.")


✅ Found spDCM_DMN.mat for 26 out of 26 subjects.


In [4]:
"""
    extract_features(file)

Read the MAT file at `file` with `matread` and return its `"params"` entry
flattened into a vector.

The entry is converted with `Matrix` and flattened with `vec`, so elements appear in
column-major order. The entry is expected to be a 4×4 matrix (giving 16 features);
that shape is not checked here.
"""
function extract_features(file)
    return matread(file)["params"] |> Matrix |> vec
end

extract_features (generic function with 1 method)

In [5]:
# Create feature dataset: one row per subject, one column per flattened matrix entry.
# Fail early with a clear message instead of an obscure reduction error further down.
isempty(valid_files) &&
    throw(ArgumentError("No spDCM_DMN.mat files were found; cannot build the feature matrix"))

# `reduce(hcat, ...)` concatenates without splatting the collection into varargs, and
# `permutedims` yields a plain Matrix (the `'` adjoint is a lazy wrapper that would also
# conjugate complex entries).
X = permutedims(reduce(hcat, [extract_features(file) for file in valid_files]))

X_df = DataFrame(X, :auto)  # convert to MLJ-compatible table

# Ensure that the number of subjects matches between X_df and every label set.
# Explicit throws are used because `@assert` may be disabled at some optimization levels.
for (label_name, labels) in (("y_absolute", y_absolute), ("y_delta", y_delta))
    size(X_df, 1) == length(labels) ||
        throw(DimensionMismatch("Mismatch between number of samples in X and $label_name"))
end


In [6]:
using MLJModels, MLJScikitLearnInterface, MLJBase

"""
    evaluate_regression_model(X_df::DataFrame, y, model_label::String, nfolds::Int=5)

Cross-validate a `Standardizer |> ElasticNetCVRegressor` pipeline on `(X_df, y)` and
save the per-fold RMSE and MAE as a CSV table and as a grouped bar chart.

# Arguments
- `X_df`: feature table, one row per sample.
- `y`: numeric targets, one per row of `X_df`. Any iterable of numbers is accepted; it
  is collected and converted to floating point before fitting.
- `model_label`: suffix used in the names of the output files.
- `nfolds`: number of cross-validation folds (default 5). Folds are shuffled with the
  fixed seed 42.

# Returns
The value returned by `savefig` for the written PNG.

# Side effects
Writes `regression_metrics_4x4_<nfolds>_fold_<model_label>.csv` and the matching
`.png`, both as paths relative to the current working directory.
"""
function evaluate_regression_model(X_df::DataFrame, y, model_label::String, nfolds::Int=5)
    # Load and chain standardizer with ElasticNetCVRegressor
    Standardizer = @load Standardizer pkg=MLJModels verbosity=0
    ElasticNetCVRegressor = @load ElasticNetCVRegressor pkg=MLJScikitLearnInterface verbosity=0

    # MLJ interprets integer vectors as `Count`, while regressors declare a `Continuous`
    # target; converting up front avoids the scitype-mismatch warning on `machine`.
    target = float.(collect(y))

    model = Standardizer() |> ElasticNetCVRegressor()
    mach = machine(model, X_df, target)

    # Metrics (labels are kept in the same order as the measures)
    metrics = [rms, mae]
    metric_labels = ["RMSE", "MAE"]

    # Cross-validation
    cv = CV(nfolds=nfolds, shuffle=true, rng=42)
    results = evaluate!(mach,
        resampling=cv,
        measures=metrics,
        operation=predict,
        verbosity=0
    )

    # `per_fold` holds one vector of fold scores per measure, in `metrics` order
    rmse_per_fold, mae_per_fold = results.per_fold

    # Per-fold table followed by a summary row with the means
    metrics_df = DataFrame(
        Fold = 1:nfolds,
        RMSE = rmse_per_fold,
        MAE = mae_per_fold
    )
    avg_row = DataFrame(Fold = ["Mean"], RMSE = [mean(rmse_per_fold)],
                        MAE = [mean(mae_per_fold)])
    metrics_table = vcat(metrics_df, avg_row)

    # Save metrics table as CSV
    CSV.write("regression_metrics_4x4_$(nfolds)_fold_$(model_label).csv", metrics_table)

    # Long-format data for plotting: all RMSE rows first, then all MAE rows
    plot_df = DataFrame(
        Fold = repeat(1:nfolds, outer=length(metrics)),
        Metric = repeat(metric_labels, inner=nfolds),
        Value = vcat(rmse_per_fold, mae_per_fold)
    )

    # Plot grouped bar
    @df plot_df groupedbar(
        string.(:Fold), :Value, group=:Metric,
        bar_position=:dodge,
        bar_width=0.2,
        xlabel="Fold", ylabel="Metric Value",
        title="Regression Metrics per Fold",
        legend=:outertop,
        guidefontsize=10,
        tickfontsize=10,
        size=(750, 500),
        dpi=300
    )
    # `savefig` without a plot argument saves the current plot created just above
    return savefig("regression_metrics_4x4_$(nfolds)_fold_$(model_label).png")
end

evaluate_regression_model (generic function with 2 methods)

In [7]:
# Cross-validate the elastic-net pipeline (default 5 folds) against the absolute MADRS
# scores; the CSV and PNG outputs are tagged "elasticnet_cv_absolute".
evaluate_regression_model(X_df, collect(y_absolute), "elasticnet_cv_absolute")

"/Users/keyshavmor/ETH/TNM_Final_Project/Project_8/regression_metrics_4x4_5_fold_elasticnet_cv_absolute.png"

In [8]:
# Cross-validate the elastic-net pipeline (default 5 folds) against the MADRS change
# scores; the CSV and PNG outputs are tagged "elasticnet_cv_delta".
evaluate_regression_model(X_df, collect(y_delta), "elasticnet_cv_delta")

"/Users/keyshavmor/ETH/TNM_Final_Project/Project_8/regression_metrics_4x4_5_fold_elasticnet_cv_delta.png"

In [9]:
using PythonCall

"""
    evaluate_gaussian_regression_model(X_df::DataFrame, y, model_label::String, nfolds::Int=5)

Cross-validate a `Standardizer |> GaussianProcessRegressor` pipeline on `(X_df, y)` and
save the per-fold RMSE and MAE as a CSV table and as a grouped bar chart.

The regressor uses the scikit-learn kernel `ConstantKernel(1.0) * RBF(length_scale=1.0)`,
built through PythonCall.

# Arguments
- `X_df`: feature table, one row per sample.
- `y`: numeric targets, one per row of `X_df`. Any iterable of numbers is accepted; it
  is collected and converted to floating point before fitting.
- `model_label`: suffix used in the names of the output files.
- `nfolds`: number of cross-validation folds (default 5). Folds are shuffled with the
  fixed seed 42.

# Returns
The value returned by `savefig` for the written PNG.

# Side effects
Writes `regression_metrics_4x4_<nfolds>_fold_<model_label>.csv` and the matching
`.png`, both as paths relative to the current working directory.
"""
function evaluate_gaussian_regression_model(X_df::DataFrame, y, model_label::String, nfolds::Int=5)
    # Load the model types and bind them locally: `@load` returns the type, so without
    # the assignment `GaussianProcessRegressor` would be an undefined name below.
    Standardizer = @load Standardizer pkg=MLJModels verbosity=0
    GaussianProcessRegressor = @load GaussianProcessRegressor pkg=MLJScikitLearnInterface

    # Python kernel setup (Gaussian = RBF)
    sklearn = pyimport("sklearn.gaussian_process")
    RBF = sklearn.kernels.RBF
    ConstantKernel = sklearn.kernels.ConstantKernel
    rbf_kernel = ConstantKernel(1.0) * RBF(length_scale=1.0)

    # MLJ interprets integer vectors as `Count`, while regressors declare a `Continuous`
    # target; converting up front avoids the scitype-mismatch warning on `machine`.
    target = float.(collect(y))

    model = Standardizer() |> GaussianProcessRegressor(kernel=rbf_kernel)
    mach = machine(model, X_df, target)

    # Metrics (labels are kept in the same order as the measures)
    metrics = [rms, mae]
    metric_labels = ["RMSE", "MAE"]

    # Cross-validation
    cv = CV(nfolds=nfolds, shuffle=true, rng=42)
    results = evaluate!(mach,
        resampling=cv,
        measures=metrics,
        operation=predict,
        verbosity=0
    )

    # `per_fold` holds one vector of fold scores per measure, in `metrics` order
    rmse_per_fold, mae_per_fold = results.per_fold

    # Per-fold table followed by a summary row with the means
    metrics_df = DataFrame(
        Fold = 1:nfolds,
        RMSE = rmse_per_fold,
        MAE = mae_per_fold
    )
    avg_row = DataFrame(Fold = ["Mean"], RMSE = [mean(rmse_per_fold)],
                        MAE = [mean(mae_per_fold)])
    metrics_table = vcat(metrics_df, avg_row)

    # Save metrics table as CSV
    CSV.write("regression_metrics_4x4_$(nfolds)_fold_$(model_label).csv", metrics_table)

    # Long-format data for plotting: all RMSE rows first, then all MAE rows
    plot_df = DataFrame(
        Fold = repeat(1:nfolds, outer=length(metrics)),
        Metric = repeat(metric_labels, inner=nfolds),
        Value = vcat(rmse_per_fold, mae_per_fold)
    )

    # Plot grouped bar
    @df plot_df groupedbar(
        string.(:Fold), :Value, group=:Metric,
        bar_position=:dodge,
        bar_width=0.2,
        xlabel="Fold", ylabel="Metric Value",
        title="Regression Metrics per Fold",
        legend=:outertop,
        guidefontsize=10,
        tickfontsize=10,
        size=(750, 500),
        dpi=300
    )
    # `savefig` without a plot argument saves the current plot created just above
    return savefig("regression_metrics_4x4_$(nfolds)_fold_$(model_label).png")
end

evaluate_gaussian_regression_model (generic function with 2 methods)

In [10]:
# Gaussian-process run on the absolute MADRS scores. This must call the GP evaluator:
# `evaluate_regression_model` fits the elastic-net pipeline, which would store
# elastic-net results under the "gp_absolute" file names.
evaluate_gaussian_regression_model(X_df, collect(y_absolute), "gp_absolute")

"/Users/keyshavmor/ETH/TNM_Final_Project/Project_8/regression_metrics_4x4_5_fold_gp_absolute.png"

In [11]:
# Gaussian-process run on the MADRS change scores. This must call the GP evaluator:
# `evaluate_regression_model` fits the elastic-net pipeline, which would store
# elastic-net results under the "gp_delta" file names.
evaluate_gaussian_regression_model(X_df, collect(y_delta), "gp_delta")

"/Users/keyshavmor/ETH/TNM_Final_Project/Project_8/regression_metrics_4x4_5_fold_gp_delta.png"