In [2]:
import scipy.stats as stats
from scipy.interpolate import BSpline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import shap


from efficient_kan import KAN
from models.mlp import MLP
from sklearn import svm


import torch
import torch.nn as nn

from kan.utils import create_dataset
from dataloaders.dataloader import FCMatrixDataset
from torch.utils.data import Dataset, DataLoader, Subset

# All tensors and checkpoints in this notebook are placed on DEVICE.
# The original cell picked CUDA when available and then unconditionally
# overwrote the choice with CPU; the override is now an explicit switch.
# Set FORCE_CPU = False to use a GPU when one is present.
FORCE_CPU = True
DEVICE = torch.device("cuda" if torch.cuda.is_available() and not FORCE_CPU else "cpu")


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [12]:
import wandb

wandb.login()

# Every W&B project name is "<dataset prefix>_<model suffix>".
# The previously used (commented-out) configuration had the prefix
# "severe_rds_v2" and additionally included a "<prefix>_GCN" project.
dataset_prefix = "rds_under_8_severe_v2"
mlp_project = f"{dataset_prefix}_MLP"
kan_project = f"{dataset_prefix}_KAN_api"
svm_project = f"{dataset_prefix}_SVM"

api = wandb.Api()

# Model label -> W&B project handle, in the order MLP, KAN, SVM.
project_names = {"MLP": mlp_project, "KAN": kan_project, "SVM": svm_project}
projects = {label: api.project(project_name) for label, project_name in project_names.items()}

print(projects.values())

# Model label -> list of sweep objects, each re-fetched through the API by
# its "<project name>/<sweep id>" path.
all_sweeps = {
    name: [api.sweep(f"{project.name}/{sweep.id}") for sweep in project.sweeps()]
    for name, project in projects.items()
}

# Model label -> {sweep id -> runs of that sweep}.
all_runs = {}
for project_name, sweeps in all_sweeps.items():
    runs_by_sweep = {}
    for sweep in sweeps:
        runs_by_sweep[sweep.id] = sweep.runs
    all_runs[project_name] = runs_by_sweep


def _best_run_by_metric(runs, metric="ACC"):
    """Return the run whose summary reports the highest numeric `metric`.

    Runs whose summary lacks the metric, or stores a non-numeric value for it,
    are ignored. On ties the first run encountered wins. Returns None when no
    run qualifies.
    """
    best_run = None
    best_value = float("-inf")
    for run in runs:
        value = run.summary.get(metric)
        # NaN never compares greater, so NaN metrics are skipped as well.
        if isinstance(value, (int, float)) and value > best_value:
            best_run = run
            best_value = value
    return best_run


# Model label -> {sweep id -> best run of that sweep, ranked by summary "ACC"}.
all_best_runs = {}

for name, sweeps in all_runs.items():
    project_sweeps = {}
    for sweep_name, sweep in sweeps.items():
        best_run = _best_run_by_metric(sweep, "ACC")
        if best_run is None:
            # Storing None here would crash every later cell that reads
            # run.summary / run.config, so such sweeps are left out instead.
            print(f"[{name}] sweep {sweep_name}: no run reports ACC, skipping")
            continue
        project_sweeps[sweep_name] = best_run
    all_best_runs[name] = project_sweeps


# for name, runs in all_runs.items():
#     print(runs)
#     best_run = None
#     best_acc = -1
#     for run in runs:
#         ACC = run.summary.get("ACC")
#         if ACC is not None and ACC > best_acc:
#             best_run = run
#             best_acc = ACC




dict_values([<Project laurens-devries/rds_under_8_severe_v2_MLP>, <Project laurens-devries/rds_under_8_severe_v2_KAN_api>, <Project laurens-devries/rds_under_8_severe_v2_SVM>])


In [11]:
def _as_percent(value):
    """Scale a 0-1 metric to a percentage rounded to one decimal.

    Missing (None) or non-numeric values are returned unchanged so that a run
    without a given summary entry is still printed instead of raising.
    """
    if isinstance(value, (int, float)):
        return round(value * 100, 1)
    return value


SUMMARY_METRICS = ("ACC", "ACC std", "AUC", "AUC std")

for model in all_best_runs.keys():
    for run in all_best_runs[model].values():
        if run is None:
            # A sweep without any ACC-reporting run has no best run to show.
            continue

        # One line per run: model label followed by ACC, ACC std, AUC, AUC std in percent.
        print(model, *(_as_percent(run.summary.get(metric)) for metric in SUMMARY_METRICS))
        print(run.config)
        print(model)
        print(run.name)

        # Logged 'test_acc' values with the NaN rows of the history removed.
        test_acc = run.history()['test_acc']
        test_acc = [x for x in test_acc if str(x) != 'nan']
        # print(f"{model}: {test_acc}")

MLP 64.1 3.4 64.1 3.4
{'udi': 25751, 'mrmr': True, 'epochs': 25, 'dropout': 0.14819207263085396, 'n_layers': 3, 'optimizer': 'Adam', 'batch_size': 40, 'layer_size': 32, 'learning_rate': 0.00028067173441986674, 'hidden_dim_ratio': 0.5}
MLP
hopeful-sweep-10
MLP 54.6 5.8 54.6 5.8
{'udi': 25751, 'mrmr': False, 'epochs': 25, 'dropout': 0.009199087357563496, 'n_layers': 2, 'optimizer': 'Adam', 'batch_size': 104, 'layer_size': 64, 'learning_rate': 0.0005562109636298198, 'hidden_dim_ratio': 0.5}
MLP
gentle-sweep-17
GCN 52.9 3.6 52.9 3.7
{'mrmr': False, 'epochs': 25, 'dropout': 0.180929348594137, 'k_order': 7, 'k_degree': 10, 'optimizer': 'adam', 'batch_size': 96, 'learning_rate': 0.0007287092393359959}
GCN
quiet-sweep-41
GCN 52.8 4.3 52.9 4.3
{'mrmr': True, 'epochs': 25, 'dropout': 0.4720766135495867, 'k_order': 5, 'k_degree': 2, 'optimizer': 'adam', 'batch_size': 88, 'learning_rate': 3.354077125926464e-05}
GCN
fiery-sweep-20
KAN 54.6 2.4 54.7 2.4
{'udi': 25751, 'lamb': 0, 'mrmr': False, 'epoc

In [14]:
# Pick the second SVM best-run entry (position 1 in dict insertion order).
svm_best_runs = list(all_best_runs['SVM'].values())
r = svm_best_runs[1]

print(r.name)
print(r.config)
print(r.summary.get("ACC"))

# Logged 'test_acc' values of that run, with NaN history rows dropped.
r_accs = list(filter(lambda acc: str(acc) != 'nan', r.history()['test_acc']))
r_accs

different-sweep-351
{'C': 0.1, 'udi': 25751, 'mrmr': True, 'gamma': 0.001, 'degree': 4, 'kernel': 'sigmoid'}
0.6427184466019418


[0.6116504854368932,
 0.6893203883495146,
 0.6796116504854369,
 0.6504854368932039,
 0.6893203883495146,
 0.6116504854368932,
 0.6699029126213593,
 0.5825242718446602,
 0.5825242718446602,
 0.6601941747572816]

In [30]:
# Best MLP run of the sweep with id 'kd0oas4g'.
best_mlp_s1 = all_best_runs['MLP']['kd0oas4g']
print(best_mlp_s1.summary.get("ACC"))

# Logged 'test_acc' values of that run, with NaN history rows dropped.
mlp1_accs = [acc for acc in best_mlp_s1.history()['test_acc'] if str(acc) != 'nan']
print(mlp1_accs)


0.5929347826086957
[0.5833333333333334, 0.7083333333333334, 0.5416666666666666, 0.7083333333333334, 0.5416666666666666, 0.5416666666666666, 0.6956521739130435, 0.5652173913043478, 0.43478260869565216, 0.6086956521739131]


In [31]:

# Paired (dependent-samples) t-test between the MLP and SVM test accuracies.
# NOTE(review): ttest_rel pairs the two lists position by position, so it
# assumes entry i of both lists was measured on the same test split; confirm
# that both runs used identical folds before relying on this p-value.
t, p = stats.ttest_rel(mlp1_accs, r_accs)

print(t, p)

# Cross-check of the two-sided p-value from the Student-t distribution with
# n - 1 degrees of freedom. The survival function sf(x) = 1 - cdf(x) is used
# directly because computing 1 - cdf loses precision for small p-values.
p2 = 2 * stats.t.sf(abs(t), len(mlp1_accs) - 1)
print(p2)

-4.733809684820553 0.0010681695963258213
0.0010681695963259408


In [None]:
def _as_numpy(value):
    """Return `value` as a NumPy array, detaching torch tensors from autograd first."""
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().numpy()
    return np.asarray(value, dtype=float)


def visualize_kan(weight, scale_base, grid_size=3, spline_order=3):
    """Plot a B-spline plus a scaled SiLU term over [-1, 1].

    The plotted curve is ``spline(t) + scale_base * silu(t)`` for 100 evenly
    spaced ``t`` in [-1, 1]. The spline is built on a clamped knot vector made
    of ``spline_order`` copies of -1, ``grid_size`` evenly spaced knots on
    [-1, 1] and ``spline_order`` copies of 1.

    Args:
        weight: 1-D spline coefficients (torch tensor, NumPy array or
            sequence). This knot vector needs at least
            ``grid_size + spline_order - 1`` coefficients (5 by default).
        scale_base: Multiplier of the SiLU term (torch tensor, NumPy array or
            plain number).
        grid_size: Number of evenly spaced knots on [-1, 1]. Defaults to 3.
        spline_order: Degree of the B-spline. Defaults to 3.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: Raised by scipy's ``BSpline`` when the knots, coefficients
            and degree are inconsistent (e.g. too few coefficients).
    """
    # Tensors that require grad cannot be converted implicitly, so both
    # inputs are detached and turned into NumPy arrays up front.
    weights = _as_numpy(weight)
    base_scale = _as_numpy(scale_base)

    # NOTE(review): elsewhere in this notebook the KAN models are built with
    # c['grid_size']; confirm that this knot vector (and the default
    # grid_size) matches the grid of the layer being visualised.
    knot_vector = np.concatenate(
        ([-1] * spline_order, np.linspace(-1, 1, grid_size), [1] * spline_order)
    )

    t = np.linspace(-1, 1, 100)
    spline_values = BSpline(knot_vector, weights, spline_order)(t)

    # Base branch: SiLU evaluated on the same sample points.
    silu = nn.SiLU()
    bias = silu(torch.tensor(t))
    curve = spline_values + bias.numpy() * base_scale

    plt.figure(figsize=(4, 3))
    plt.plot(t, curve, label='B-spline curve')
    # Coefficients are drawn at evenly spaced x positions; these are display
    # positions only, not the exact abscissae of the control points.
    plt.scatter(np.linspace(-1, 1, len(weights)), weights, color='red', label='Control points')
    plt.title('B-spline Curve')
    plt.xlabel('t')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True)
    plt.show()

In [8]:
# --- input locations ---------------------------------------------------------
fc_udi = "25751"
ds = "data/csv/severe_rds_v2.csv"
u8_ds = "data/csv/rds_under_8_severe_v2.csv"
data_dir = f"data/fetched/{fc_udi}/raw"

# --- feature subsets (55 entries each) ---------------------------------------
# Passed to FCMatrixDataset through its `mrmr` argument; presumably indices of
# the mRMR-selected features for each cohort (TODO confirm).
mrmr_features = np.array(
    [1140, 689, 427, 122, 139, 765, 907, 1384, 22, 1293, 1449, 492, 1440, 499, 1316, 1318, 135, 879, 886, 223, 1455, 676, 1136, 1464, 70, 1462, 1386, 939, 111, 413, 10, 1403, 1027, 547, 395, 1210, 942, 501, 45, 425, 638, 505, 26, 1409, 1448, 605, 232, 1459, 821, 1394, 1376, 809, 1163, 216, 791]
)
mrmr_features_u8 = np.array(
    [1035, 382, 967, 1316, 34, 852, 761, 480, 875, 1357, 1203, 686, 415, 248, 1230, 1370, 1275, 466, 1274, 126, 840, 1470, 448, 629, 1292, 922, 617, 168, 946, 1372, 131, 219, 1247, 413, 72, 496, 880, 275, 863, 11, 571, 147, 877, 843, 92, 124, 1157, 1457, 249, 1126, 741, 1289, 1270, 1453, 742]
)

# --- datasets ----------------------------------------------------------------
# Two feature-restricted variants (one per CSV) and one unrestricted variant.
mrmr_dataset = FCMatrixDataset(ds, data_dir, fc_udi, None, mrmr=mrmr_features)
mrmr_u8_dataset = FCMatrixDataset(u8_ds, data_dir, fc_udi, None, mrmr=mrmr_features_u8)
dataset = FCMatrixDataset(ds, data_dir, fc_udi, None)

# Model label -> model class (the SVM entry is only a string marker).
models = dict(MLP=MLP, KAN=KAN, SVM="SVM")

def _stack_dataset(fc_dataset):
    """Collect all (features, label) samples of `fc_dataset` into two tensors.

    The dataset is iterated once; element 0 of each sample goes into the
    feature tensor and element 1 into the label tensor.
    """
    samples = list(fc_dataset)
    features = torch.tensor(np.array([np.array(sample[0]) for sample in samples]))
    labels = torch.tensor(np.array([sample[1] for sample in samples]))
    return features, labels


# Input widths the saved networks are rebuilt with: 55 when the run used the
# mRMR feature subset, 1485 otherwise.
N_INPUTS_MRMR = 55
N_INPUTS_FULL = 1485

# (X, y) per feature set, built on first use. Stacking a dataset reads every
# sample, so it is done once per feature set instead of once per run.
_xy_cache = {}

# NOTE(review): the runs come from the "rds_under_8_severe_v2_*" projects, but
# X / y below are taken from `mrmr_dataset` / `dataset` (severe_rds_v2.csv) and
# `mrmr_u8_dataset` is never used - confirm this is intended.
for model_type in all_best_runs.keys():
    for run in all_best_runs[model_type].values():
        if run is None:
            # Sweep without a best run: nothing to rebuild.
            continue

        c = run.config

        if model_type == "GCN":
            # GCN runs are not handled in this cell.
            continue

        use_mrmr = bool(c["mrmr"])

        if model_type == "MLP" or model_type == "KAN":
            input_features = N_INPUTS_MRMR if use_mrmr else N_INPUTS_FULL
            # Hidden widths shrink geometrically: layer_size * hidden_dim_ratio ** i.
            hidden_dims = [int(c['layer_size'] * c['hidden_dim_ratio'] ** i) for i in range(c['n_layers'])]

            # torch.load unpickles the file; only load checkpoints produced by
            # your own training runs.
            cp = torch.load(f"models/saved/{model_type}/{run.name}.pth", map_location=DEVICE)

            if model_type == "MLP":
                model = MLP(input_features, hidden_dims, 2, c['dropout']).to(DEVICE)
            else:
                dims = [input_features] + hidden_dims + [2]
                model = KAN(dims, c['grid_size']).to(DEVICE)
            model.load_state_dict(cp)

            if model_type == "KAN":
                print(f"models/saved/{model_type}/{run.name}.pth")
                print(c["mrmr"], c["optimizer"])

        elif model_type == "SVM":
            # SVMs are re-created from their hyper-parameters (unfitted).
            model = svm.SVC(kernel=c['kernel'], C=c['C'], gamma=c['gamma'], degree=c['degree'])

        if use_mrmr not in _xy_cache:
            _xy_cache[use_mrmr] = _stack_dataset(mrmr_dataset if use_mrmr else dataset)
        X, y = _xy_cache[use_mrmr]

        # Disabled SHAP feature-attribution step kept for reference:
        # model.fit(X, y)
        # explainer = shap.DeepExplainer(model.predict, X)
        # shap_values = explainer.shap_values(np.array(X)[:5])
        # # shap.summary_plot(shap_values)

        # shap_v_summed = np.mean(np.abs(shap_values[0]), axis=0) + np.mean(np.abs(shap_values[1]), axis=0)
        # sorted_idx = np.argsort(shap_v_summed)[::-1]
        # print(sorted_idx)

        # print(run.name)
        



models/saved/KAN/honest-sweep-45.pth
False LBFGS
models/saved/KAN/glorious-sweep-83.pth
False Adam
models/saved/KAN/daily-sweep-67.pth
True LBFGS
models/saved/KAN/lunar-sweep-101.pth
True Adam


'elated-sweep-160'