In [7]:
import pandas as pd
import os
from kan import *
# create a KAN: 2D inputs, 1D output, and 5 hidden neurons. cubic spline (k=3), 5 grid intervals (grid=5).
model = KAN(width=[2,5,1], grid=5, k=3, seed=0)

import sys
sys.set_int_max_str_digits(0)

In [8]:
def confidence_interval(data, confidence=0.95):
    # Calculate confidence interval
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2.0, n-1)
    return m-h, m+h

In [12]:
question_numbers = [1, 2, 3, 4, 5, 6, 7, 8]         # Numbers of questions from DASS to run through
target = "anxiety_status"
models_to_train = 10        # Number of models for each number of questions from DASS
models_per_question = 1   # Number of ensembles per model
test_split = 0.1    
seed = 42
random.seed(seed)

ACCS = []
AUCS = []
PRES = []
RECS = []
F1S = []
AUC_STDEV = []
F1_STDEV = []
AUC_95CI_U = []
AUC_95CI_D = []
F1_95CI_U = []
F1_95CI_D = []

data_folder = "./data"
models_folder = "./models"

feats_df = pd.read_csv(os.path.join(data_folder, "features.csv"))
labels_df = pd.read_csv(os.path.join(data_folder, "labels.csv"))

questions = [20, 9, 30, 11, 19, 2, 36, 28, 4, 23]


In [None]:
for num_questions in question_numbers:
    lst_comb =[]
    
    train_losses = []
    test_losses = []
    if num_questions == 2:
        break
    
    
    for a in range(models_to_train):
        cols = ["gender_m", "gender_f", "region_other", 
                    "region_east", "region_west", "age_norm"]
        if num_questions == 1:  # Only has 1 question
            if a >= len(questions):
                break
            question_nums = [questions[a]]  # a=0, question[0]=20, question_nums contains a list of question numbers
            
        else:  # More than 1 question
            question_nums = random.sample(questions, num_questions)  # Randomly sample num_questions from all questions
            question_nums.sort() # Sort the questions
            #resample if already in list
            while question_nums in lst_comb:
                question_nums = random.sample(questions, num_questions)
            lst_comb.append(question_nums)
        
        for q in question_nums:  # q is one of the selected questions
            for j in range(4):
                cols.append("Q{0}A_{1}".format(q, j))  # Generate the question numbers
        features = feats_df[cols]  # Get the features for the selected questions
        

        labels = labels_df[[target]].copy()
        print("Num Questions: ", num_questions)
        print("Selected questions: ", question_nums, "a: ", a)
        print("features: ", len(features.columns))
        
        np.random.seed(seed)
        shufId = np.random.permutation(int(len(labels)))
        index = int(test_split * len(labels.index))  # Index of data to be used for testing

        df_prist = features.iloc[shufId[0:index]]   # Data for testing
        df_trainvalid = features.iloc[shufId[index:-1]] # Data for training and validation

        gt_prist = labels.iloc[shufId[0:index]]     # Labels for testing
        gt_trainvalid = labels.iloc[shufId[index:-1]]       # Labels for training and validation

        for b in range(models_per_question):
            np.random.seed(b)
            shufId = np.random.permutation(int(len(gt_trainvalid)))
            index = int((1/9) * len(gt_trainvalid.index))

            df_valid = df_trainvalid.iloc[shufId[0:index]]
            df_train = df_trainvalid.iloc[shufId[index:-1]]

            gt_valid = gt_trainvalid.iloc[shufId[0:index]]
            gt_train = gt_trainvalid.iloc[shufId[index:-1]]

            df_valid = df_valid.reset_index(drop=True)
            df_train = df_train.reset_index(drop=True)

            gt_valid = gt_valid.reset_index(drop=True)
            gt_train = gt_train.reset_index(drop=True)

            dataset = {}
            # Convert data to PyTorch tensors and store in dataset
            dataset['train_input'] = torch.from_numpy(df_train.values).float()
            dataset['train_label'] = torch.from_numpy(gt_train.values).float()
            dataset['test_input'] = torch.from_numpy(df_prist.values).float()
            dataset['test_label'] = torch.from_numpy(gt_prist.values).float()
            print("b: ", b)
            print("Train Input Size:", dataset['train_input'].shape)
            print("Train Label Size:", dataset['train_label'].shape)
            model = KAN(width=[dataset['train_input'].size(1),5,1], grid=2, k=3, seed=0)
            
            model(dataset['train_input'])  # obtain activation
            lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','abs']
            model.auto_symbolic(lib=lib)
            
            def train_acc(): 
                return torch.mean((torch.round(model(dataset['train_input'])[:,0]) == dataset['train_label'][:,0]).float())

            def test_acc():
                return torch.mean((torch.round(model(dataset['test_input'])[:,0]) == dataset['test_label'][:,0]).float())
            
            results = model.train(dataset, opt="LBFGS", steps=20, metrics=(train_acc, test_acc))
            formula = model.symbolic_formula()[0][0]
            print(formula)
            
            def acc(formula, X, y):
                batch = X.shape[0]
                correct = 0
                for i in range(batch):
                    formula_eval = formula
                    for j in range(X.size(1)):
                        index = j + 1
                        formula_eval = formula_eval.subs('x_{0}'.format(index), X[i, j].item())
                    correct += (np.round(np.array(formula_eval).astype(np.float64)) == y[i, 0].item())
                return correct / batch

            print('train acc of the formula:', acc(formula, dataset['train_input'], dataset['train_label']))
            print('test acc of the formula:', acc(formula, dataset['test_input'], dataset['test_label']))

            train_losses += results['train_loss']
            test_losses += results['test_loss']
        
    

In [13]:
# For different numbers of questions from DASS-42
import pickle
from sklearn.metrics import (classification_report, balanced_accuracy_score, confusion_matrix, 
                roc_auc_score, accuracy_score, roc_curve, RocCurveDisplay)
import scipy


for num_questions in question_numbers:
    models = {}

    accs = []
    aucs = []
    pres = []
    recs = []
    f1s = []
    auc_stdev = []
    f1_stdev = []
    auc_95ci_u = []
    auc_95ci_d = []
    f1_95ci_u = []
    f1_95ci_d = []
    lst_comb = []

    model_num = 0
    for a in range(models_to_train):
        model = {}

        print("Training model", a)
        cols = ["gender_m", "gender_f", "region_other", 
                "region_east", "region_west", "age_norm"]

        if num_questions == 1:
            if a >= len(questions):
                break
            question_nums = [questions[a]]
        else:
            question_nums = random.sample(questions, num_questions)
            question_nums.sort()
            # Resample if already in list
            while question_nums in lst_comb:
                question_nums = random.sample(questions, num_questions)
            lst_comb.append(question_nums)

        for q in question_nums:
            for j in range(4):
                cols.append("Q{0}A_{1}".format(q, j))
        features = feats_df[cols]

        labels = labels_df[[target]].copy()

        np.random.seed(seed)
        shufId = np.random.permutation(int(len(labels)))
        index = int(test_split * len(labels.index))

        df_prist = features.iloc[shufId[0:index]]
        df_trainvalid = features.iloc[shufId[index:-1]]

        gt_prist = labels.iloc[shufId[0:index]]
        gt_trainvalid = labels.iloc[shufId[index:-1]]

        df_prist.to_csv(os.path.join(data_folder, "prist_features.csv"), index=False)
        gt_prist.to_csv(os.path.join(data_folder, "prist_labels.csv"), index=False)

        accs1 = []
        aucs1 = []
        pres1 = []
        recs1 = []
        f1s1 = []
        ensemble_models = []

        for b in range(models_per_question):
            if b % 10 == 0:
                print("Training iteration", b)

            np.random.seed(b)
            shufId = np.random.permutation(int(len(gt_trainvalid)))
            index = int((1/9) * len(gt_trainvalid.index))

            df_valid = df_trainvalid.iloc[shufId[0:index]]
            df_train = df_trainvalid.iloc[shufId[index:-1]]

            gt_valid = gt_trainvalid.iloc[shufId[0:index]]
            gt_train = gt_trainvalid.iloc[shufId[index:-1]]

            df_valid = df_valid.reset_index(drop=True)
            df_train = df_train.reset_index(drop=True)

            gt_valid = gt_valid.reset_index(drop=True)
            gt_train = gt_train.reset_index(drop=True)

            dataset = {}
            dataset['train_input'] = torch.from_numpy(df_train.values).float()
            dataset['train_label'] = torch.from_numpy(gt_train.values).float()
            dataset['test_input'] = torch.from_numpy(df_prist.values).float()
            dataset['test_label'] = torch.from_numpy(gt_prist.values).float()
            clf = KAN(width=[dataset['train_input'].size(1),5,1], grid=2, k=3, seed=0)

            clf(dataset['train_input'])
            lib = ['x', 'x^2', 'x^3', 'x^4', 'exp', 'log', 'sqrt', 'tanh', 'sin', 'abs']
            clf.auto_symbolic(lib=lib)

            def train_acc():
                return torch.mean((torch.round(clf(dataset['train_input'])[:, 0]) == dataset['train_label'][:, 0]).float())

            def test_acc():
                return torch.mean((torch.round(clf(dataset['test_input'])[:, 0]) == dataset['test_label'][:, 0]).float())

            results = clf.train(dataset, opt="LBFGS", steps=20, metrics=(train_acc, test_acc))
            formula = clf.symbolic_formula()[0][0]
            
            def acc(formula, X, y):
                batch = X.shape[0]
                correct = 0
                for i in range(batch):
                    formula_eval = formula
                    for j in range(X.size(1)):
                        index = j + 1
                        formula_eval = formula_eval.subs('x_{0}'.format(index), X[i, j].item())
                    correct += (np.round(np.array(formula_eval).astype(np.float64)) == y[i, 0].item())
                return correct / batch

            train_accuracy = acc(formula, dataset['train_input'], dataset['train_label'])
            test_accuracy = acc(formula, dataset['test_input'], dataset['test_label'])

            accs1.append(test_accuracy)
            # Assuming roc_auc_score and other metrics can be calculated, similar to the MLP example
            auc_score = roc_auc_score(gt_prist, torch.round(clf(dataset['test_input'])[:, 0]).detach().numpy())
            aucs1.append(auc_score)

        mean_acc1 = np.mean(accs1)
        mean_auc1 = np.mean(aucs1)
        stdev_auc1 = np.std(aucs1)
        ci_auc1_u, ci_auc1_d = confidence_interval(aucs1)
        mean_pre1 = np.mean(pres1)
        mean_rec1 = np.mean(recs1)
        mean_f11 = np.mean(f1s1)
        stdev_f11 = np.std(f1s1)
        ci_f11_u, ci_f11_d = confidence_interval(f1s1)

        accs.append(mean_acc1)
        aucs.append(mean_auc1)
        auc_stdev.append(stdev_auc1)
        auc_95ci_u.append(ci_auc1_u)
        auc_95ci_d.append(ci_auc1_d)
        pres.append(mean_pre1)
        recs.append(mean_rec1)
        f1s.append(mean_f11)
        f1_stdev.append(stdev_f11)
        f1_95ci_u.append(ci_f11_u)
        f1_95ci_d.append(ci_f11_d)

        model["questions"] = question_nums
        model["models"] = ensemble_models
        model["auc_score"] = mean_auc1
        model["f1_score"] = mean_f11

        models[model_num] = model
        model_num += 1

    mean_acc = np.mean(accs)
    mean_auc = np.mean(aucs)
    stdev_auc = np.mean(auc_stdev)
    ci_auc_u = np.mean(auc_95ci_u)
    ci_auc_d = np.mean(auc_95ci_d)
    mean_pre = np.mean(pres)
    mean_rec = np.mean(recs)
    mean_f1  = np.mean(f1s)
    stdev_f1 = np.mean(f1_stdev)
    ci_f1_u = np.mean(f1_95ci_u)
    ci_f1_d = np.mean(f1_95ci_d)

    percentile_list = pd.DataFrame(
    {
        'accuracy': accs,
        'auc_roc': aucs,
        'auc_stdev': auc_stdev,
        'auc_95ci_u': auc_95ci_u,
        'auc_95ci_d': auc_95ci_d,
        'precision': pres,
        'recall': recs,
        'f1_score': f1s,
        'f1_stdev': f1_stdev,
        'f1_95ci_u': f1_95ci_u,
        'f1_95ci_d': f1_95ci_d,
    })
    percentile_list.to_csv('./data/results_kan.csv', mode='a', header=True)

    print("\nNumber of questions:", num_questions)
    print("Mean AUC      :", mean_auc)
    print("Stdev AUC     :", stdev_auc)
    print("95th CI AUC   :", ci_auc_u, ci_auc_d)
    print("Mean F1-Score :", mean_f1)
    print("Stdev F1      :", stdev_f1)
    print("95th CI F1    :", ci_f1_u, ci_f1_d)

    ACCS.append(mean_acc)
    AUCS.append(mean_auc)
    AUC_STDEV.append(stdev_auc)
    AUC_95CI_U.append(ci_auc_u)
    AUC_95CI_D.append(ci_auc_d)
    PRES.append(mean_pre)
    RECS.append(mean_rec)
    F1S.append(mean_f1)
    F1_STDEV.append(stdev_f1)
    F1_95CI_U.append(ci_f1_u)
    F1_95CI_D.append(ci_f1_d)

    with open("./data/models_kan.bin", "wb") as f:
        pickle.dump(models, f)

# Plotting the accuracy
plt.figure()
plt.plot(ACCS, label='Accuracy')

Training model 0
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 3.78e-01 | test loss: 3.84e-01 | reg: 9.65e+01 : 100%|██| 20/20 [00:08<00:00,  2.31it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 1
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 3.79e-01 | test loss: 3.94e-01 | reg: 8.84e+01 : 100%|██| 20/20 [00:08<00:00,  2.35it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 2
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 3.98e-01 | test loss: 4.59e-01 | reg: 6.25e+01 : 100%|██| 20/20 [00:09<00:00,  2.19it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 3
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 4.19e-01 | test loss: 4.33e-01 | reg: 8.62e+01 : 100%|██| 20/20 [00:08<00:00,  2.31it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 4
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 4.09e-01 | test loss: 3.99e-01 | reg: 5.02e+01 : 100%|██| 20/20 [00:08<00:00,  2.36it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 5
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 4.34e-01 | test loss: 4.09e-01 | reg: 7.07e+01 : 100%|██| 20/20 [00:08<00:00,  2.29it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 6
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 3.83e-01 | test loss: 3.89e-01 | reg: 1.70e+02 : 100%|██| 20/20 [00:08<00:00,  2.44it/s]
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean,


Training model 7
Training iteration 0
fixing (0,0,0) with exp, r2=1.0000000000000016
fixing (0,0,1) with exp, r2=1.0000000000000016
fixing (0,0,2) with exp, r2=1.0000000000000018
fixing (0,0,3) with exp, r2=1.0000000000000013
fixing (0,0,4) with exp, r2=1.0000000000000018
fixing (0,1,0) with x^4, r2=1.000000000000002
fixing (0,1,1) with x^4, r2=1.0000000000000018
fixing (0,1,2) with exp, r2=1.0000000000000016
fixing (0,1,3) with exp, r2=1.0000000000000018
fixing (0,1,4) with exp, r2=1.0000000000000016
fixing (0,2,0) with exp, r2=1.0000000000000018
fixing (0,2,1) with x^4, r2=1.0000000000000013
fixing (0,2,2) with exp, r2=1.0000000000000016
fixing (0,2,3) with exp, r2=1.0000000000000013
fixing (0,2,4) with x^4, r2=1.0000000000000016
fixing (0,3,0) with exp, r2=1.0000000000000027
fixing (0,3,1) with exp, r2=1.0000000000000022
fixing (0,3,2) with exp, r2=1.0000000000000022
fixing (0,3,3) with exp, r2=1.0000000000000022
fixing (0,3,4) with exp, r2=1.000000000000002
fixing (0,4,0) with exp,

train loss: 3.60e-01 | test loss: 3.58e-01 | reg: 1.02e+02 : 100%|██| 20/20 [00:08<00:00,  2.26it/s]


In [None]:
# Print results
print("\nAll accuracies:", ACCS)
print("All AUCs:", AUCS)
print("Stdev of AUCs:", AUC_STDEV)
print("95th CI of AUCs:", AUC_95CI_U)
print("95th CI of AUCs:", AUC_95CI_D)
print("All precisions:", PRES)
print("All recalls:", RECS)
print("All F1s:", F1S)
print("Stdev of F1s:", F1_STDEV)
print("95th CI of F1s:", F1_95CI_U)
print("95th CI of F1s:", F1_95CI_D)

In [None]:
# Plot accuracy results
plt.figure(figsize=(10,10)) # Make new figure
plt.plot(question_numbers, ACCS)
plt.plot(question_numbers, AUCS)
plt.plot(question_numbers, F1S)
plt.plot(question_numbers, PRES)
plt.plot(question_numbers, RECS)
plt.xlabel("Number of DASS questions")
plt.ylabel("Accuracy")
plt.legend(["Accuracy score", "AUC ROC score", "F1 score", "Precision", "Recall"])
plt.show()