In [1]:
import os
import pandas as pd
import scipy.stats as st
import numpy as np

In [2]:
def confidence(data, level=0.95, ndigits=3):
    """Return a two-sided Student-t confidence interval for the mean of ``data``.

    Parameters
    ----------
    data : sequence of numbers
        One-dimensional sample (list, tuple, NumPy array, pandas Series, ...).
    level : float, optional
        Confidence level of the interval; defaults to 0.95.
    ndigits : int, optional
        Number of decimals both bounds are rounded to; defaults to 3.

    Returns
    -------
    tuple of float
        ``(lower, upper)`` rounded to ``ndigits`` decimals. ``(nan, nan)`` is
        returned when fewer than two observations are given, because the
        standard error is undefined in that case.
    """
    values = np.asarray(data, dtype=float)
    n_obs = len(values)
    if n_obs < 2:
        # No degrees of freedom: report "undefined" explicitly instead of
        # relying on NaNs (and RuntimeWarnings) propagating out of NumPy/SciPy.
        return (float("nan"), float("nan"))

    center = np.mean(values)
    std_err = st.sem(values)
    if std_err == 0:
        # SciPy treats scale=0 as invalid and yields NaN; for a sample without
        # any spread the interval simply collapses onto the mean.
        bounds = (center, center)
    else:
        # The level is passed positionally because the keyword was renamed
        # from ``alpha`` to ``confidence`` in SciPy 1.9.
        bounds = st.t.interval(level, df=n_obs - 1, loc=center, scale=std_err)

    # Plain floats keep the printed tuple identical across NumPy versions.
    return tuple(float(round(bound, ndigits)) for bound in bounds)

#### study

In [3]:
import os

import joblib
import optuna

In [4]:
# Load the persisted study object (presumably an Optuna study, given the
# attributes read from it below) from study/study_notes_physi.joblib.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while loading — only open study files that come from a trusted source.
study = joblib.load(os.path.join("study/", "study_notes_physi.joblib"))

In [5]:
# Summarise the loaded study: how many trials it holds and which one scored best.
print("Study statistics: ")
print(f"  Number of finished trials:  {len(study.trials)}")

# Keep a handle on the best trial so its details can be inspected afterwards.
best_trial = study.best_trial
print(f"Best trial: {best_trial}")
print(f"  Value:  {best_trial.value}")

Study statistics: 
  Number of finished trials:  20
Best trial: FrozenTrial(number=6, state=1, values=[0.8533300174330142], datetime_start=datetime.datetime(2023, 9, 28, 23, 57, 5, 425060), datetime_complete=datetime.datetime(2023, 9, 29, 0, 10, 16, 877734), params={'class_weight': 1.4, 'notes_hidden': 20}, user_attrs={}, system_attrs={'completed_rung_0': 0.8594568444826267, 'completed_rung_1': 0.8594568444826267}, intermediate_values={3: 0.8594568444826267, 4: 0.8533300174330142}, distributions={'class_weight': FloatDistribution(high=3.0, log=False, low=1.0, step=0.1), 'notes_hidden': CategoricalDistribution(choices=(20, 50, 100, 150, 200))}, trial_id=6, value=None)
  Value:  0.8533300174330142


#### notes_physi

In [9]:
ckpt_dir = "ckpt/notes_physi"

all_auc_roc = []
all_auc_pr = []
# Placeholders of the disabled best-trial report; nothing fills them in this cell.
best_auc_roc = []
best_auc_pr = []

# Pool the test metrics of every trial directory found under ckpt_dir.
# sorted() makes the traversal order reproducible across filesystems.
for trial_dir in sorted(os.listdir(ckpt_dir)):
    trial_path = os.path.join(ckpt_dir, trial_dir)
    if not os.path.isdir(trial_path):
        # Stray files (e.g. .DS_Store) are not trials; a real trial directory
        # without its CSV still fails loudly in read_csv below.
        continue
    test_info = pd.read_csv(os.path.join(trial_path, "test_info.csv"))
    all_auc_roc.extend(test_info["auc_roc"].to_list())
    all_auc_pr.extend(test_info["auc_pr"].to_list())

# Report the sample mean itself rather than the midpoint of the already
# rounded interval bounds, which rounds twice and can be off by 0.001.
print("all_auc_roc 95% confidence interval", np.round(np.mean(all_auc_roc), 3), confidence(all_auc_roc))
print("all_auc_pr 95% confidence interval", np.round(np.mean(all_auc_pr), 3), confidence(all_auc_pr))

all_auc_roc 95% confidence interval 0.85 (0.845, 0.856)
all_auc_pr 95% confidence interval 0.408 (0.391, 0.424)


#### notes_vital

In [10]:
ckpt_dir = "ckpt/notes_vital/"

all_auc_roc = []
all_auc_pr = []
# Placeholders of the disabled best-trial report; nothing fills them in this cell.
best_auc_roc = []
best_auc_pr = []

# Pool the test metrics of every trial directory found under ckpt_dir.
# sorted() makes the traversal order reproducible across filesystems.
for trial_dir in sorted(os.listdir(ckpt_dir)):
    trial_path = os.path.join(ckpt_dir, trial_dir)
    if not os.path.isdir(trial_path):
        # Stray files (e.g. .DS_Store) are not trials; a real trial directory
        # without its CSV still fails loudly in read_csv below.
        continue
    test_info = pd.read_csv(os.path.join(trial_path, "test_info.csv"))
    all_auc_roc.extend(test_info["auc_roc"].to_list())
    all_auc_pr.extend(test_info["auc_pr"].to_list())

# Report the sample mean itself rather than the midpoint of the already
# rounded interval bounds, which rounds twice and can be off by 0.001.
print("all_auc_roc 95% confidence interval", np.round(np.mean(all_auc_roc), 3), confidence(all_auc_roc))
print("all_auc_pr 95% confidence interval", np.round(np.mean(all_auc_pr), 3), confidence(all_auc_pr))

all_auc_roc 95% confidence interval 0.814 (0.805, 0.824)
all_auc_pr 95% confidence interval 0.35 (0.335, 0.364)


#### physi_vital

In [11]:
ckpt_dir = "ckpt/physi_vital/"

all_auc_roc = []
all_auc_pr = []
# Placeholders of the disabled best-trial report; nothing fills them in this cell.
best_auc_roc = []
best_auc_pr = []

# Pool the test metrics of every trial directory found under ckpt_dir.
# sorted() makes the traversal order reproducible across filesystems.
for trial_dir in sorted(os.listdir(ckpt_dir)):
    trial_path = os.path.join(ckpt_dir, trial_dir)
    if not os.path.isdir(trial_path):
        # Stray files (e.g. .DS_Store) are not trials; a real trial directory
        # without its CSV still fails loudly in read_csv below.
        continue
    test_info = pd.read_csv(os.path.join(trial_path, "test_info.csv"))
    all_auc_roc.extend(test_info["auc_roc"].to_list())
    all_auc_pr.extend(test_info["auc_pr"].to_list())

# Report the sample mean itself rather than the midpoint of the already
# rounded interval bounds, which rounds twice and can be off by 0.001.
print("all_auc_roc 95% confidence interval", np.round(np.mean(all_auc_roc), 3), confidence(all_auc_roc))
print("all_auc_pr 95% confidence interval", np.round(np.mean(all_auc_pr), 3), confidence(all_auc_pr))

all_auc_roc 95% confidence interval 0.849 (0.841, 0.858)
all_auc_pr 95% confidence interval 0.429 (0.404, 0.454)


#### all (notes_physi_vital)

In [12]:
ckpt_dir = "ckpt/notes_physi_vital/"

all_auc_roc = []
all_auc_pr = []
# Placeholders of the disabled best-trial report; nothing fills them in this cell.
best_auc_roc = []
best_auc_pr = []

# Pool the test metrics of every trial directory found under ckpt_dir.
# sorted() makes the traversal order reproducible across filesystems.
for trial_dir in sorted(os.listdir(ckpt_dir)):
    trial_path = os.path.join(ckpt_dir, trial_dir)
    if not os.path.isdir(trial_path):
        # Stray files (e.g. .DS_Store) are not trials; a real trial directory
        # without its CSV still fails loudly in read_csv below.
        continue
    test_info = pd.read_csv(os.path.join(trial_path, "test_info.csv"))
    all_auc_roc.extend(test_info["auc_roc"].to_list())
    all_auc_pr.extend(test_info["auc_pr"].to_list())

# Report the sample mean itself rather than the midpoint of the already
# rounded interval bounds, which rounds twice and can be off by 0.001.
print("all_auc_roc 95% confidence interval", np.round(np.mean(all_auc_roc), 3), confidence(all_auc_roc))
print("all_auc_pr 95% confidence interval", np.round(np.mean(all_auc_pr), 3), confidence(all_auc_pr))

all_auc_roc 95% confidence interval 0.858 (0.851, 0.866)
all_auc_pr 95% confidence interval 0.43 (0.412, 0.449)
