In [1]:
import pandas as pd
import numpy as np
from utils.data import get_data, fetching_run
from models import ClassifierNN
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
from utils.argparser import build_parser
from datasets import CHEXPERT_remedis

2023-09-08 12:35:35.995755: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def get_auc_knn(K, x_train, y_train, x_test, y_test, weights="uniform"):
    """Multi-label AUC of a K-nearest-neighbour classifier, scored on hard predictions.

    Parameters
    ----------
    K : int
        Number of neighbours.
    x_train, y_train
        Samples and labels the classifier is fitted on.
    x_test, y_test
        Samples and labels the AUC is computed on.
    weights : str
        Forwarded to ``KNeighborsClassifier`` ("uniform" or "distance").

    Returns
    -------
    The value of ``tf.keras.metrics.AUC(multi_label=True)`` on the predicted
    labels, or ``None`` when there are fewer than ``K`` training samples.
    """
    # With fewer than K training samples there is nothing to evaluate.
    if len(x_train) < K:
        return None

    knn = KNeighborsClassifier(n_neighbors=K, weights=weights)
    knn.fit(x_train, y_train)
    hard_labels = knn.predict(x_test)

    auc_metric = tf.keras.metrics.AUC(multi_label=True)
    return auc_metric(y_test, hard_labels).numpy()

def get_auc_continuous_knn(K, x_train, y_train, x_valid, y_valid, x_test, y_test, weights="uniform"):
    """Multi-label AUC of a K-nearest-neighbour classifier, scored on class-1 probabilities.

    Parameters
    ----------
    K : int
        Number of neighbours.
    x_train, y_train
        Samples and labels the classifier is fitted on.
    x_valid, y_valid
        Accepted but not used by this function.
    x_test, y_test
        Samples and labels the AUC is computed on.
    weights : str
        Forwarded to ``KNeighborsClassifier`` ("uniform" or "distance").

    Returns
    -------
    The value of ``tf.keras.metrics.AUC(multi_label=True)`` computed on the
    per-label probability of class 1, or ``None`` when there are fewer than
    ``K`` training samples.
    """
    # With fewer than K training samples there is nothing to evaluate.
    if len(x_train) < K:
        return None

    model = KNeighborsClassifier(n_neighbors=K, weights=weights)
    model.fit(x_train, y_train)

    per_label_proba = model.predict_proba(x_test)
    per_label_classes = model.classes_
    if not isinstance(per_label_proba, list):
        # Single-output fit: scikit-learn returns one array instead of a list.
        per_label_proba = [per_label_proba]
        per_label_classes = [per_label_classes]

    # Look up the column of class 1 for every label instead of overwriting
    # `model.classes_` with a fixed five-label [0, 1] layout. The overwrite
    # mis-scored labels whose training column contained only positives (their
    # probability mass sits in column 0) and tied the function to five labels.
    positive_scores = []
    for proba, classes in zip(per_label_proba, per_label_classes):
        positive_col = np.flatnonzero(np.asarray(classes) == 1)
        if positive_col.size:
            positive_scores.append(proba[:, positive_col[0]])
        else:
            # Class 1 never seen in training for this label: probability 0.
            positive_scores.append(np.zeros(proba.shape[0]))

    # Shape (n_test_samples, n_labels), matching the layout of y_test.
    y_pred = np.stack(positive_scores, axis=1)
    auc = tf.keras.metrics.AUC(multi_label=True)(y_test, y_pred).numpy()
    return auc
    
def get_auc_mlp(x_train, y_train, x_valid, y_valid, x_test, y_test, lr_init=0.001, n_epochs=100, patience=10):
    """Train a single dense sigmoid layer on the features and evaluate it on the test split.

    Parameters
    ----------
    x_train, y_train
        Training features and multi-label targets, shaped
        (n_samples, n_features) and (n_samples, n_labels).
    x_valid, y_valid
        Validation split, monitored by early stopping (``val_loss``).
    x_test, y_test
        Test split used for the final evaluation.
    lr_init : float
        Learning rate of the Adam optimizer.
    n_epochs : int
        Maximum number of training epochs.
    patience : int
        Early-stopping patience, in epochs.

    Returns
    -------
    history_train : pandas.DataFrame
        Per-epoch training/validation metrics from ``model.fit``.
    history_test : list
        Output of ``model.evaluate`` on the test split (loss followed by the
        compiled AUC metric).
    """
    SHUFFLE_BUFFER_SIZE = 128
    BATCH_SIZE = 64

    # Layer sizes follow the data instead of the previously hard-coded
    # 2048 input features and 5 output labels.
    n_features = np.shape(x_train)[-1]
    n_labels = np.shape(y_train)[-1]

    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    ds_valid = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))

    ds_train = ds_train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
    ds_test = ds_test.batch(BATCH_SIZE)
    ds_valid = ds_valid.batch(BATCH_SIZE)

    # One dense layer followed by a sigmoid. An earlier variant with a
    # 128-unit hidden layer was built and then immediately overwritten by this
    # model, so that dead construction has been removed.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(n_labels),
        tf.keras.layers.Activation('sigmoid'),
    ])
    model.build([None, n_features])

    # Reference kept from the original notebook:
    # https://stackoverflow.com/questions/62350538/tf2-2-loading-a-saved-model-from-tensorflow-hub-failed-with-attributeerror
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_init)
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=[tf.keras.metrics.AUC(from_logits=False, multi_label=True)])

    # Stop once val_loss has not improved for `patience` epochs; the check
    # only starts after the first 10 epochs.
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                patience=patience,
                                                baseline=None,
                                                start_from_epoch=10)

    # Fitting the model
    history_train = model.fit(ds_train,
                              verbose=1,
                              epochs=n_epochs,
                              validation_data=ds_valid,
                              validation_freq=1,
                              callbacks=[callback],
                              )
    history_train = pd.DataFrame.from_dict(history_train.history)

    # Evaluating the model
    history_test = model.evaluate(ds_test, verbose=0)

    return history_train, history_test
    


In [3]:
# Run configuration, held in a Series so entries can be read as attributes
# (args.dataset, args.sd, ...). Flag-like entries are kept as strings.
args = pd.Series(
    dict(
        dataset="chexpert",
        algorithm="full",
        sd=1,
        gamma=0.5,
        tsh=0.95,
        hard_thresholding="False",
        separable="not",
        n_epochs=1000,
        running_cluster="True",
        run="chexpert_runs",
        budget="low",
    )
)

# Command-line alternative to the Series above:
# args = build_parser().parse_args(tuple(sys.argv[1:]))

In [None]:
if args.sd is not None:
    # Seed NumPy and TensorFlow: the MLP initialisation and the tf.data
    # shuffle draw from TensorFlow's generator, not NumPy's.
    np.random.seed(args.sd)
    tf.random.set_seed(args.sd)

dataset, dataset_test, run_path, idx = get_data(args)
dataset_valid = CHEXPERT_remedis(type="valid", cluster=args.running_cluster)
scores, queries, radiuses, degrees, options, covers = fetching_run(args.algorithm, run_path)

x_test, y_test = dataset_test.get_all_data()
# Validation arrays come from the validation split. They were previously read
# from dataset_test, so early stopping was driven by the test data and
# dataset_valid was never used.
# NOTE(review): assumes CHEXPERT_remedis exposes get_all_data() like
# dataset_test does - confirm.
x_valid, y_valid = dataset_valid.get_all_data()

AUC_COLUMNS = ["5_NN", "20_NN", "100_NN",
               "5_NN_continuous", "20_NN_continuous", "100_NN_continuous",
               "mlp"]
KNN_SIZES = (5, 20, 100)


def _evaluate_classifiers(x_train, y_train):
    """Return one result row (column name -> AUC) for the given training set.

    Runs the hard-label k-NN, the probabilistic k-NN and the MLP, all scored on
    the test split loaded above. A k-NN entry is None when the training set
    has fewer samples than its K.
    """
    row = {}
    for k in KNN_SIZES:
        row[f"{k}_NN"] = get_auc_knn(k, x_train, y_train, x_test, y_test, weights="distance")
    for k in KNN_SIZES:
        row[f"{k}_NN_continuous"] = get_auc_continuous_knn(
            k, x_train, y_train, x_valid, y_valid, x_test, y_test, weights="distance")
    _, history_test = get_auc_mlp(x_train, y_train, x_valid, y_valid, x_test, y_test,
                                  lr_init=0.001, n_epochs=100)
    # model.evaluate returns the loss first and the AUC metric last.
    row["mlp"] = history_test[-1]
    return row


# Collect rows in a list and build the frame once, instead of growing it with
# pd.concat on every iteration.
records = []
if args.algorithm == "full":
    # The k-NN scores are computed here too: they used to be commented out
    # while the result row still referenced them, which raised a NameError
    # after the MLP had finished training.
    x_train, y_train = dataset.get_all_data()
    records.append(_evaluate_classifiers(x_train, y_train))
else:
    for n_queries in idx:
        # Rebuild the labelled set from scratch with the first n_queries queries.
        dataset.restart()
        dataset.observe(queries[:n_queries])
        x_train, y_train = dataset.get_labeled_data()
        new_row = _evaluate_classifiers(x_train, y_train)
        records.append(new_row)
        print(new_row)

aucs = pd.DataFrame(records, columns=AUC_COLUMNS)


2023-09-08 12:35:56.689359: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-08 12:35:56.820608: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-08 12:35:56.821521: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Epoch 1/100


2023-09-08 12:36:14.237946: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x14c13c5d12e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-09-08 12:36:14.238088: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1080 Ti, Compute Capability 6.1
2023-09-08 12:36:14.387705: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-08 12:36:18.635172: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
2023-09-08 12:36:18.990588: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-09-08 12:36:19.226170: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100

In [203]:
# Path of evaluation.csv inside the run directory, shown as the cell output.
f"{run_path}/evaluation.csv"

'/cluster/work/grlab/projects/projects2022_doctor-in-the-loop/chexpert_runs/chexpert/1000_0.95_1/evaluation.csv'