In [1]:
# Reload imported modules automatically before each cell runs (mode 2 = all modules),
# so edits to the project code added to sys.path below are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
# Use the interactive widget backend for matplotlib figures.
%matplotlib widget

In [3]:
import numpy as np
import random
import sys

sys.path.append('../tools')

import os

import torch

import pandas as pd
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
import pytorch_lightning.loggers as pl_loggers
import pytorch_lightning.callbacks as pl_callbacks
import data_utility, annotation_utility
from models.rns_dataloader import *
from active_learning_utility import get_strategy
from active_learning_data import Data
from active_learning_net import Net
from copy import deepcopy
from models.SwaV import SwaV
from models.LSTMDownStream import SupervisedDownstream
import warnings
import pickle
warnings.filterwarnings("ignore")


In [4]:
# Global seed for every random number generator used in this notebook.
random_seed = 42

if torch.cuda.is_available():
    # Restrict cuDNN to deterministic algorithms.
    torch.backends.cudnn.deterministic = True
    # Disable the cuDNN autotuner so the same algorithm is selected on every run.
    torch.backends.cudnn.benchmark = False

# A single call seeds Python's `random`, NumPy and torch (torch.manual_seed also covers CUDA);
# workers=True additionally makes Lightning derive seeds for DataLoader worker processes.
# The public top-level `pl.seed_everything` is used instead of re-importing the package and
# reaching into `pytorch_lightning.utilities.seed`, which is a deprecated import path.
# Kept as the last expression so the cell still echoes the seed that was set.
pl.seed_everything(seed=random_seed, workers=True)

Global seed set to 42


42

In [5]:
# Root of the user data tree; the log and checkpoint roots live directly beneath it.
data_dir = "../../../user_data/"
log_folder_root = data_dir + 'logs/'
ckpt_folder_root = data_dir + 'checkpoints/'

In [6]:
# Name of the query strategy to run; it is passed to get_strategy() and is also embedded in the
# log / checkpoint / selected-index paths built later in the notebook.
strategy_name = 'LeastConfidence'

In [7]:
# Labeling schedule expressed as percentages of the training pool (each value is later
# multiplied by n_pool / 100): initial labeled share, final labeled share, share added per round.
nStart, nEnd, nQuery = 1, 20, 2

In [8]:
# DataLoader settings common to the train and test loaders. Each loader below receives its own
# copy, so mutating one loader's arguments does not affect the other.
# NOTE(review): drop_last=True is applied to the test loader as well; if that loader feeds
# evaluation, a trailing partial batch would be skipped — confirm this is intended.
_shared_loader_args = {
    'num_workers': 4,
    'collate_fn': collate_fn,
    'drop_last': True,
    'persistent_workers': True,
}

# Task configuration handed to the dataset, the network wrapper and the strategy.
args_task = {
    'n_epoch': 60,
    'transform_train': True,
    'strategy_name': strategy_name,
    'transform': False,
    'loader_tr_args': {'batch_size': 8, **_shared_loader_args},
    'loader_te_args': {'batch_size': 4, **_shared_loader_args},
}

In [9]:
# raw_annotations = pd.read_csv(data_dir + 'full_updated_anns_annotTbl_cleaned.csv')
# ids = list(np.unique(raw_annotations[raw_annotations['descriptions'].notnull()]['HUP_ID']))
# # ids = list(np.unique(raw_annotations['HUP_ID']))
#
# data_import = data_utility.read_files(path=data_dir+'rns_data', path_data=data_dir+'rns_raw_cache', patientIDs=ids,
#                                       verbose=True)  # Import data with annotation

In [10]:
# IDs of the recordings to load; each one maps to a cached '<ID>.npy' file.
# (presumably these mirror the contents of data_dir + 'rns_test_cache' — TODO confirm)
patient_ids = (
    'HUP047', 'HUP084', 'HUP096', 'HUP109', 'HUP121', 'HUP129', 'HUP131',
    'HUP137', 'HUP147', 'HUP156', 'HUP159', 'HUP182', 'HUP197', 'HUP199',
    'RNS026', 'RNS029',
)
data_list = [patient_id + '.npy' for patient_id in patient_ids]

# Load the data split at the episode level; 0.8 is forwarded as the `split` argument.
episode_split = get_data_by_episode(data_list, split=0.8)
train_data, train_label, test_data, test_label, train_index, test_index = episode_split

# Report the shape of each data/label container for both splits.
for split_array in (train_data, train_label, test_data, test_label):
    print(split_array.shape)

16it [00:04,  3.92it/s]

(992,)
(992,)
(270,)
(270,)





In [11]:
def _episode_lengths(labels):
    """Return an array holding the first-axis length of every per-episode label array."""
    return np.array([episode_labels.shape[0] for episode_labels in labels])


# Concatenate the per-episode arrays of each split into single flat arrays.
X_train, y_train, index_train = (
    np.concatenate(parts) for parts in (train_data, train_label, train_index)
)
X_test, y_test, index_test = (
    np.concatenate(parts) for parts in (test_data, test_label, test_index)
)

# Keep the per-episode lengths so the flat arrays can be mapped back to episodes.
seq_len_train = _episode_lengths(train_label)
seq_len_test = _episode_lengths(test_label)

In [12]:
# Number of entries (episodes) in the train pool and in the test set.
n_pool = len(train_data)
n_test = len(test_data)

# Convert the percentage settings into absolute counts (truncating toward zero).
NUM_INIT_LB = int(nStart * n_pool / 100)
_label_budget = int(nEnd * n_pool / 100) - NUM_INIT_LB  # labels to acquire after the initial set

if nStart == 100:
    # Everything is labeled from the start: nothing to query.
    NUM_QUERY = 0
    NUM_ROUND = 0
else:
    NUM_QUERY = int(nQuery * n_pool / 100)
    if NUM_QUERY == 0:
        # Previously this surfaced as an unexplained ZeroDivisionError.
        raise ValueError(
            'nQuery=%s%% of a pool of %d items rounds down to 0 queries per round; '
            'increase nQuery or use a larger pool.' % (nQuery, n_pool))
    # Integer ceiling division: a final partial round is needed when the budget is not an exact
    # multiple of NUM_QUERY. Clamped at 0 so nEnd <= nStart yields no rounds instead of a
    # negative count.
    NUM_ROUND = max(0, -(-_label_budget // NUM_QUERY))

print(NUM_INIT_LB)
print(NUM_QUERY)
print(NUM_ROUND)

9
19
10


In [13]:
# Wrap the flattened train/test arrays, the per-episode lengths, the RNS_Active_by_episode_LSTM
# handler class and the task configuration in the active-learning Data container.
dataset = Data(X_train, y_train, X_test, y_test, seq_len_train, seq_len_test, RNS_Active_by_episode_LSTM, args_task)

In [14]:
# Restore the pretrained SwaV model. load_from_checkpoint is a classmethod, so it is invoked on
# the class itself; the previous `SwaV().load_from_checkpoint(...)` first built a throwaway,
# randomly initialised model only to discard it.
# NOTE(review): that throwaway construction may have consumed random numbers, so the random
# initialisation of the downstream model below can differ from earlier runs with the same seed.
swav = SwaV.load_from_checkpoint(
    ckpt_folder_root + 'rns_swav_50_12/rns_swav-epoch=82-swav_loss=2.58204.ckpt')

# Downstream supervised model built on top of the pretrained backbone.
model = SupervisedDownstream(swav.backbone)
# Snapshot the freshly built weights; later cells call load_state_dict(modelstate) to reset the
# model before retraining.
modelstate = deepcopy(model.state_dict())
device = "cuda" if torch.cuda.is_available() else "cpu"

# Network wrapper used by the strategy for training and prediction.
net = Net(model, args_task, device, ckpt_folder_root='rns_active', log_folder_root='rns_active')

In [15]:
# Build the query strategy named by strategy_name around the dataset and the network wrapper.
# The fourth positional argument is passed as None; its meaning is defined in
# active_learning_utility.get_strategy, which is not visible in this notebook.
strategy = get_strategy(strategy_name, dataset, net, None, args_task, rns_data=True)

In [16]:
# initial round of training, round 0
# Request an initial labeled set of NUM_INIT_LB pool items before any query round
# (how the items are chosen is decided inside Data.initialize_labels — not visible here).
dataset.initialize_labels(NUM_INIT_LB)

In [17]:
# ids,_ = dataset.get_labeled_data()
# 
# selected_inds = {}
# selected_inds[0] = ids
# 
# if not os.path.exists(log_folder_root + 'rns_active_selected/' + strategy_name):
#     os.makedirs(log_folder_root + 'rns_active_selected/' + strategy_name)
# 
# # Serialize and save to a file
# with open(log_folder_root + 'rns_active_selected/' + strategy_name + '/' + 'selected_indices.pkl', 'wb') as f:
#     pickle.dump(selected_inds, f)

In [18]:
# strategy.train()

In [19]:
# Checkpoint and validate every this many training steps.
ckpt_save_n_step = 20

# Identifiers embedded in checkpoint file names and log directories.
_strategy_tag = strategy.net.params['strategy_name']
_round_tag = str(strategy.net.round)

# Keep every periodic checkpoint (save_top_k=-1); file names encode round, step and train loss.
checkpoint_callback = pl_callbacks.ModelCheckpoint(
    monitor='train_loss',
    filename=_strategy_tag + '_round_' + _round_tag + '-{step}-{train_loss:.5f}',
    dirpath=strategy.net.ckpt_folder_root + 'active_checkpoints_' + _strategy_tag,
    save_top_k=-1,
    every_n_train_steps=ckpt_save_n_step,
    save_on_train_epoch_end=False,
)

# Stop once validation accuracy has not improved for 10 consecutive validation checks.
early_stop_callback = pl_callbacks.EarlyStopping(
    monitor="val_acc",
    patience=10,
    verbose=False,
    mode="max",
)

# One CSV log directory per round.
csv_logger = pl_loggers.CSVLogger(
    strategy.net.log_folder_root + "active_logs_" + _strategy_tag,
    name='logger_round_' + _round_tag,
)

# Validation is step-based (same interval as checkpointing) rather than epoch-based.
trainer = pl.Trainer(
    logger=csv_logger,
    max_epochs=strategy.net.params['n_epoch'],
    callbacks=[checkpoint_callback, early_stop_callback],
    accelerator='gpu',
    devices=1,
    log_every_n_steps=50,
    precision=16,
    check_val_every_n_epoch=None,
    val_check_interval=ckpt_save_n_step,
    enable_model_summary=False,
)

# Hand the trainer to the network wrapper.
strategy.net.trainer = trainer

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [20]:
# Sanity check that a CUDA device is visible (the Trainer in this notebook is configured with
# accelerator='gpu').
torch.cuda.is_available()

True

In [21]:

# Rounds below this index are replayed from the selections saved on disk; from this round on,
# new items are queried and the model is retrained.
_RESUME_ROUND = 7
# Multiplier applied to NUM_QUERY when querying.
# NOTE(review): presumably converts an episode budget into a per-window budget — confirm.
_QUERY_SCALE = 90

_metrics_prefix = log_folder_root + 'rns_active/active_logs_' + strategy_name + '/logger_round_'
_selected_path = log_folder_root + 'rns_active_selected/' + strategy_name + '/' + 'selected_indices.pkl'
ckpt_directory = ckpt_folder_root + 'rns_active/active_checkpoints_' + strategy_name


def _best_ckpt_prefix(round_idx):
    """Return the checkpoint file-name prefix of the best-`val_acc` step logged for a round."""
    logs = pd.read_csv(_metrics_prefix + str(round_idx) + '/version_0/metrics.csv')
    max_row = logs.iloc[logs['val_acc'].argmax()]
    # The logged step is offset by one relative to the step in the checkpoint name.
    return strategy_name + '_round_' + str(round_idx) + '-step=' + str(int(max_row['step'] + 1))


def _load_selected():
    """Load the {round: selected indices} dictionary persisted by earlier rounds."""
    # pickle executes arbitrary code on load: only read files written by this project.
    with open(_selected_path, 'rb') as f:
        return pickle.load(f)


for rd in range(1, NUM_ROUND + 1):
    print('round ' + str(rd))
    # Diagnostic only: best checkpoint of the previous round.
    print(_best_ckpt_prefix(rd - 1))

    # NOTE(review): the checkpoint loaded for round `rd` comes from the round-`rd` log, which
    # therefore has to exist already — confirm this indexing is intended.
    load_file_name = _best_ckpt_prefix(rd)
    print(load_file_name)

    # List the directory on every round because training adds new checkpoints.
    ckpt_files = os.listdir(ckpt_directory)
    # Match up to the '-' that follows the step number; a bare substring test would let
    # '...-step=20' also match '...-step=200-...' and pick whichever file is listed first.
    ind = next((i for i, s in enumerate(ckpt_files) if (load_file_name + '-') in s), None)
    if ind is None:
        raise FileNotFoundError(
            'No checkpoint starting with ' + load_file_name + ' found in ' + ckpt_directory)
    print(ind, ckpt_files[ind])
    strategy.net.net = strategy.net.net.load_from_checkpoint(
        ckpt_directory + '/' + ckpt_files[ind], backbone=swav.backbone)

    if rd < _RESUME_ROUND:
        # Replay: re-apply the indices that were selected for this round in an earlier run.
        q_idxs = np.array(_load_selected()[rd])
        strategy.update(q_idxs)
    else:
        q_idxs = strategy.query(NUM_QUERY * _QUERY_SCALE)

        # Persist the new selection next to those of the previous rounds.
        selected_inds = _load_selected()
        selected_inds[rd] = q_idxs
        with open(_selected_path, 'wb') as f:
            pickle.dump(selected_inds, f)

        strategy.update(q_idxs)
        strategy.net.round = rd
        # Reset to the saved initial weights so every round trains from the same start.
        strategy.net.net.load_state_dict(modelstate)
        torch.cuda.empty_cache()
        strategy.train()
        torch.cuda.empty_cache()

round 1
LeastConfidence_round_0-step=20
LeastConfidence_round_1-step=240
11 LeastConfidence_round_1-step=240-train_loss=0.01148.ckpt
round 2
LeastConfidence_round_1-step=240
LeastConfidence_round_2-step=160
38 LeastConfidence_round_2-step=160-train_loss=0.01361.ckpt
round 3
LeastConfidence_round_2-step=160
LeastConfidence_round_3-step=180
57 LeastConfidence_round_3-step=180-train_loss=0.01796.ckpt
round 4
LeastConfidence_round_3-step=180
LeastConfidence_round_4-step=200
78 LeastConfidence_round_4-step=200-train_loss=0.01437.ckpt
round 5
LeastConfidence_round_4-step=200
LeastConfidence_round_5-step=360
106 LeastConfidence_round_5-step=360-train_loss=0.02000.ckpt
round 6
LeastConfidence_round_5-step=360
LeastConfidence_round_6-step=240
128 LeastConfidence_round_6-step=240-train_loss=0.02400.ckpt
round 7
LeastConfidence_round_6-step=240
LeastConfidence_round_7-step=240
150 LeastConfidence_round_7-step=240-train_loss=0.01333.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True 

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


round 8
LeastConfidence_round_7-step=240
LeastConfidence_round_8-step=180
168 LeastConfidence_round_8-step=180-train_loss=0.02847.ckpt


Predicting: 0it [00:00, ?it/s]

TypeError: 'NoneType' object is not iterable

In [20]:
# Manual single-round run: load the best round-(rd - 1) checkpoint, query, update and retrain.
rd = 1
print('round ' + str(rd))
# NOTE(review): this rebinds the notebook-wide strategy_name, but `strategy` was created in an
# earlier cell; confirm it was built for 'MeanSTD' before trusting this cell's results.
strategy_name = 'MeanSTD'
log_file_name = log_folder_root + 'rns_active/active_logs_' + strategy_name + '/logger_round_' + str(
    rd - 1) + '/version_0/metrics.csv'
logs = pd.read_csv(log_file_name)
max_ind = logs['val_acc'].argmax()
max_row = logs.iloc[max_ind]
ckpt_directory = ckpt_folder_root + 'rns_active/active_checkpoints_' + strategy_name
ckpt_files = os.listdir(ckpt_directory)
# The logged step is offset by one relative to the step in the checkpoint name.
load_file_name = strategy_name + '_round_' + str(rd - 1) + '-step=' + str(int(max_row['step']+1))
print(load_file_name)

# Match up to the '-' that follows the step number; a bare substring test would let
# '...-step=60' also match '...-step=600-...' and pick whichever file is listed first.
ind = next((i for i, s in enumerate(ckpt_files) if (load_file_name + '-') in s), None)
if ind is None:
    raise FileNotFoundError(
        'No checkpoint starting with ' + load_file_name + ' found in ' + ckpt_directory)
print(ind, ckpt_files[ind])
strategy.net.net = strategy.net.net.load_from_checkpoint(ckpt_directory + '/' + ckpt_files[ind], backbone=swav.backbone)

q_idxs = strategy.query(NUM_QUERY*90)

# The selection is not written to selected_indices.pkl in this manual run.
strategy.update(q_idxs)
strategy.net.round = rd
# Reset to the saved initial weights before retraining.
strategy.net.net.load_state_dict(modelstate)
torch.cuda.empty_cache()
strategy.train()
torch.cuda.empty_cache()

round 1
MeanSTD_round_0-step=60
2 MeanSTD_round_0-step=60-train_loss=0.01130.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

100%|███████████████████████████████████████████████████████████| 1710/1710 [13:19<00:00,  2.14it/s]


[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True 

AssertionError: 

In [33]:
# Only needed for an optional PCA down-sampling of the embeddings, which is not applied here.
from sklearn.decomposition import PCA

# Number of items to select in this experiment.
n = 50

# Fetch the un-augmented training pool together with its labeled mask, then embed it.
labeled_idxs, train_data = strategy.dataset.get_train_data_unaugmented()
_first_output, embeddings_t, seq_len = strategy.get_embeddings(train_data)
# Work with the NumPy view of the second returned tensor.
embeddings = embeddings_t.numpy()

# Preserve the original mask: later cells mutate labeled_idxs in place.
labeled_idxs_copy = deepcopy(labeled_idxs)

# The core-set selection that used to be sketched here as commented-out code is carried out by
# the executable cells that build `mat` and run the greedy loop.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

In [34]:
# Scale every embedding row to unit L2 norm, so the Gram matrix holds cosine similarities.
row_norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
norm_data = embeddings / row_norms
dist_mat = np.dot(norm_data, norm_data.T)
# Rows: unlabeled items; columns: labeled items.
unlabeled_rows = dist_mat[~labeled_idxs, :]
mat = unlabeled_rows[:, labeled_idxs]



In [38]:
from sklearn.metrics import pairwise_distances
from scipy import stats
def init_centers(X, K, verbose=True):
    """Pick K rows of X as centers by sequential distance-weighted sampling.

    The first center is the row with the largest L2 norm. Every further center is drawn with
    probability proportional to the squared Euclidean distance between a row and its nearest
    already-chosen center (the D^2 weighting used by k-means++ seeding). Sampling uses the
    global NumPy random state, so seed it beforehand for reproducible results.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Candidate points.
    K : int
        Number of centers to pick.
    verbose : bool, default True
        Print the running total distance after each pick.

    Returns
    -------
    indsAll : list of int
        Row indices of the chosen centers, in pick order (K entries when K >= 1).
    dist : list of float
        Sum of nearest-center distances measured before each pick after the first.

    Raises
    ------
    ValueError
        If X is not a non-empty 2-D array or K exceeds the number of rows (the previous
        implementation looped forever in the latter case).
    """
    X = np.asarray(X)
    if X.ndim != 2 or len(X) == 0:
        raise ValueError('X must be a non-empty 2-D array of shape (n_samples, n_features)')
    n_samples = len(X)
    if K > n_samples:
        raise ValueError('cannot pick K=%d distinct centers from %d samples' % (K, n_samples))

    ind = int(np.argmax(np.linalg.norm(X, axis=1)))
    indsAll = [ind]
    chosen = {ind}  # set for O(1) membership tests
    dist = []
    D2 = None  # distance from every row to its nearest chosen center
    if verbose:
        print('#Samps\tTotal Distance')
    while len(indsAll) < K:
        # Only the most recently added center can shrink a nearest-center distance.
        newD = np.linalg.norm(X - X[indsAll[-1]], axis=1).astype(float)
        D2 = newD if D2 is None else np.minimum(D2, newD)
        total = float(D2.sum())
        if verbose:
            print(str(len(indsAll)) + '\t' + str(total), flush=True)
        dist.append(total)

        weights = D2 ** 2
        weight_sum = weights.sum()
        if weight_sum > 0:
            Ddist = weights / weight_sum
            customDist = stats.rv_discrete(name='custm', values=(np.arange(n_samples), Ddist))
            ind = int(customDist.rvs(size=1)[0])
            # Chosen rows have zero weight, so a redraw is only a safeguard.
            while ind in chosen:
                ind = int(customDist.rvs(size=1)[0])
        else:
            # Every remaining row coincides with a chosen center, so the sampling weights are
            # undefined (0/0); all candidates are equivalent, take the first unchosen one.
            ind = next(i for i in range(n_samples) if i not in chosen)
        indsAll.append(ind)
        chosen.add(ind)
    return indsAll, dist

In [39]:
# Gradient embeddings of the un-augmented training pool, then 5*n distance-weighted centers.
unlabeled_idxs, unlabeled_data = strategy.dataset.get_train_data_unaugmented()
gradEmbedding, seq_len = strategy.get_grad_embeddings(unlabeled_data)
chosen, dist = init_centers(gradEmbedding, 5 * n)

# Score each chosen center (except the last one) with the total distance recorded when it was
# picked; every other item keeps a score of 0.
fillin_metrics = np.zeros(len(unlabeled_idxs))
for center_idx, total_dist in zip(chosen[:-1], dist):
    fillin_metrics[center_idx] = total_dist

# Cap the single largest score at the value of the second largest.
score_order = fillin_metrics.argsort()
fillin_metrics[score_order[-1]] = fillin_metrics[score_order[-2]]

uncertainties = fillin_metrics
to_select = strategy.metrics_distribution_rescaling(uncertainties, seq_len, unlabeled_idxs, n)
unlabeled_idxs, _ = strategy.dataset.get_unlabeled_data()
print('selected', np.sum(to_select))

# The selection mask must line up with the unlabeled items.
assert len(to_select) == len(unlabeled_idxs)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

#Samps	Total Distance
1	91954.29799774109
2	36888.86382275936
3	34325.98988288736
4	29751.294698393645
5	27754.661528141
6	26672.595411641236
7	26134.84792271247
8	25814.182172446504
9	25161.49004465202
10	25089.674203775237
11	25017.36734314845
12	24431.547242161556
13	23548.123986238115
14	23542.379480949476
15	23328.207193399918
16	23192.096182291138
17	22613.528796658356
18	22461.36411741375
19	22244.9644297939
20	22074.18983422627
21	22004.12830009629
22	21932.474521026154
23	21926.519339292132
24	21856.157033353138
25	21787.565586386194
26	21680.66272477011
27	21626.984085754862
28	21608.51903834848
29	21515.666649753428
30	21453.467028336137
31	21384.817326520417
32	21362.055406111478
33	21258.97604452031
34	21172.77627804998
35	21154.525784243335
36	21073.26730245395
37	20988.336592283675
38	20925.690262454806
39	20871.391722560886
40	20766.333944792095
41	20656.39961538286
42	20617.356237571188
43	20587.376899415936
44	20530.926963346363
45	20487.759574093077
46	20471.83200576

In [35]:
# Greedy core-set style selection of n items, followed by a rescaling step.
# NOTE(review): this cell mutates `mat` and `labeled_idxs` in place, so it is not idempotent —
# re-run the cells that build them before re-running this one.
n = 100
for i in tqdm(range(n), ncols=100):
    # For each unlabeled row take its minimum entry over the labeled columns, then pick the row
    # where that minimum is largest.
    # NOTE(review): dist_mat is built from unit-normalised dot products (a similarity, not a
    # distance); confirm that max-of-min is the intended criterion for a similarity matrix.
    mat_min = mat.min(axis=1)
    q_idx_ = mat_min.argmax()
    # Translate the row position within the unlabeled subset into a pool index.
    q_idx = np.arange(strategy.dataset.n_pool)[~labeled_idxs][q_idx_]
    labeled_idxs[q_idx] = True
    # Drop the picked row and add a column for it, so it now counts as labeled.
    mat = np.delete(mat, q_idx_, 0)
    mat = np.append(mat, dist_mat[~labeled_idxs, q_idx][:, None], axis=1)

# Pool indices whose labeled flag differs between the dataset's mask and the updated mask.
output = np.arange(strategy.dataset.n_pool)[(strategy.dataset.labeled_idxs ^ labeled_idxs)]

# Similarity of every item to the newly picked items; the per-item minimum is used as the score.
norm_data = embeddings_t / np.linalg.norm(embeddings_t, axis=1, keepdims=True)
norm_data_core = embeddings_t[output] / np.linalg.norm(embeddings_t[output], axis=1, keepdims=True)
similarity_matrix = np.dot(norm_data, norm_data_core.T)
dis = np.min(similarity_matrix, 1)
# Force the picked items themselves to a score of 1.
dis[output] = 1
uncertainties = dis
# Uses the mask copy taken before the greedy loop modified labeled_idxs.
to_select = strategy.metrics_distribution_rescaling(uncertainties, seq_len, labeled_idxs_copy, n, descending=True)
unlabeled_idxs, _ = strategy.dataset.get_unlabeled_data()
print('selected', np.sum(to_select))
# The selection mask must line up with the unlabeled items.
assert len(to_select) == len(unlabeled_idxs)


  0%|                                                                       | 0/100 [00:00<?, ?it/s][A
  1%|▋                                                              | 1/100 [00:00<00:19,  4.95it/s][A
  2%|█▎                                                             | 2/100 [00:00<00:18,  5.27it/s][A
  3%|█▉                                                             | 3/100 [00:00<00:18,  5.36it/s][A
  4%|██▌                                                            | 4/100 [00:00<00:17,  5.39it/s][A
  5%|███▏                                                           | 5/100 [00:00<00:17,  5.43it/s][A
  6%|███▊                                                           | 6/100 [00:01<00:17,  5.49it/s][A
  7%|████▍                                                          | 7/100 [00:01<00:17,  5.46it/s][A
  8%|█████                                                          | 8/100 [00:01<00:16,  5.46it/s][A
  9%|█████▋                                                    

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True 

In [30]:
# Debug display: current labeled mask.
labeled_idxs

array([False, False, False, ..., False, False, False])

In [24]:
# Debug display: per-item scores passed to metrics_distribution_rescaling.
uncertainties

array([0.6737734 , 0.7429988 , 0.7924017 , ..., 0.63798904, 0.6672902 ,
       0.6094841 ], dtype=float32)

In [23]:
# Debug display: minimum-similarity scores (the same array that `uncertainties` refers to).
dis

array([0.6737734 , 0.7429988 , 0.7924017 , ..., 0.63798904, 0.6672902 ,
       0.6094841 ], dtype=float32)

In [31]:
# Debug display: indices returned by strategy.dataset.get_unlabeled_data().
unlabeled_idxs

array([    0,     1,     2, ..., 85108, 85109, 85110])

In [21]:
# Debug display: labeled mask (repeat inspection; the execution counts show these cells were run
# out of order).
labeled_idxs

array([False, False, False, ..., False, False, False])

In [29]:
# Debug display: length of the selection mask, which the asserts above compare against
# len(unlabeled_idxs).
len(to_select)

84082