#Install libraries

In [None]:
!pip install transformers
!pip install torch torchvision
!pip install pandas
!pip install numpy
!pip install datasets
!pip install pytorch_transformers
!pip install scikit-learn
!pip install matplotlib
!pip install seaborn
!pip install nltk

Import the required libraries

In [None]:
import torch
from torch.utils.data import (TensorDataset, DataLoader,
                              RandomSampler, SequentialSampler)

from pytorch_transformers import BertTokenizer, BertConfig
from pytorch_transformers import BertForSequenceClassification
from pytorch_transformers import AdamW, WarmupLinearSchedule

from distutils.version import LooseVersion as LV

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve, precision_recall_curve
import torch.nn.functional as F
import io
from scipy.spatial.distance import mahalanobis
import pandas as pd
import numpy as np

import matplotlib
matplotlib.use('Agg')
#matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import seaborn as sns

from datasets import load_dataset

import tensorflow_datasets as tfds

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords


sns.set()

if torch.cuda.is_available():
    device = torch.device('cuda')
    devicename = '['+torch.cuda.get_device_name(0)+']'
else:
    device = torch.device('cpu')
    devicename = ""
    
print('Using PyTorch version:', torch.__version__,
      'Device:', device, devicename)
assert(LV(torch.__version__) >= LV("1.0.0"))


Download the IMDb and SST-2 datasets and extract them.

In [None]:
# Load the IMDB dataset
imdb_dataset = load_dataset("imdb")


# Load the SST-2 dataset
sst2_dataset = load_dataset("glue", "sst2")

Load the IMDb dataset using pandas, and preprocess the text data by removing HTML tags, non-alphanumeric characters, and stop words.

In [15]:
print(imdb_dataset.column_names)

{'train': ['text', 'label'], 'test': ['text', 'label'], 'unsupervised': ['text', 'label']}


In [16]:
# Load the IMDb dataset
imdb_df = pd.concat([pd.DataFrame(imdb_dataset['train']),pd.DataFrame(imdb_dataset['test'])])
imdb_df = imdb_df.reset_index(drop=True)


print('\nIMDB data loaded:')
print('data set:', imdb_df.shape)
print(imdb_df['label'].unique())


IMDB data loaded:
data set: (50000, 2)
[0 1]


Load the SST-2 dataset using pandas, and preprocess the text data in the same way as the IMDb dataset

In [17]:
print(sst2_dataset.column_names)

{'train': ['sentence', 'label', 'idx'], 'validation': ['sentence', 'label', 'idx'], 'test': ['sentence', 'label', 'idx']}


In [18]:
# Load the SST-2 dataset

sst2_df = pd.concat([pd.DataFrame(sst2_dataset['train'])[['sentence', 'label']],pd.DataFrame(sst2_dataset['validation'])[['sentence', 'label']]])
sst2_df = sst2_df.rename(columns={'sentence': 'text'})
sst2_df = sst2_df.reset_index(drop=True)


print('\nSST2 data loaded:')
print('data set:', sst2_df.shape)
print(sst2_df['label'].unique())



SST2 data loaded:
data set: (68221, 2)
[0 1]


In [19]:
# Preprocess the text data
sst2_df ['text'] = sst2_df ['text'].str.replace('<.*?>', '', regex=True) # remove HTML tags
sst2_df ['text'] = sst2_df ['text'].str.replace('[^a-zA-Z0-9\s]', '', regex=True) # remove non-alphanumeric characters
stop_words = set(stopwords.words('english'))
sst2_df ['text'] = sst2_df ['text'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words])) # remove stop words

In [20]:
# Let's view some random reviews:
print(sst2_df.sample(5))

                                                    text  label
41637  rare family movie genuine sweet without relyin...      1
28062                                    even reassuring      1
12011                           subtlety never trademark      0
41550  psychology real narrative logic series careful...      0
48956  pop cinematic year already littered celluloid ...      0


Split into train and test set

In [21]:
# Define your features and target variable
X = sst2_df.drop("label", axis=1)
y = sst2_df["label"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the shape of the train and test sets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

sst2_train_df = pd.concat([X_train,y_train], axis=1)
sst2_test_df = pd.concat([X_test,y_test], axis=1)

print('\nSST2 data re splitted:')
print('train:', sst2_train_df.shape)
print('test:', sst2_test_df.shape)
print(sst2_train_df['label'].unique())
print(sst2_test_df['label'].unique())


X_train shape: (54576, 1)
X_test shape: (13645, 1)
y_train shape: (54576,)
y_test shape: (13645,)

SST2 data re splitted:
train: (54576, 2)
test: (13645, 2)
[1 0]
[1 0]


In [22]:
# Let's view some random reviews:
print(sst2_train_df.sample(5))
print(sst2_test_df.sample(5))

                                                 text  label
55980  filmmakers want nothing else show us good time      1
58818                           one alternate reality      0
34216                                  nt funny hoped      0
64863                       nt even bother rent video      0
35549                                      disturbing      0
                                                    text  label
27840                           supposed romantic comedy      0
41442  300 hundred years russian cultural identity st...      1
49660                           enervating determination      1
54246  secretary takes unexpected material handles un...      1
32648                                               back      0


IN-DS: SST2
OOD-DS: IMDB

In [23]:
#Temporary limit the IN-DS size to 15%
n = 0.15

train_df = sst2_train_df.sample(int(n*sst2_train_df.shape[0]))
test_df = sst2_test_df.sample(int(n*sst2_test_df.shape[0]))

train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)


n_ood = 0.3
ood_df = imdb_df.sample(int(n_ood*train_df.shape[0]))
ood_df = ood_df.reset_index(drop=True)

In [24]:
# Preprocess the text data
ood_df ['text'] = ood_df ['text'].str.replace('<.*?>', '', regex=True) # remove HTML tags
ood_df ['text'] = ood_df ['text'].str.replace('[^a-zA-Z0-9\s]', '', regex=True) # remove non-alphanumeric characters
stop_words = set(stopwords.words('english'))
ood_df ['text'] = ood_df ['text'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words])) # remove stop words


# Let's view some random reviews:
print(ood_df.sample(5))

                                                   text  label
2204  I saw kung fu movie I kid I thought cool Now I...      1
363   In film I prefer Deacon Frost Hes sexy I love ...      1
1668  There one film I think might good better one c...      1
1144  Not best Lone Star series moves along quickly ...      0
2100  Relentlessly stupid nobudget war picture made ...      0


In [25]:
del X_train, X_test, y_train, y_test

The token `[CLS]` is a special token required by BERT at the beginning of the sentence.

In [26]:
sentences_train = train_df.text.values
sentences_train = ["[CLS] " + s for s in sentences_train]

sentences_test = test_df.text.values
sentences_test = ["[CLS] " + s for s in sentences_test]

sentences_ood = ood_df.text.values
sentences_ood = ["[CLS] " + s for s in sentences_ood]


labels_train = train_df.label.values
labels_test  = test_df.label.values
labels_ood  = ood_df.label.values

print ("\nThe first training sentence:")
print(sentences_train[0], 'LABEL:', labels_train[0])



The first training sentence:
[CLS] dumb gags anatomical humor character cliches LABEL: 0


Next we use the BERT tokenizer to convert the sentences into tokens
that match the data BERT was trained on.


In [27]:
BERTMODEL = "bert-base-uncased"

tokenizer = BertTokenizer.from_pretrained(BERTMODEL,
                                          do_lower_case=True)

tokenized_train = [tokenizer.tokenize(s) for s in sentences_train]
tokenized_test  = [tokenizer.tokenize(s) for s in sentences_test]
tokenized_ood  = [tokenizer.tokenize(s) for s in sentences_ood]

print ("\nThe full tokenized first training sentence:")
print (tokenized_train[0])

print ("\nThe full tokenized first test sentence:")
print (tokenized_test[0])

print ("\nThe full tokenized first OOD sentence:")
print (tokenized_ood[0])






  0%|          | 0/231508 [00:00<?, ?B/s][A[A[A[A[A




  0%|          | 1024/231508 [00:00<00:49, 4624.15B/s][A[A[A[A[A




 15%|█▌        | 34816/231508 [00:00<00:02, 90346.52B/s][A[A[A[A[A




 45%|████▌     | 104448/231508 [00:00<00:00, 187808.10B/s][A[A[A[A[A




100%|██████████| 231508/231508 [00:00<00:00, 253488.13B/s]



The full tokenized first training sentence:
['[CLS]', 'dumb', 'gag', '##s', 'anatomical', 'humor', 'character', 'cl', '##iche', '##s']

The full tokenized first test sentence:
['[CLS]', 'blockbuster', '##s', 'poll', '##ute', 'summer', 'movie', 'pool']

The full tokenized first OOD sentence:
['[CLS]', 'spoil', '##ers', '##i', 'pleasantly', 'surprised', 'find', 'harsh', 'criticisms', 'acting', 'dated', 'dialogue', 'unclear', 'storyline', 'un', '##founded', 'bela', '##fo', '##nte', 'great', 'brand', '##oes', '##que', 'menacing', 'swearing', 'spirit', 'must', 'earn', 'wings', 'realistic', '##ally', 'ill', '##e', '##qui', '##pped', 'past', 'life', 'he', 'learns', 'late', 'empty', 'hu', '##st', '##ling', 'material', '##istic', 'life', 'without', 'love', 'most', '##el', 'likewise', 'great', 'anguish', '##ed', 'man', 'dying', 'wife', 'fanny', 'in', 'spite', 'prayers', 'miracle', 'bitterness', 'prevents', 'accepting', 'believing', 'one', 'the', 'two', 'social', 'worlds', 'characters', 'represe


Now we set the maximum sequence lengths for our training and test
sentences as `MAX_LEN_TRAIN` and `MAX_LEN_TEST`. The maximum length
supported by the used BERT model is 512.

The token `[SEP]` is another special token required by BERT at the
end of the sentence.

In [28]:
MAX_LEN_TRAIN, MAX_LEN_TEST = 128, 512

tokenized_train = [t[:(MAX_LEN_TRAIN-1)]+['SEP'] for t in tokenized_train]
tokenized_test  = [t[:(MAX_LEN_TEST-1)]+['SEP'] for t in tokenized_test]
tokenized_ood  = [t[:(MAX_LEN_TEST-1)]+['SEP'] for t in tokenized_ood]

print ("\nThe truncated tokenized first training sentence:")
print (tokenized_train[0])


The truncated tokenized first training sentence:
['[CLS]', 'dumb', 'gag', '##s', 'anatomical', 'humor', 'character', 'cl', '##iche', '##s', 'SEP']



Next we use the BERT tokenizer to convert each token into an integer
index in the BERT vocabulary. We also pad any shorter sequences to
`MAX_LEN_TRAIN` or `MAX_LEN_TEST` indices with trailing zeros.

In [29]:
ids_train = [tokenizer.convert_tokens_to_ids(t) for t in tokenized_train]
ids_train = np.array([np.pad(i, (0, MAX_LEN_TRAIN-len(i)),
                             mode='constant') for i in ids_train])

ids_test = [tokenizer.convert_tokens_to_ids(t) for t in tokenized_test]
ids_test = np.array([np.pad(i, (0, MAX_LEN_TEST-len(i)),
                            mode='constant') for i in ids_test])


ids_ood = [tokenizer.convert_tokens_to_ids(t) for t in tokenized_ood]
ids_ood = np.array([np.pad(i, (0, MAX_LEN_TEST-len(i)),
                            mode='constant') for i in ids_ood])

print ("\nThe indices of the first training sentence:")
print (ids_train[0])


The indices of the first training sentence:
[  101 12873 18201  2015 28141  8562  2839 18856 17322  2015   100     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0]


BERT also requires *attention masks*, with 1 for each real token in
the sequences and 0 for the padding:

In [30]:
amasks_train, amasks_test , amasks_ood = [], [] , []

for seq in ids_train:
  seq_mask = [float(i>0) for i in seq]
  amasks_train.append(seq_mask)

for seq in ids_test:
  seq_mask = [float(i>0) for i in seq]
  amasks_test.append(seq_mask)


for seq in ids_ood:
  seq_mask = [float(i>0) for i in seq]
  amasks_ood.append(seq_mask)

We use scikit-learn's train_test_split() to use 10% of our training
data as a validation set, and then convert all data into
torch.tensors.

In [31]:
(train_inputs, validation_inputs,
 train_labels, validation_labels) = train_test_split(ids_train, labels_train,
                                                     random_state=42,
                                                     test_size=0.1)
(train_masks, validation_masks,
 _, _) = train_test_split(amasks_train, ids_train,
                          random_state=42, test_size=0.1)

train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels)
train_masks  = torch.tensor(train_masks)
validation_inputs = torch.tensor(validation_inputs)
validation_labels = torch.tensor(validation_labels)
validation_masks  = torch.tensor(validation_masks)
test_inputs = torch.tensor(ids_test)
test_labels = torch.tensor(labels_test)
test_masks  = torch.tensor(amasks_test)
ood_inputs = torch.tensor(ids_ood)
ood_labels = torch.tensor(labels_ood)
ood_masks  = torch.tensor(amasks_ood)



Next we create PyTorch *DataLoader*s for all data sets.
For fine-tuning BERT on a specific task, the authors recommend a
batch size of 16 or 32.

In [32]:
BATCH_SIZE = 8

print('\nDatasets:')
print('Train: ', end="")
train_data = TensorDataset(train_inputs, train_masks,
                           train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler,
                              batch_size=BATCH_SIZE)
print(len(train_data), 'reviews')

print('Validation: ', end="")
validation_data = TensorDataset(validation_inputs, validation_masks,
                                validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data,
                                   sampler=validation_sampler,
                                   batch_size=BATCH_SIZE)
print(len(validation_data), 'reviews')

print('Test: ', end="")
test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler,
                             batch_size=BATCH_SIZE)
print(len(test_data), 'reviews')


print('OOD: ', end="")
ood_data = TensorDataset(ood_inputs, ood_masks, ood_labels)
ood_sampler = SequentialSampler(ood_data)
ood_dataloader = DataLoader(ood_data, sampler=ood_sampler,
                             batch_size=BATCH_SIZE)
print(len(ood_data), 'reviews')


Datasets:
Train: 7367 reviews
Validation: 819 reviews
Test: 2046 reviews
OOD: 2455 reviews


BERT MODEL INITIALIZATION

We now load a pretrained BERT model with a single linear
classification layer added on top.


In [33]:
model = BertForSequenceClassification.from_pretrained(BERTMODEL,
                                                      num_labels=2,
                                                      output_hidden_states=True)


model.cuda()
print('\nPretrained BERT model "{}" loaded'.format(BERTMODEL))


100%|██████████| 433/433 [00:00<00:00, 135532.36B/s]
100%|██████████| 440473133/440473133 [00:37<00:00, 11650771.84B/s]



Pretrained BERT model "bert-base-uncased" loaded



We set the remaining hyperparameters needed for fine-tuning the
pretrained model: 
 * EPOCHS: the number of training epochs in fine-tuning
   (recommended values between 2 and 4) 
 * WEIGHT_DECAY: weight decay for the Adam optimizer 
 * LR: learning rate for the Adam optimizer 
   (2e-5 to 5e-5 recommended) 
 * WARMUP_STEPS: number of warmup steps to (linearly) reach the
   set learning rate

 We also need to grab the training parameters from the pretrained
 model.

In [34]:
EPOCHS = 4
WEIGHT_DECAY = 0.01
LR = 2e-5
WARMUP_STEPS =int(0.2*len(train_dataloader))

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': WEIGHT_DECAY},
    {'params': [p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}
]
optimizer = AdamW(optimizer_grouped_parameters, lr=LR, eps=1e-8)
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=WARMUP_STEPS,
                                 t_total=len(train_dataloader)*EPOCHS)

LEARNING

Let's now define functions to train() and evaluate() the model:

In [35]:
def train(epoch, loss_vector=None, log_interval=200):
    # Set model to training mode
    model.train().to(device)

    # Loop over each batch from the training set
    for step, batch in enumerate(train_dataloader):
        # Copy data to GPU if needed
        b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

        # Zero gradient buffers
        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            # Forward pass
            loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask,
                         labels=b_labels)[0]

        if loss_vector is not None:
            loss_vector.append(loss.item())

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()
        scheduler.step()

        # Clear unused variables
        del  b_input_mask, b_labels

        if step % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                  epoch, step * len(b_input_ids), len(train_dataloader.dataset),
                  100. * step / len(train_dataloader), loss.item()))
            
    # Clear unused variables
    del b_input_ids,batch, loss


In [36]:
def evaluate(loader):
  model.eval()

  n_correct, n_all = 0, 0

  for batch in loader:
    batch = tuple(t.to(device) for t in batch)
    b_input_ids, b_input_mask, b_labels = batch

    with torch.no_grad():
      outputs = model(b_input_ids, token_type_ids=None,
                      attention_mask=b_input_mask)
      return outputs
      logits = outputs[0]

    logits = logits.detach().cpu().numpy()
    predictions = np.argmax(logits, axis=1)

    labels = b_labels.to('cpu').numpy()
    n_correct += np.sum(predictions == labels)
    n_all += len(labels)

  print('Accuracy: [{}/{}] {:.4f}'.format(n_correct, n_all,
                                          n_correct/n_all))



    

Now we are ready to train our model using the train()
function. After each epoch, we evaluate the model using the
validation set and evaluate().

In [None]:
train_lossv = []
for epoch in range(1, EPOCHS + 1):
    print()
    train(epoch, train_lossv)
    print('\nValidation set:')
    evaluate(validation_dataloader)



```
# Ce texte est au format code
```

# OOD detection 

In [53]:
#define the function that calculates the metrics 
def metrics(scores: np.ndarray, labels: np.ndarray, threshold: float):
    pos = np.where(scores >= threshold) 
    neg = np.where(scores < threshold)
    n_pos = len(pos[0])
    n_neg = len(neg[0])

    tp = np.sum(labels[pos])
    fp = n_pos - tp
    fn = np.sum(labels[neg])
    tn = n_neg - fn

    FPR = fp / (fp + tn)
    Accuracy = (tp+tn)/len(scores)
    ERR = 1 - Accuracy
    return FPR, ERR

In [49]:
#the aggregation function where we define the number of layers used
def aggregation(all_layers,num_layers):
  agg_layers = all_layers[-1][:,0,:]
  for i in range(-2,-num_layers,-1):
    agg_layers += (1/num_layers)*all_layers[-i][:,0,:]
  return agg_layers
    




In [51]:
def extract_bert_features(loader,num_layers):
    model.eval()

    label_list = []
    pred_list = []
    agg_layers_list = []
    for batch in loader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask)
            agg_layers = aggregation(outputs[1],num_layers)
            logits = outputs[0]
            predictions = np.argmax(logits.detach().cpu().numpy(), axis=1)

        agg_layers_list.append(agg_layers)
        label_list.append(b_labels.cpu().numpy())
        pred_list.append(predictions)

    agg_layers_list = torch.cat(agg_layers_list, dim=0).to('cpu').numpy()
    labels = np.concatenate(label_list, axis=0)
    predictions = np.concatenate(pred_list, axis=0)

    return agg_layers_list, labels, predictions

In [54]:
def train_distribution(train_dataloader,test_dataloader,ood_dataloader,num_layers): 
  #returns all the features extracted from BERT for IN-data and OOD-data
  train_features = extract_bert_features(train_dataloader,num_layers)
  test_features = extract_bert_features(test_dataloader,num_layers)
  ood_features = extract_bert_features(ood_dataloader,num_layers)

  train_mean = np.mean(train_features[0], axis=0) 
               
  train_cov = np.cov(train_features[0], rowvar=False)
                                          
  train_inv_cov = np.linalg.inv(train_cov)

  return train_mean,train_cov,train_inv_cov,test_features,ood_features



In [55]:
def results(train_dataloader,test_dataloader,ood_dataloader,num_layers):

  a = train_distribution(train_dataloader,test_dataloader,ood_dataloader,num_layers)
  
  train_mean = a[0]
  train_cov = a[1]
  train_inv_cov = a[2]
  test_features = a[3]
  ood_features = a[4]

  inds_test_scores = []
  ood_test_scores = []

  for feature in test_features[0]:
     score = mahalanobis(feature, train_mean, train_inv_cov)
     inds_test_scores.append(score)

  for feature in ood_features[0]:
    score = mahalanobis(feature, train_mean, train_inv_cov)
    ood_test_scores.append(score)

  
  labels = np.concatenate([np.zeros(len(inds_test_scores)), np.ones(len(ood_test_scores))])
  scores = np.concatenate([inds_test_scores, ood_test_scores])

  threshold = np.mean(inds_test_scores) +  np.std(inds_test_scores)

  FPR, ERR = metrics (scores, labels, threshold)
  auroc = roc_auc_score(labels, scores)
  aupr = average_precision_score(labels, scores)

  return auroc,aupr,ERR



In [60]:
AUROC = []
AUPR = []
ERR = []

for num_layers in range(2,15):
  auroc,aupr,err = results(train_dataloader,test_dataloader,ood_dataloader,num_layers)
  AUROC.append(auroc)
  AUPR.append(aupr)
  ERR.append(err)
  print(str(num_layers))
  print('AUROC:', auroc)
  print('AUPR:', aupr)
  print('ERR:', ERR)
  
 

2
AUROC: 0.7712371066289995
AUPR: 0.7311687097273454
ERR: [0.36991779604532327]
3
AUROC: 0.7845787219809951
AUPR: 0.7452470359975572
ERR: [0.36991779604532327, 0.35525438791379693]
4
AUROC: 0.7929067297374242
AUPR: 0.7518104322184359
ERR: [0.36991779604532327, 0.35525438791379693, 0.34770051099755606]
5
AUROC: 0.7922272458505295
AUPR: 0.7527720070035466
ERR: [0.36991779604532327, 0.35525438791379693, 0.34770051099755606, 0.3479226838480338]
6
AUROC: 0.7807017019946525
AUPR: 0.7428963578289083
ERR: [0.36991779604532327, 0.35525438791379693, 0.34770051099755606, 0.3479226838480338, 0.3628082648300378]
7
AUROC: 0.7737856191505754
AUPR: 0.7395086995182332
ERR: [0.36991779604532327, 0.35525438791379693, 0.34770051099755606, 0.3479226838480338, 0.3628082648300378, 0.3696956231948456]
8
AUROC: 0.7521615869621915
AUPR: 0.7156088034477636
ERR: [0.36991779604532327, 0.35525438791379693, 0.34770051099755606, 0.3479226838480338, 0.3628082648300378, 0.3696956231948456, 0.39857809375694286]
9
AUROC:

IndexError: ignored

FIN SAMI