# BETO Fine-Tuning Acronym Classification

M.Elena García García

## Environment

### Activating the GPU

In [None]:
import tensorflow as tf

# Ask TensorFlow for the GPU device name; the notebook only continues when that
# name is exactly '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print('Found GPU at: {}'.format(device_name))
else:
  raise SystemError('GPU device not found')

### Installing the Hugging Face PyTorch Interface for Bert

In [None]:
!pip install -q transformers=='4.6.1'

### Importing the modules

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertConfig, BertForMaskedLM
from transformers import AdamW, BertForSequenceClassification, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import pandas as pd
import io
import numpy as np
import matplotlib.pyplot as plt
from torch.nn import functional as F
% matplotlib inline

In [None]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

### Functions

In [None]:
def metric_fn(labels, preds):
    """Return binary-averaged F1, recall and precision as a dict.

    Args:
        labels: True class labels.
        preds: Predicted class labels, in the same order as ``labels``.

    Returns:
        A dict with the keys ``eval_f1``, ``eval_recall`` and ``eval_precision``.
    """
    scores = precision_recall_fscore_support(labels, preds, average='binary')
    # The fourth entry returned by scikit-learn is not used here.
    precision, recall, f1 = scores[:3]
    return dict(eval_f1=f1, eval_recall=recall, eval_precision=precision)

In [None]:
# Accuracy measurement helper
def flat_accuracy(preds, labels):
    """Return the fraction of examples whose arg-max prediction equals its label.

    Args:
        preds: Array of per-class scores; the predicted class of each row is
            the index of its largest value along axis 1.
        labels: Array of true class ids; it is flattened before comparison.

    Returns:
        Number of matching positions divided by the number of labels.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    expected_classes = labels.flatten()
    n_correct = (predicted_classes == expected_classes).sum()
    return n_correct / len(expected_classes)

### Specifying CUDA as the device for Torch

In [None]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
# Only ask for the GPU name when CUDA is usable: get_device_name(0) raises on a
# CPU-only machine, which would defeat the fallback above.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

## Data

In [None]:
# Load the training examples from CSV (path is relative to the working directory).
df = pd.read_csv("../data/data_train/train_data_beto_10_NOamb_lfnorm_medline.csv", delimiter=',')
# Show (rows, columns) as a quick sanity check.
df.shape

In [None]:
# Inspect ten randomly chosen rows.
df.sample(10)

In [None]:
# Keep only the four columns used by the rest of the notebook.
df = df[['short_form', 'context','long_form', 'label']]

In [None]:
# Build the sentence-pair text for every row, with the BERT special tokens
# written out as plain text:
#   "[CLS] <long_form> [SEP] <context> [SEP]"
df['sentences'] = df['long_form'].map(lambda x: "[CLS] " + x + " [SEP] ")+df['context'] + ' [SEP]'

In [None]:
# Check the new 'sentences' column on the first rows.
df.head()

In [None]:
# Pull the model inputs (sentence-pair strings) and the targets out of the
# DataFrame as arrays.
sentences = df['sentences'].values

labels = df['label'].values

In [None]:
# Show the first three sentence-pair strings.
sentences[:3]

### Activating the BETO Tokenizer

In [None]:
# Load the cased BETO tokenizer and register '<start>' / '<end>' as additional
# special tokens.
# NOTE(review): `is_split_into_words` is normally an argument of the tokenizer
# call rather than of `from_pretrained` — presumably it has no effect here; confirm.
# NOTE(review): this single cased tokenizer also feeds model1, which is loaded
# from the *uncased* checkpoint further down — confirm that this is intended.
tokenizer = BertTokenizer.from_pretrained('dccuchile/bert-base-spanish-wwm-cased', is_split_into_words = True, additional_special_tokens = ['<start>', '<end>'])
# Split every sentence-pair string into tokens.
tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
print ("Tokenize the first sentence:")
print (tokenized_texts[0])


### Processing Data

In [None]:
# Every sequence is padded or cut to this many token ids.
MAX_LEN = 128

# Map each token list to the ids of its tokens in the tokenizer's vocabulary.
input_ids = list(map(tokenizer.convert_tokens_to_ids, tokenized_texts))

# Pad or truncate at the end ("post") so that every row has MAX_LEN entries.
input_ids = pad_sequences(
    input_ids,
    maxlen=MAX_LEN,
    dtype="long",
    padding="post",
    truncating="post",
)

In [None]:
# Show the padded id sequence of the first example.
input_ids[0]

In [None]:
# Attention masks: 1.0 for every id greater than 0 (a real token) and 0.0 for the
# remaining positions (the padding).
attention_masks = [[float(token_id > 0) for token_id in seq] for seq in input_ids]

In [None]:
# Split inputs, labels and attention masks into train (90%) and validation (10%).
# Passing all three to a single call shuffles them with the same permutation,
# instead of relying on a second call that happens to use the same seed.
(train_inputs, validation_inputs,
 train_labels, validation_labels,
 train_masks, validation_masks) = train_test_split(input_ids, labels, attention_masks,
                                                   random_state=2018, test_size=0.1)

In [None]:
# The model consumes torch tensors, so convert every split in turn.
train_inputs, validation_inputs = torch.tensor(train_inputs), torch.tensor(validation_inputs)
train_labels, validation_labels = torch.tensor(train_labels), torch.tensor(validation_labels)
train_masks, validation_masks = torch.tensor(train_masks), torch.tensor(validation_masks)

In [None]:
# Batch size shared by the training and validation loaders.
batch_size = 32

# Bundle (input ids, attention mask, label) per example for each split.
train_data = TensorDataset(train_inputs, train_masks, train_labels)
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)

# Training batches are drawn in random order, validation batches in dataset order.
train_sampler = RandomSampler(train_data)
validation_sampler = SequentialSampler(validation_data)

# The loaders yield one batch at a time while iterating.
train_dataloader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
validation_dataloader = DataLoader(validation_data, batch_size=batch_size, sampler=validation_sampler)


# Bert Configuration 

In [None]:
try:
    import transformers
except:
    print("Installing transformers")
    !pip -qq install transformers
  

# Initializing a model from the bero-basestyle configuration
model1 = BertForSequenceClassification.from_pretrained('dccuchile/bert-base-spanish-wwm-uncased', num_labels = 2)
model2 = BertForSequenceClassification.from_pretrained('dccuchile/bert-base-spanish-wwm-cased', num_labels = 2)
#model3 = BertForSequenceClassification.from_pretrained('bert-base-multilingual-uncased', num_labels = 2)

# Accessing the model configuration
configuration1 = model1.config
configuration2 = model2.config
#configuration3 = model3.config
#print(configuration1)

In [None]:
# Move the models to the device selected earlier in the notebook, so that the
# CPU fallback of `device` is honoured instead of unconditionally requiring CUDA.
model1.to(device)
model2.to(device)
#model3.to(device)

### Optimizer Grouped Parameters

In [None]:
param_optimizer1 = list(model1.named_parameters())
param_optimizer2 = list(model2.named_parameters())
#param_optimizer3 = list(model3.named_parameters())
# Parameters whose name contains one of these substrings are not decayed.
no_decay = ['bias', 'LayerNorm.weight']


def _grouped_parameters(named_params, decay=0.1):
    """Split (name, parameter) pairs into a decayed and a non-decayed group.

    Args:
        named_params: List of (name, parameter) pairs of one model.
        decay: Weight decay applied to the parameters not matched by ``no_decay``.

    Returns:
        Two optimizer parameter groups. The key is 'weight_decay', the name the
        optimizer actually reads; the previous 'weight_decay_rate' key was
        ignored, so no decay was applied at all.
    """
    return [
        # Parameters whose name matches none of the `no_decay` substrings.
        {'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
         'weight_decay': decay},
        # Parameters whose name matches at least one of them.
        {'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]


optimizer_grouped_parameters1 = _grouped_parameters(param_optimizer1)
optimizer_grouped_parameters2 = _grouped_parameters(param_optimizer2)
#optimizer_grouped_parameters3 = _grouped_parameters(param_optimizer3)
# Note - `optimizer_grouped_parameters` only includes the parameter values, not
# the names.

### The Hyperparameters for the Training Loop

In [None]:
# Number of training epochs (authors recommend between 2 and 4)
epochs = 8

# Total number of training steps is number of batches * number of epochs.
# `train_dataloader` contains batched data so `len(train_dataloader)` gives
# us the number of batches.
total_steps = len(train_dataloader) * epochs

# Model 1: AdamW optimizer plus a linear learning-rate schedule without warmup.
optimizer1 = AdamW(
    optimizer_grouped_parameters1,
    lr = 1e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
    eps = 1e-8,  # args.adam_epsilon  - default is 1e-8.
)
scheduler1 = get_linear_schedule_with_warmup(
    optimizer1,
    num_warmup_steps = 0,  # Default value in run_glue.py
    num_training_steps = total_steps,
)

# Model 2: same settings as model 1.
optimizer2 = AdamW(
    optimizer_grouped_parameters2,
    lr = 1e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
    eps = 1e-8,  # args.adam_epsilon  - default is 1e-8.
)
scheduler2 = get_linear_schedule_with_warmup(
    optimizer2,
    num_warmup_steps = 0,  # Default value in run_glue.py
    num_training_steps = total_steps,
)

# Model 3 (disabled):
#optimizer3 = AdamW(optimizer_grouped_parameters3, lr = 1e-5, eps = 1e-8)
#scheduler3 = get_linear_schedule_with_warmup(optimizer3,
#                                            num_warmup_steps = 0,
#                                            num_training_steps = total_steps)
#scheduler3 = get_linear_schedule_with_warmup(optimizer3, 
#                                            num_warmup_steps = 0, # Default value in run_glue.py
#                                            num_training_steps = total_steps)

# The Training Loop

### Model 1

In [None]:
# Fine-tune model 1 for `epochs` epochs and evaluate it on the validation split
# after every epoch.

# Per-batch training losses over all epochs, kept for plotting
train_loss_set = []

# trange is a tqdm wrapper around the normal python range
for _ in trange(epochs, desc="Epoch"):

  # ---------- Training ----------

  # Set our model to training mode (as opposed to evaluation mode)
  model1.train()

  # Running sum of the batch losses and number of batches seen in this epoch
  tr_loss = 0
  nb_tr_steps = 0

  # Train the data for one epoch
  for batch in train_dataloader:
    # Move the batch to the device and unpack it
    b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
    # Clear out the gradients (by default they accumulate)
    optimizer1.zero_grad()
    # Forward pass; because labels are passed, the output contains the loss
    outputs = model1(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
    loss = outputs['loss']
    # Read the scalar once and reuse it for the log and the running sum
    batch_loss = loss.item()
    train_loss_set.append(batch_loss)
    # Backward pass
    loss.backward()
    # Update parameters and take a step using the computed gradient
    optimizer1.step()
    # Update the learning rate.
    scheduler1.step()

    # Update tracking variables
    tr_loss += batch_loss
    nb_tr_steps += 1

  print("Train loss: {}".format(tr_loss/nb_tr_steps))

  # ---------- Validation ----------

  # Put model in evaluation mode to evaluate on the validation set
  model1.eval()

  # Reset every epoch, so after the loop these hold the outputs of the last
  # epoch on the validation split (despite the "_train" suffix)
  predictions1_train , true_labels1_train = [], []

  # Sum of the per-batch accuracies and number of validation batches
  eval_accuracy = 0
  nb_eval_steps = 0

  # Evaluate data for one epoch
  for batch in validation_dataloader:
    # Move the batch to the device and unpack it
    b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
    # Telling the model not to compute or store gradients, saving memory and speeding up validation
    with torch.no_grad():
      # Forward pass without labels: only the logits are needed
      outputs = model1(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

    # Turn the logits into class probabilities and move them and the labels to the CPU
    probs = outputs['logits'].softmax(dim=-1).detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    eval_accuracy += flat_accuracy(probs, label_ids)
    nb_eval_steps += 1

    # Store predictions and true labels
    predictions1_train.append(probs)
    true_labels1_train.append(label_ids)

  # Reported value is the mean of the per-batch accuracies
  print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))

### Model 2

In [None]:
# Fine-tune model 2 for `epochs` epochs and evaluate it on the validation split
# after every epoch.

# Per-batch training losses over all epochs, kept for plotting
train_loss_set = []

# trange is a tqdm wrapper around the normal python range
for _ in trange(epochs, desc="Epoch"):

  # ---------- Training ----------

  # Set our model to training mode (as opposed to evaluation mode)
  model2.train()

  # Running sum of the batch losses and number of batches seen in this epoch
  tr_loss = 0
  nb_tr_steps = 0

  # Train the data for one epoch
  for batch in train_dataloader:
    # Move the batch to the device and unpack it
    b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
    # Clear out the gradients (by default they accumulate)
    optimizer2.zero_grad()
    # Forward pass; because labels are passed, the output contains the loss
    outputs = model2(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
    loss = outputs['loss']
    # Read the scalar once and reuse it for the log and the running sum
    batch_loss = loss.item()
    train_loss_set.append(batch_loss)
    # Backward pass
    loss.backward()
    # Update parameters and take a step using the computed gradient
    optimizer2.step()
    # Update the learning rate.
    scheduler2.step()

    # Update tracking variables
    tr_loss += batch_loss
    nb_tr_steps += 1

  print("Train loss: {}".format(tr_loss/nb_tr_steps))

  # ---------- Validation ----------

  # Put model in evaluation mode to evaluate on the validation set
  model2.eval()

  # Reset every epoch, so after the loop these hold the outputs of the last
  # epoch on the validation split (despite the "_train" suffix)
  predictions2_train , true_labels2_train = [], []

  # Sum of the per-batch accuracies and number of validation batches
  eval_accuracy = 0
  nb_eval_steps = 0

  # Evaluate data for one epoch
  for batch in validation_dataloader:
    # Move the batch to the device and unpack it
    b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
    # Telling the model not to compute or store gradients, saving memory and speeding up validation
    with torch.no_grad():
      # Forward pass without labels: only the logits are needed
      outputs = model2(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

    # Turn the logits into class probabilities and move them and the labels to the CPU
    probs = outputs['logits'].softmax(dim=-1).detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    eval_accuracy += flat_accuracy(probs, label_ids)
    nb_eval_steps += 1

    # Store predictions and true labels
    predictions2_train.append(probs)
    true_labels2_train.append(label_ids)

  # Reported value is the mean of the per-batch accuracies
  print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))

### Model 3

In [None]:
# t = [] 

# # Store our loss and accuracy for plotting
# train_loss_set = []

# # trange is a tqdm wrapper around the normal python range
# for _ in trange(epochs, desc="Epoch"):
  
  
#   # Training
  
#   # Set our model to training mode (as opposed to evaluation mode)
#   model3.train()
  
#   # Tracking variables
#   tr_loss = 0
#   nb_tr_examples, nb_tr_steps = 0, 0
  
#   # Train the data for one epoch
#   for step, batch in enumerate(train_dataloader):
#     # Add batch to GPU
#     batch = tuple(t.to(device) for t in batch)
#     # Unpack the inputs from our dataloader
#     b_input_ids, b_input_mask, b_labels = batch
#     # Clear out the gradients (by default they accumulate)
#     optimizer3.zero_grad()
#     # Forward pass
#     outputs = model3(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
#     loss = outputs['loss']
#     train_loss_set.append(loss.item())    
#     # Backward pass
#     loss.backward()
#     # Update parameters and take a step using the computed gradient
#     optimizer3.step()

#     # Update the learning rate.
#     scheduler3.step()
    
    
#     # Update tracking variables
#     tr_loss += loss.item()
#     nb_tr_examples += b_input_ids.size(0)
#     nb_tr_steps += 1

#   print("Train loss: {}".format(tr_loss/nb_tr_steps))
    
    
#   # Validation

#   # Put model in evaluation mode to evaluate loss on the validation set
#   model3.eval()

#   # Tracking variables 
#   predictions3_train , true_labels3_train = [], []

#   # Tracking variables 
#   eval_loss, eval_accuracy = 0, 0
#   nb_eval_steps, nb_eval_examples = 0, 0

#   # Evaluate data for one epoch
#   for batch in validation_dataloader:
#     # Add batch to GPU
#     batch = tuple(t.to(device) for t in batch)
#     # Unpack the inputs from our dataloader
#     b_input_ids, b_input_mask, b_labels = batch
#     # Telling the model not to compute or store gradients, saving memory and speeding up validation
#     with torch.no_grad():
#       # Forward pass, calculate logit predictions
#       logits = model3(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    
#     # Move logits and labels to CPU
#     logits = logits['logits'].softmax(dim=-1).detach().cpu().numpy()
#     label_ids = b_labels.to('cpu').numpy()


#     tmp_eval_accuracy = flat_accuracy(logits, label_ids)
    
#     eval_accuracy += tmp_eval_accuracy
#     nb_eval_steps += 1

#     # Store predictions and true labels
#     predictions3_train.append(logits)
#     true_labels3_train.append(label_ids)

#   print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))

### Training Evaluation

In [None]:
# Keep only the probability of class 1 for every validation example.
predictions1_train_flat = [i[1] for a in predictions1_train for i in a]
predictions2_train_flat = [i[1] for a in predictions2_train for i in a]
# Model 3 is commented out in this notebook, so predictions3_train does not exist.
#predictions3_train_flat = [i[1] for a in predictions3_train for i in a]

In [None]:
# Show the first three class-1 probabilities of model 1.
predictions1_train_flat[:3]

In [None]:
# Turn the class-1 probabilities into hard labels: 1 from 0.75 upwards, else 0.
predictions1_train_flat = [1 if i >=0.75 else 0 for i in predictions1_train_flat]
predictions2_train_flat = [1 if i >=0.75 else 0 for i in predictions2_train_flat]
# Model 3 is commented out in this notebook, so there is nothing to binarize for it.
#predictions3_train_flat = [1 if i >=0.75 else 0 for i in predictions3_train_flat]

In [None]:
# Flatten the per-batch label arrays into one list per model.
true_labels1_train_flat = [i for a in true_labels1_train for i in a]
true_labels2_train_flat = [i for a in true_labels2_train for i in a]
# Model 3 is commented out in this notebook, so true_labels3_train does not exist.
#true_labels3_train_flat = [i for a in true_labels3_train for i in a]

In [None]:
# Precision / recall / F1 per model, computed once and then unpacked.
metrics_train1 = metric_fn(true_labels1_train_flat, predictions1_train_flat)
recall_train1 = metrics_train1['eval_recall']
precision_train1 = metrics_train1['eval_precision']
f1_train1 = metrics_train1['eval_f1']
# NOTE: despite its name, auc11 holds the accuracy, not an area under a curve.
auc11 = accuracy_score(true_labels1_train_flat, predictions1_train_flat)


metrics_train2 = metric_fn(true_labels2_train_flat, predictions2_train_flat)
recall_train2 = metrics_train2['eval_recall']
precision_train2 = metrics_train2['eval_precision']
f1_train2 = metrics_train2['eval_f1']
# NOTE: despite its name, auc22 holds the accuracy, not an area under a curve.
auc22 = accuracy_score(true_labels2_train_flat, predictions2_train_flat)

# Model 3 is commented out in this notebook, so it has no predictions to score.
#metrics_train3 = metric_fn(true_labels3_train_flat, predictions3_train_flat)
#recall_train3 = metrics_train3['eval_recall']
#precision_train3 = metrics_train3['eval_precision']
#f1_train3 = metrics_train3['eval_f1']
#auc33 = accuracy_score(true_labels3_train_flat, predictions3_train_flat)

In [None]:
# Report the metrics of each model. The last value per model comes from
# accuracy_score, so it is labelled as accuracy rather than AUC.
print(f"Precision train1: {precision_train1}")
print(f"recall train1: {recall_train1}")
print(f"f1 train1: {f1_train1}")
print(f"accuracy train1: {auc11}")
print("\n")
print(f"Precision train2: {precision_train2}")
print(f"recall train2: {recall_train2}")
print(f"f1 train2: {f1_train2}")
print(f"accuracy train2: {auc22}")
# Model 3 is commented out in this notebook, so it has no metrics to print.
#print("\n")
#print(f"Precision train3: {precision_train3}")
#print(f"recall train3: {recall_train3}")
#print(f"f1 train3: {f1_train3}")
#print(f"accuracy train3: {auc33}")

Saving the predictions made on the validation split of the training data

In [None]:
# One single-column DataFrame of hard predictions per model.
pred_train1 = pd.DataFrame(predictions1_train_flat, columns = ['Prediction_train1'])
pred_train2 = pd.DataFrame(predictions2_train_flat, columns = ['Prediction_train2'])
# Model 3 is commented out in this notebook, so it has no predictions.
#pred_train3 = pd.DataFrame(predictions3_train_flat, columns = ['Prediction_train3'])

In [None]:
# The predictions were produced on the validation split, in validation order, so
# they must be joined to those rows of df and not to its first rows. Re-derive
# the validation row positions with the same seed and test_size as the split of
# the inputs (the split depends only on the number of rows and the seed).
_, validation_idx = train_test_split(np.arange(len(df)), random_state=2018, test_size=0.1)
df_validation = df.iloc[validation_idx].reset_index(drop=True)
# Model 3 is commented out in this notebook, so only two prediction columns exist.
df_train_pred = pd.concat([df_validation, pred_train1, pred_train2], axis = 1)
df_train_pred.head()

In [None]:
# Write the rows with their predictions to a CSV file in the working directory.
df_train_pred.to_csv("df_train_10_pred_amb_lfnorm_14.csv")

# Prediction

### Preprocessing test dataset

In [None]:
# Load the test examples (path is relative to the working directory).
df_test = pd.read_csv("test_data_beto_10_NOamb_lfnorm_medline.csv")

# Same sentence-pair layout as for training: "[CLS] <long_form> [SEP] <context> [SEP]"
df_test['sentences'] = df_test['long_form'].map(lambda x: "[CLS] " + x + " [SEP] ")+df_test['context'] + ' [SEP]'

# Create sentence and label lists
sentences_test = df_test.sentences.values
labels_test = df_test.label.values

tokenized_texts_test = [tokenizer.tokenize(sent) for sent in sentences_test]


MAX_LEN = 128

# Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
input_ids_test = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts_test]
# Pad our input tokens
input_ids_test = pad_sequences(input_ids_test, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

# Mask of 1s for each token followed by 0s for padding. A test-specific name is
# used so that the training `attention_masks` are not overwritten; re-running
# the train/validation split afterwards would otherwise pick up the test masks.
attention_masks_test = [[float(i>0) for i in seq] for seq in input_ids_test]

prediction_inputs = torch.tensor(input_ids_test)
prediction_masks = torch.tensor(attention_masks_test)
prediction_labels = torch.tensor(labels_test)

batch_size = 32


# Batches are served in dataset order so predictions line up with df_test rows.
prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

### Prediction on test set model 1

In [None]:
# Put model in evaluation mode
model1.eval()

# Per-batch class probabilities of model 1 and the matching true labels
predictions1 , true_labels = [], []

# Start from zero: without this the counters would continue from the values left
# behind by the last validation pass of the training loop.
eval_accuracy, nb_eval_steps = 0, 0

# Predict
for batch in prediction_dataloader:
  # Move the batch to the device and unpack it
  b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
  # Telling the model not to compute or store gradients, saving memory and speeding up prediction
  with torch.no_grad():
    # Forward pass without labels: only the logits are needed
    outputs = model1(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

  # Turn the logits into class probabilities and move them and the labels to the CPU
  logits = outputs['logits'].softmax(dim=-1).detach().cpu().numpy()
  label_ids = b_labels.to('cpu').numpy()

  eval_accuracy += flat_accuracy(logits, label_ids)
  nb_eval_steps += 1

  # Store predictions and true labels
  predictions1.append(logits)
  true_labels.append(label_ids)

# Mean of the per-batch accuracies on the test set
if nb_eval_steps:
  print("Test Accuracy (model 1): {}".format(eval_accuracy/nb_eval_steps))

### Prediction on test set model 2

In [None]:
# Put model in evaluation mode
model2.eval()

# Per-batch class probabilities of model 2 and the matching true labels
predictions2 , true_labels = [], []

# Start from zero: without this the counters would continue from the values left
# behind by whichever evaluation loop ran before this cell.
eval_accuracy, nb_eval_steps = 0, 0

# Predict
for batch in prediction_dataloader:
  # Move the batch to the device and unpack it
  b_input_ids, b_input_mask, b_labels = (tensor.to(device) for tensor in batch)
  # Telling the model not to compute or store gradients, saving memory and speeding up prediction
  with torch.no_grad():
    # Forward pass without labels: only the logits are needed
    outputs = model2(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

  # Turn the logits into class probabilities and move them and the labels to the CPU
  logits = outputs['logits'].softmax(dim=-1).detach().cpu().numpy()
  label_ids = b_labels.to('cpu').numpy()

  eval_accuracy += flat_accuracy(logits, label_ids)
  nb_eval_steps += 1

  # Store predictions and true labels
  predictions2.append(logits)
  true_labels.append(label_ids)

# Mean of the per-batch accuracies on the test set
if nb_eval_steps:
  print("Test Accuracy (model 2): {}".format(eval_accuracy/nb_eval_steps))

### Prediction on test set model 3

In [None]:
# # Put model in evaluation mode
# model3.eval()

# # Tracking variables 
# predictions3 , true_labels = [], []

# # Predict 
# for batch in prediction_dataloader:
#   # Add batch to GPU
#   batch = tuple(t.to(device) for t in batch)
#   # Unpack the inputs from our dataloader
#   b_input_ids, b_input_mask, b_labels = batch
#   # Telling the model not to compute or store gradients, saving memory and speeding up prediction
#   with torch.no_grad():
#     # Forward pass, calculate logit predictions
#     logits = model3(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

#   # Move logits and labels to CPU
#   logits = logits['logits'].softmax(dim=-1).detach().cpu().numpy()
#   label_ids = b_labels.to('cpu').numpy()

#   tmp_eval_accuracy= flat_accuracy(logits, label_ids)
    
#   eval_accuracy += tmp_eval_accuracy
#   nb_eval_steps += 1
  
#   # Store predictions and true labels
#   predictions3.append(logits)
#   true_labels.append(label_ids)

## Voting Ensemble

In [None]:
# Flatten the per-batch probability arrays into one list per model, with one
# probability row per test example.
predictions1_flat = []
for batch_probs in predictions1:
    predictions1_flat.extend(batch_probs)

predictions2_flat = []
for batch_probs in predictions2:
    predictions2_flat.extend(batch_probs)
#predictions3_flat = [i for a in predictions3 for i in a]

In [None]:
# Ensemble by averaging: for every test example take the mean of the two models'
# class-1 probabilities.
predictions_total = [
    (probs1[1] + probs2[1]) / 2
    for probs1, probs2 in zip(predictions1_flat, predictions2_flat)
]

In [None]:
#average of predictions of three models, as ensemble
# predictions_total = []
# for a,b in enumerate(zip(predictions1_flat, predictions2_flat, predictions3_flat)):
#     predictions_total.append((b[0][1]+b[1][1]+b[2][1])/3)

In [None]:
# Flatten the per-batch label arrays into a single list and peek at the start.
true_labels_flat = []
for batch_labels in true_labels:
    true_labels_flat.extend(batch_labels)
true_labels_flat[:2]

Change soft output to hard output to compare with true labels

In [None]:
# Hard label per example: 1 when the averaged class-1 probability is at least
# 0.75, otherwise 0.
predictions_total_binari = [1 if i >=0.75 else 0 for i in predictions_total]

In [None]:
# Show the first five hard predictions.
predictions_total_binari[:5]

# Evaluation

In [None]:
# Score the ensemble's hard predictions against the true test labels.
ensemble_scores = metric_fn(true_labels_flat, predictions_total_binari)
recall = ensemble_scores['eval_recall']
precision = ensemble_scores['eval_precision']
f1score = f1_score(true_labels_flat, predictions_total_binari)
# Despite its name, `auc` holds the accuracy returned by accuracy_score.
auc = accuracy_score(true_labels_flat, predictions_total_binari)

In [None]:
# Report the test metrics. `auc` is computed with accuracy_score, so it is
# labelled as accuracy here.
print(f"Precision: {precision}")
print(f"recall: {recall}")
print(f"f1_score: {f1score}")
print(f"accuracy: {auc}")

In [None]:
# Same evaluation for a single model (model 1) instead of the ensemble.
# NOTE(review): this overwrites predictions_total_binari, so the cells that
# follow save the model-1 predictions rather than the ensemble — confirm intent.
predictions1_flat = [i for a in predictions1 for i in a]
true_labels_flat = [i for a in true_labels for i in a]
# Hard label: 1 when the class-1 probability is at least 0.75, otherwise 0.
predictions_total_binari = [1 if i[1] >=0.75 else 0 for i in predictions1_flat]

# Precision and recall come from one call instead of two identical ones.
single_model_scores = metric_fn(true_labels_flat, predictions_total_binari)
recall = single_model_scores['eval_recall']
precision = single_model_scores['eval_precision']
f1score = f1_score(true_labels_flat, predictions_total_binari)
# Despite its name, `auc` holds the accuracy returned by accuracy_score.
auc = accuracy_score(true_labels_flat, predictions_total_binari)

print(f"Precision: {precision}")
print(f"recall: {recall}")
print(f"f1_score: {f1score}")
print(f"accuracy: {auc}")

In [None]:
# Wrap the hard predictions in a single-column DataFrame.
pred = pd.DataFrame(predictions_total_binari, columns = ['Prediction'])

In [None]:
# Attach the predictions to the test rows column-wise (aligned on the row index)
# and preview the result.
df_test_pred = pd.concat([df_test, pred], axis = 1)
df_test_pred.head()

In [None]:
# Write the test rows together with their predictions to disk.
# NOTE(review): the file name ends in "model2", but predictions_total_binari was
# last rebuilt from predictions1 (model 1) — confirm which model is meant.
df_test_pred.to_csv("../data/data_predict/df_test_10_pred_NOamb_lfnorm_medline_model2.csv")