# Specify Configurations

## specify model configs

In [1]:
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
BATCH_SIZE = 4
LEARNING_RATE = 1e-5
EPOCHS = 10
TRAINABLE_ENCODER_PARAMS = True
cwd = '/content/drive/My Drive/Colab Notebooks/github/' # specify your current working directory here
file_directory = cwd+'MIL data/'

## specify run configs

In [2]:
# ALPHA is a parameter to weight the loss of rating and sentiments per review
# BETA is the ratio of train data that gets the local data injection in the loss function with sentence sentiments
# SHUFFLED_BY = 'R' # ids of train instances for local data injection are shuffled by reviews (R) or by sentences (S) 
run_configs = [(0.5,0.001,'R') # (ALPHA,BETA,SHUFFLED_BY)
               ]


# Load Data

## connect with google drive

In [3]:
import os
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
print(os.listdir(cwd))

Mounted at /content/drive
['MIL data', 'MIL models', 'py_scripts', 'data_input_milldi.json', 'Data_preprocessing.ipynb', 'README_temp.gdoc', 'Controller_MILPS.ipynb']


## prepare train, dev, test data

### import MIL_nn_utils

In [4]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# load train, eval, prediction method
!pip install -qq transformers
import numpy as np
import sys
if cwd+'py_scripts/' not in sys.path:
  sys.path.append(cwd+'py_scripts/')
if "MIL_nn_utils" in sys.modules.keys():
  del sys.modules['MIL_nn_utils']
from MIL_nn_utils import *

[K     |████████████████████████████████| 3.5 MB 8.7 MB/s 
[K     |████████████████████████████████| 596 kB 47.4 MB/s 
[K     |████████████████████████████████| 895 kB 41.0 MB/s 
[K     |████████████████████████████████| 6.5 MB 38.6 MB/s 
[K     |████████████████████████████████| 67 kB 5.0 MB/s 
[?25h

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

### load data

In [6]:
data_train=torch.load(file_directory+'data_train_0.7.bin')
data_dev=torch.load(file_directory+'data_dev_0.15.bin')
data_test=torch.load(file_directory+'data_test_0.15.bin')

### transform senti scores from 5 classes to 3 classes

In [7]:
class MILRestsDataset_senti3class(MILRestsDataset):
  def __init__(self, revid_2_sentcid_2_data_raw, tokenizer, max_seq_len_rev, max_seq_len_sentc):
    super(MILRestsDataset_senti3class, self).__init__()

  def senti_to_3class(self, s):
    if s<=-0.5:
      return -1.0
    elif -0.5<s and s<0.5:
      return 0.0
    elif 0.5<=s:
      return 1.0

  def __getitem__(self, idx):
    id_rev = self.ids_rev[idx]
    rating_rev = torch.tensor(self.ratings_rev[idx],dtype=torch.long)
    # revs are already padded! sentences get padded in function 'tokenize'
    id_sentc_list = self.id_sentc_lists[idx]
    tokens_sentc_list = self.tokens_sentc_lists[idx]
    senti_sentc_list = [torch.tensor(self.senti_to_3class(s),dtype=torch.long) for s in self.senti_sentc_lists[idx]]
    tokenids_sentc_list = list()
    attmasks_sentc_list = list()
    for tokens in tokens_sentc_list:
      bert_encoded = tokenizer(tokens,#tokens,
                            is_split_into_words=True,
                            padding='max_length',
                            truncation=True,
                            return_token_type_ids=False,
                            return_attention_mask=True,
                            return_tensors="pt",max_length= self.max_seq_len_sentc)
      tokenids_sentc_list.append(bert_encoded['input_ids'].flatten())
      attmasks_sentc_list.append(bert_encoded['attention_mask'].flatten())

    return {
        'id_rev':id_rev,
        'rating_rev':rating_rev,
        'id_sentc_list':id_sentc_list,
        'senti_sentc_tensor':torch.stack(senti_sentc_list),
        'tokenids_sentc_tensor':torch.stack(tokenids_sentc_list),
        'attmasks_sentc_tensor':torch.stack(attmasks_sentc_list)
    }

In [8]:
data_train.__class__ = MILRestsDataset_senti3class
data_dev.__class__ = MILRestsDataset_senti3class
data_test.__class__ = MILRestsDataset_senti3class

# Create randomly sorted lists of review IDs and sentence IDs
lists are used for enabling local data injection with sentence-level sentiments for a certain ratio (BETA) for train data (depending on review IDs or sentence IDs)

In [9]:
from os.path import exists
filename_df_shuffled_rev_ids = 'data_train_0.7_df_shuffled_rev_ids.pkl'
file_exists = exists(file_directory+filename_df_shuffled_rev_ids)
if not file_exists:  
  import pandas as pd
  ids_rev = list()
  ids_sentc = list()
  id_rev_to_ids_sentc = dict()
  for idx in range(len(data_train)):
    row = data_train[idx]
    id_rev = row['id_rev']
    id_sentc_list = row['id_sentc_list']
    id_rev_to_ids_sentc[id_rev] = id_sentc_list
    for id_sentc in id_sentc_list:
      ids_rev.append(id_rev)
      ids_sentc.append(id_sentc)
  df = pd.DataFrame.from_dict({'id_rev':ids_rev,'id_sentc':ids_sentc})
  df_shuffled_sentc_ids = df.sample(frac=1)
  ids_rev = list()
  ids_sentc = list()
  for id_rev,curr_ids_sentc in id_rev_to_ids_sentc.items():
    for id_sentc in curr_ids_sentc:
      ids_rev.append(id_rev)
      ids_sentc.append(id_sentc)
  df_shuffled_rev_ids = pd.DataFrame.from_dict({'id_rev':ids_rev,'id_sentc':ids_sentc})
  df_shuffled_rev_ids.to_pickle(file_directory+filename_df_shuffled_rev_ids)
  filename_df_shuffled_sentc_ids = 'data_train_0.7_df_shuffled_sentc_ids.pkl'
  df_shuffled_sentc_ids.to_pickle(file_directory+filename_df_shuffled_sentc_ids)

# Create Neural Network for MIL with BERT

## create MILnn class

### import torch and BERT

In [10]:
from torch import nn
from transformers import BertModel
bert_hidden_size = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME).config.hidden_size

Downloading:   0%|          | 0.00/416M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


#### sentence encoder

In [11]:
# create sentence encoder class
class SentenceEncoder(nn.Module):
  def __init__(self):
    super(SentenceEncoder,self).__init__()
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)

  def forward(self,tokenids_sentc_tensor,attmasks_sentc_tensor):
    po_list = list()
    for input_ids,attention_mask in zip(tokenids_sentc_tensor.split(1,1),attmasks_sentc_tensor.split(1,1)):
      _, pooled_output = self.bert(
        input_ids=input_ids.squeeze(1),
        attention_mask=attention_mask.squeeze(1),
        return_dict=False # otherwise the output of this layer is not a tensor and the next layer raises a ValueError
        )
      po_list.append(pooled_output.unsqueeze(1))
    out_cat = torch.cat(po_list,1)
    return out_cat

#### attention weights

In [12]:
# create attention weights layer
class AttentionWeights(nn.Module):
  def __init__(self,hidden_size):
    super(AttentionWeights,self).__init__()
    self.gru = nn.GRU(input_size = hidden_size,
                      hidden_size = hidden_size,
                      num_layers = 1,
                      batch_first = True,
                      dropout=0.5,
                      bidirectional=True) #b,s,e:b,s,2*e
    self.alpha = nn.Sequential(
        nn.Linear(hidden_size * 2, hidden_size * 2),
        nn.Tanh(),
        nn.Linear(hidden_size * 2, 1, bias=False)
    )#b,2*e:b,1 ; is used for each element in the sequence

  def forward(self,sentc_enc):
    out_gru, h_n = self.gru(sentc_enc) # h_n is not used
    alphas = []
    for sent in out_gru.split(1,1):
      out_alpha = self.alpha(sent.squeeze(1))
      alphas.append(out_alpha) # .unsqueeze is not necessary since alpha reduces one dimension to 1,
                                 # which is used for stacking/concatenation
    out_cat = torch.cat(alphas,1).softmax(1)
    return out_cat

#### time distributed dense layer 

In [13]:
# create time distributed dense class
class TimeDistrDense(nn.Module):
  def __init__(self,in_shape,out_shape):
    super(TimeDistrDense,self).__init__()
    self.drop = nn.Dropout(0.5)
    self.lin = nn.Linear(in_shape,out_shape)
    self.sigm = nn.Sigmoid()

  def forward(self,sentc_enc):
    out_list = list()
    for sent in sentc_enc.split(1,1):
      out_drop = self.drop(sent.squeeze(1))
      out_lin = self.lin(out_drop)
      out_sigm = self.sigm(out_lin)
      out_list.append(out_sigm) # .unsqueeze is not necessary since alpha reduces one dimension to 1,
                              # which is used for stacking/concatenation
    out_cat = torch.cat(out_list,1)
    return out_cat

#### Milldi_angelidis_senti3class

In [14]:
class Milldi_angelidis_senti3class(nn.Module):# nn is a module from pytroch
  def __init__(self):
    super(Milldi_angelidis_senti3class, self).__init__()
    self.hidden_size = bert_hidden_size
    self.sentc_enc = SentenceEncoder()
    self.attention_weights = AttentionWeights(self.hidden_size) #input params
    self.tdd = TimeDistrDense(self.hidden_size,1)

  def forward(self, tokenids_sentc_tensor, attmasks_sentc_tensor):
    out_sentc_enc = self.sentc_enc(tokenids_sentc_tensor, attmasks_sentc_tensor) #b,s,t:b,s,e
    out_attention_weights = self.attention_weights(out_sentc_enc) #b,s,e:b,s
    out_tdd = self.tdd(out_sentc_enc) #b,s,e:b,s
    out_att = torch.mul(out_tdd,out_attention_weights).sum(1)#(b,s),(b,s):b
    out_rating = torch.add(torch.mul(out_att,4),1)
    out_sentiments = torch.add(torch.mul(out_tdd,2),-1)
    return out_rating, out_sentiments

# Definition of help methods for evaluation of MILLDI
- generate_ldi_masks_tensor: this method uses the shuffled list of train review IDs and removes the sentence level supervision (sentence sentiment classes) for all review IDs not contained in train_instances_with_ldi.
- train_epoch_milldi_partial: this method is used to tune the parameters of MILLDI for one epoch of train data.
- eval_model_milldi: this method is used to compute the accuracy and average loss of a MILLDI model on a given dataset (e.g. dev or test data) without tuning the model's parameters.
- get_predictions_milldi: this method is used to get the predictions of a MILLDI model

## generate_ldi_masks_tensor

In [15]:
def generate_ldi_masks_tensor(batch_id_rev,batch_id_sentc_list,device,train_instances_with_ldi):
  batch_id_sentc_list_transp = torch.transpose(torch.stack([torch.tensor([int(s) for s in l]) for l in batch_id_sentc_list]),
                                          0,1)
  ldi_masks_list = list()
  for id_rev,id_sentc_list in zip(batch_id_rev,batch_id_sentc_list_transp):
    if id_rev in set(train_instances_with_ldi['id_rev'].values):
      curr_masks = [(train_instances_with_ldi == np.array([id_rev,str(int(i))])).all(1).any() for i in id_sentc_list]
      ldi_masks_list.append(curr_masks)
    else:
      ldi_masks_list.append([False for i in id_sentc_list])
  ldi_masks_tensor = torch.tensor(ldi_masks_list).long().to(device)
  return ldi_masks_tensor


## train_epoch_milldi_partial

In [16]:
def train_epoch_milldi_partial(
  model, 
  data_loader, 
  loss_fn, 
  optimizer, 
  device, 
  scheduler,
  alpha,
  train_instances_with_ldi):
  model.to(device)
  model = model.train() # sets the model to training mode
  loss_fn.to(device)

  losses = []
  correct_predictions = 0
  n_examples = 0

  for batch,d in enumerate(data_loader):
    tokenids_sentc_tensor = d["tokenids_sentc_tensor"].to(device)
    attmasks_sentc_tensor = d["attmasks_sentc_tensor"].to(device)
    targets = d["rating_rev"].to(device)
    senti_sentc_tensor = d['senti_sentc_tensor'].to(device)

    batch_id_rev = d['id_rev']
    batch_id_sentc_list = d['id_sentc_list']
    ldi_masks_tensor = generate_ldi_masks_tensor(batch_id_rev,batch_id_sentc_list,device,train_instances_with_ldi)

    rating_preds,sentiments_preds = model(
        tokenids_sentc_tensor,
        attmasks_sentc_tensor
        )
    # compute predictions and loss
    loss = alpha * loss_fn(rating_preds, targets)+ (1-alpha) * loss_fn(torch.mul(ldi_masks_tensor,sentiments_preds), \
                                                                       torch.mul(ldi_masks_tensor,senti_sentc_tensor))
    correct_predictions += torch.sum(torch.round(rating_preds) == targets)
    n_examples += targets.shape[0]
    losses.append(loss.item())

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) # clip exploding gradient
    optimizer.step()
    scheduler.step()
    if batch % 20 == 0: # to get more details while training
      loss_temp = loss.item()
      size = len(data_loader)
      # print(f"loss: {loss_temp:>7f}  [{batch:>5d}/{size:>5d}]")

  acc = correct_predictions.double() / n_examples
  return acc, np.mean(losses)

## eval_model_milldi

In [17]:
def eval_model_milldi(model, data_loader, loss_fn, device, alpha):
  model.to(device)
  model = model.eval()
  loss_fn.to(device)

  losses = []
  correct_predictions = 0
  n_examples = 0

  with torch.no_grad(): # no_grad() reduces memory consumption for computations, if you re sure not to use gradients in ".backward()"
    for d in data_loader:
      tokenids_sentc_tensor = d["tokenids_sentc_tensor"].to(device)
      attmasks_sentc_tensor = d["attmasks_sentc_tensor"].to(device)
      targets = d["rating_rev"].to(device)
      senti_sentc_tensor = d['senti_sentc_tensor'].to(device)

      rating_preds,sentiments_preds = model(
          tokenids_sentc_tensor,
          attmasks_sentc_tensor
          )
      # compute predictions and loss
      loss = alpha * loss_fn(rating_preds, targets)+ (1-alpha) * loss_fn(sentiments_preds, senti_sentc_tensor)
      correct_predictions += torch.sum(torch.round(rating_preds) == targets)
      n_examples += targets.shape[0]
      losses.append(loss.item())
  acc = correct_predictions.double() / n_examples
  return acc, np.mean(losses)

## get_predictions_milldi

In [18]:
def get_predictions_milldi(model, data_loader, device):
  model = model.eval()
  
  rating_predictions = []
  rating_predictions_exact = []
  rating_true_values = []

  sentiments_predictions = []
  sentiments_predictions_exact = []
  sentiments_true_values = []


  with torch.no_grad():
    for d in data_loader:

      tokenids_sentc_tensor = d["tokenids_sentc_tensor"].to(device)
      attmasks_sentc_tensor = d["attmasks_sentc_tensor"].to(device)
      targets = d["rating_rev"].to(device)
      sentiments_targets_padded = d['senti_sentc_tensor'].to(device)

      rating_preds_exact,senti_preds_exact_padded = model(
          tokenids_sentc_tensor,
          attmasks_sentc_tensor
          )

      # prepare rating_predictions
      rating_preds = torch.round(rating_preds_exact)

      rating_predictions.extend(rating_preds)
      rating_predictions_exact.extend(rating_preds_exact)
      rating_true_values.extend(targets)

      # prepare sentiments_predictions without padding sentences 
      id_sentc_list = d['id_sentc_list']
      preds_senti_exact = remove_padded_sentc(senti_preds_exact_padded,id_sentc_list)
      preds_senti = [[round(i) for i in row] for row in preds_senti_exact]
      sentiments_targets = remove_padded_sentc(sentiments_targets_padded,id_sentc_list)


      sentiments_predictions.extend(preds_senti)
      sentiments_predictions_exact.extend(preds_senti_exact)
      sentiments_true_values.extend(sentiments_targets)



  rating_predictions = torch.stack(rating_predictions).cpu()
  rating_predictions_exact = torch.stack(rating_predictions_exact).cpu()
  rating_true_values = torch.stack(rating_true_values).cpu()
  return (rating_predictions, rating_predictions_exact, rating_true_values,
    sentiments_predictions, sentiments_predictions_exact,sentiments_true_values)

# Definition of methods for training of MILLDI
- get_train_instances_with_ldi: this method returns a dataframe containing the training IDs that shall get fine-grained supervision (local data injection) during training.
- train_model: this method is used to train a MILLDI model

## get_train_instances_with_ldi
Load randomly sorted dfs with review and sentence IDs
df_shuffled_rev_ids: sentence ids of the same review are consecutive
df_shuffled_sentc_ids: all sentences are shuffled (independently of the corresponding review id)

In [19]:
def get_train_instances_with_ldi(file_directory,SHUFFLED_BY):
  import pandas as pd
  filename_df_shuffled_rev_ids = 'data_train_0.7_df_shuffled_rev_ids.pkl'
  filename_df_shuffled_sentc_ids = 'data_train_0.7_df_shuffled_sentc_ids.pkl'
  df_shuffled_rev_ids = pd.read_pickle(file_directory+filename_df_shuffled_rev_ids)
  df_shuffled_sentc_ids = pd.read_pickle(file_directory+filename_df_shuffled_sentc_ids)
  if SHUFFLED_BY == 'R':
    df_shuffled_train_instances = df_shuffled_rev_ids
  elif SHUFFLED_BY == 'S':
    df_shuffled_train_instances = df_shuffled_sentc_ids
  else: raise NotImpmlementedError
  train_instances_with_ldi = df_shuffled_train_instances.iloc[:round(BETA*df_shuffled_train_instances.shape[0]),:]
  return train_instances_with_ldi

## train_model

In [20]:
def train_model(train_dataloader,
                dev_dataloader,
                MODEL_NAME,
                EPOCHS,
                model,
                loss_fn,
                device,
                ALPHA,
                optimizer, 
                scheduler,
                train_instances_with_ldi):
  data_loader = train_dataloader
  history_filename = cwd+'MIL models/'+MODEL_NAME+'_history.bin'
  history = {'train_acc':[],'train_loss':[],'dev_acc':[],'dev_loss':[]}
  best_accuracy = 0
  best_loss = 100
  best_model = model
  for epoch in range(EPOCHS):

    # print(f'Epoch {epoch + 1}/{EPOCHS}')
    # print('-' * 10)

    if epoch == 0:
      train_acc, train_loss = eval_model_milldi(
        model,
        data_loader,
        loss_fn, 
        device,
        ALPHA
      )
      dev_acc, dev_loss = eval_model_milldi(
        model,
        dev_dataloader,
        loss_fn, 
        device,
        ALPHA
      )
      history['train_acc'].append(train_acc)
      history['train_loss'].append(train_loss)
      history['dev_acc'].append(dev_acc)
      history['dev_loss'].append(dev_loss)

    train_acc, train_loss = train_epoch_milldi_partial(
      model,
      data_loader,    
      loss_fn, 
      optimizer, 
      device, 
      scheduler,
      ALPHA,
      train_instances_with_ldi
    )

    # print(f'Train loss {train_loss} accuracy {train_acc}')

    dev_acc, dev_loss = eval_model_milldi(
      model,
      dev_dataloader,
      loss_fn, 
      device,
      ALPHA
    )

    # print(f'Dev   loss {dev_loss} accuracy {dev_acc}')
    # print()

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['dev_acc'].append(dev_acc)
    history['dev_loss'].append(dev_loss)

    # save model and history if run is interrupted
    # torch.save(model.state_dict(), last_model_filename)
    torch.save(history,history_filename)
    #  if dev_acc > best_accuracy:
    if dev_loss < best_loss:
      best_model = model
      # torch.save(model.state_dict(), best_model_filename)
      best_loss = dev_loss
  # print(f"time for training: {round((time_end-time_start)/60)} min, {round((time_end-time_start)%60)} sec")
  return best_model

# Definition of additional performance metrics

## rmse

In [21]:
def RMSE(y_pred,y_test):
    return torch.sqrt(torch.mean((y_pred-y_test)**2))

# Evaluation of different Run Configs

## Definition of methods for evaluation
- eval_run_config: this method is used to run a complete evaluation (model training, validation and testing) for a run configuration. the results of this method are saved in "MIL models"

### eval_run_config

In [22]:
from transformers import get_linear_schedule_with_warmup, AdamW
from torch.utils.data import DataLoader
def eval_run_config(ALPHA,BETA,SHUFFLED_BY):
  MODEL_NAME = 'Milldi_A'+str(ALPHA)+'_B'+str(BETA)+SHUFFLED_BY+'_angelidis_senti3class'
  # load data
  train_instances_with_ldi = get_train_instances_with_ldi(file_directory,SHUFFLED_BY)
  train_dataloader = DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True)
  dev_dataloader = DataLoader(data_dev, batch_size=BATCH_SIZE, shuffle=True)
  test_dataloader = DataLoader(data_test, batch_size=BATCH_SIZE, shuffle=True)
  
  # instantiate model, optimizer, scheduler, loss function
  model = Milldi_angelidis_senti3class().to(device)
  optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False) # parameters from original BERT paper: lr = 2e-5
  # "The BERT authors have some recommendations for fine-tuning"
  # since bert params are frozen, lr has to be ...
  total_steps = len(train_dataloader) * EPOCHS

  scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
  )
  # chose loss function
  loss_fn = nn.L1Loss().to(device)
  # loss_fn = nn.MSELoss().to(device)

  # set parameters of sentcence encoder to non-trainable/ freeze parameters
  if not TRAINABLE_ENCODER_PARAMS:
    for parameter in model.sentc_enc.parameters():
        parameter.requires_grad = False
  
  # train model
  print(MODEL_NAME)
  best_model = train_model(train_dataloader,
                           dev_dataloader,
                            MODEL_NAME,
                            EPOCHS,
                            model,
                            loss_fn,
                            device,
                            ALPHA,
                            optimizer, 
                            scheduler,
                            train_instances_with_ldi)
  # get predictions
  y_pred, y_pred_exact, y_test,y_senti_pred, y_senti_pred_exact, y_senti_test = get_predictions_milldi(
    best_model,
    test_dataloader,
    device
    )
  
  # evaluation of sentence sentiment prediction
  y_senti_test_onelist = []
  y_senti_pred_onelist = []
  y_senti_pred_exact_onelist = []
  for l in y_senti_test:
    y_senti_test_onelist.extend(l)
  for l in y_senti_pred:
    y_senti_pred_onelist.extend(l)
  for l in y_senti_pred_exact:
    y_senti_pred_exact_onelist.extend(l)
  # print(min(y_senti_pred_onelist),max(y_senti_pred_onelist))
  # print(min(y_senti_pred_exact_onelist),max(y_senti_pred_exact_onelist))
  # print(min(y_senti_test_onelist),max(y_senti_test_onelist))
  from sklearn.metrics import classification_report
  class_names = list(str(s) for s in range(-1,2))
  classif_report = ''
  classif_report += MODEL_NAME+'\n'
  classif_report += str(classification_report(y_senti_test_onelist, y_senti_pred_onelist, target_names=class_names))+'\n'
  print(class_names)
  rmse_val = RMSE(torch.tensor(y_senti_test_onelist).float(),torch.tensor(y_senti_pred_onelist).float()).item()
  print(f"RMSE senti predictions: {rmse_val}")
  classif_report += "RMSE senti predictions: "+str(rmse_val)
  print(f"range of y_pred_exact: {(min(y_senti_pred_onelist),max(y_senti_pred_onelist))}")
  print(classif_report)
  class_rep_sentc_senti_filename = file_directory.replace('MIL data','MIL models')+MODEL_NAME+'_classif_report.txt'
  with open(class_rep_sentc_senti_filename,'w') as of:
    of.writelines(classif_report)


## Evaluate all run configs

In [23]:
from time import time
t0 = time()
for ALPHA,BETA,SHUFFLED_BY in run_configs:
  eval_run_config(ALPHA,BETA,SHUFFLED_BY)
  t1 = time()
  t_diff = t1-t0
  t_sec,t_min,t_h = int(t_diff%60),int((t_diff-t_diff%60)/60%60),int((t_diff-t_diff%60-60*(t_diff-t_diff%60)/60%60)/(60*60))
  print('current runtime: '+str(t_h)+'h '+str(t_min)+'min '+str(t_sec)+'sec')

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  "num_layers={}".format(dropout, num_layers))


Milldi_A0.5_B0.001R_angelidis_senti3class
['-1', '0', '1']
RMSE senti predictions: 0.7913790941238403
range of y_pred_exact: (-1, 1)
Milldi_A0.5_B0.001R_angelidis_senti3class
              precision    recall  f1-score   support

          -1       0.54      0.63      0.58       326
           0       0.54      0.21      0.31       572
           1       0.62      0.86      0.72       761

    accuracy                           0.59      1659
   macro avg       0.57      0.57      0.54      1659
weighted avg       0.58      0.59      0.55      1659

RMSE senti predictions: 0.7913790941238403
current runtime: 1h 2min 31sec
