# E.1 Imports

In [1]:
# set current path to the parent, to enable absolute imports 

import os
from pathlib import Path

# Remember the directory the kernel started in only once. Without this guard,
# every re-run of this cell would climb one more level up the directory tree,
# because os.getcwd() already points at the parent after the first run.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = Path(os.getcwd())

# work from the parent of the notebook directory so that absolute imports resolve
curr_path = NOTEBOOK_DIR.parent
os.chdir(curr_path)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import torch
import seaborn as sns
import sqlite3
import sqlalchemy
from transformers import BertTokenizer
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
import random

from torch.utils.data import DataLoader, RandomSampler
from torch.optim import Adam
from torch.nn import MSELoss
import math
from torch.utils.data import SequentialSampler
from transformers import BertForSequenceClassification

from main.active_learning.utils import seed_pool_split, experiment_AL
from main.active_learning.datasets import ALDataset

  from .autonotebook import tqdm as notebook_tqdm


# E.2 Data Loading and Prep

In [3]:
# select the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using device: {device}')

# report which GPU was picked up (only meaningful on a CUDA machine)
if device == 'cuda':
    print(f"GPU name: {torch.cuda.get_device_name()}")


Using device: cpu


In [4]:
# load the first batch of 200 labeled instances
# TODO: read in more data

sent_df = pd.read_csv(
    filepath_or_buffer='files/datasets/labeled/l01_reuters_sample200.csv',
)

In [5]:
# load the next 800 labeled instances and append them to the first batch
# (comment this cell out to run only the first iteration of the experiment)
# NOTE: sent_df is extended from itself, so re-running this cell without
# re-running the previous one appends the 800 rows a second time.

send_df2 = pd.read_csv(
    filepath_or_buffer='files/datasets/labeled/l02_reuters_sample800.csv',
)

sent_df = pd.concat(objs=[sent_df, send_df2], axis=0, ignore_index=True)

In [6]:
# keep only the rows whose `is_miscellaneous` flag is False

sent_df = sent_df.loc[sent_df['is_miscellaneous'].eq(False)]

In [7]:
# report how many instances remain after filtering
print(f'Total: {sent_df.shape[0]} instances')

Total: 968 instances


In [8]:
# pull the sentence text and the valence/arousal targets out as numpy arrays

sentences, v_labels, a_labels = (
    sent_df[column].values for column in ('sentence', 'valence', 'arousal')
)

In [9]:
# TODO: check what else we can do with tokenization
# e.g. add financial words to the vocabulary
# e.g. take another pre-trained tokenizer model

# Fetch the pre-trained, lower-casing BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path='bert-base-uncased',
    do_lower_case=True,
)

Loading BERT tokenizer...


In [10]:

# measure the maximum sentence length
# this is needed for adjusting the BERT size later

# Token count of every sentence, including the `[CLS]` and `[SEP]` special tokens.
token_counts = [
    len(tokenizer.encode(sent, add_special_tokens=True)) for sent in sentences
]

# Longest tokenized sentence (0 when there are no sentences at all).
max_len = max(token_counts, default=0)
print('Max sentence length: ', max_len)


Max sentence length:  70


In [11]:

# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids = []
attention_masks = []

# For every sentence...
for sent in sentences:
    # `encode_plus` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    #   (5) Pad or truncate the sentence to `max_length`
    #   (6) Create attention masks for [PAD] tokens.
    encoded_dict = tokenizer.encode_plus(
        sent,  # Sentence to encode.
        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
        # Use the longest tokenized length measured in the previous cell
        # instead of a hard-coded 70, so the padding follows the data.
        max_length=max_len,
        padding='max_length',
        # Truncation has to be requested explicitly; without it an over-long
        # sentence would keep its length and break the `torch.cat` below.
        truncation=True,
        return_attention_mask=True,  # Construct attn. masks.
        return_tensors='pt',  # Return pytorch tensors.
    )

    # Add the encoded sentence to the list.
    input_ids.append(encoded_dict['input_ids'])

    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors (one row per sentence).
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)

# NOTE: the label names are rebound from numpy arrays to tensors here.
v_labels = torch.tensor(v_labels)
a_labels = torch.tensor(a_labels)

# Print sentence 0, now as a list of IDs.
print('Original: ', sentences[0])
print('Token IDs:', input_ids[0])
print('Valence: ', v_labels[0])
print('Arousal: ', a_labels[0])


Original:  The automaker also said it is considering introducing subcompact SUV models in South Korea, the United States and Europe.
Token IDs: tensor([  101,  1996,  8285,  8571,  2036,  2056,  2009,  2003,  6195, 10449,
         4942,  9006, 19498,  2102, 15620,  4275,  1999,  2148,  4420,  1010,
         1996,  2142,  2163,  1998,  2885,  1012,   102,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0])
Valence:  tensor(0.6400, dtype=torch.float64)
Arousal:  tensor(0.5100, dtype=torch.float64)


# E.3 Main Experiment

In [12]:
# seed every random number generator used in this notebook with the same value

RANDOM_STATE = 42

# python's `random`, numpy, torch (CPU) and torch (all CUDA devices), in that order
for _seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seeder(RANDOM_STATE)

In [13]:
# experiment parameters

# value passed as `seed_size` for the train/test split (75% train, 25% test)
TRAIN_SIZE = 0.75

# active-learning selection strategies to compare
methods = [
    'random',
    'farthest-first',
    'mc-dropout',
]

# grid of batch sizes to sweep (4 was tried earlier and is left out)
batch_sizes = [
    16,
    32,
    64,
]

# grid of learning rates to sweep
lrs = [
    1e-5,
    1e-6,
    1e-7,
]

In [14]:
# empty result containers, one for the valence models and one for the arousal models

v_results = {}
a_results = {}

In [None]:
# Run the active-learning experiment for every combination of batch size,
# learning rate and selection method, once for valence and once for arousal.
# Results end up in v_results / a_results as
#   results[batch_size][lr][method] = {'train': <curve>, 'test': <curve>}


def _load_bert_regressor(seed):
    """Load a fresh `bert-base-uncased` model with a single-output head.

    The torch RNG is re-seeded immediately before loading so that every model
    built with the same `seed` starts from the same state (the loading log
    reports that some weights are not initialized from the checkpoint).

    Args:
        seed: value passed to `torch.manual_seed` before the model is created.

    Returns:
        A `BertForSequenceClassification` instance with `num_labels=1`,
        attentions disabled and hidden states enabled in its outputs.
    """
    torch.manual_seed(seed)
    return BertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels=1,
        output_attentions=False,
        output_hidden_states=True)


# loop over all batch_sizes
for batch_size in batch_sizes:

    # initiate storage in dictionary, in a nested manner
    v_results[batch_size], a_results[batch_size] = {}, {}

    # loop over all the learning rates
    for lr in lrs:

        # initiate storage in dictionary, in a nested manner
        v_results[batch_size][lr], a_results[batch_size][lr] = {}, {}

        # loop over all the methods
        for method in methods:

            # print out modifications of this experiment loop
            print("--" * 20)
            print(f"METHOD: {method}. BATCH SIZE: {batch_size}. LEARNING RATE: {lr} ")
            print("--" * 20)

            # separate 25% for testing, 75% training
            v_train_ds, v_test_ds = seed_pool_split(input_ids, attention_masks, v_labels, seed_size=TRAIN_SIZE,
                                                    random_state=RANDOM_STATE)
            a_train_ds, a_test_ds = seed_pool_split(input_ids, attention_masks, a_labels, seed_size=TRAIN_SIZE,
                                                    random_state=RANDOM_STATE)

            # initiate seed and pool: the seed starts with `batch_size` instances
            v_seed, v_pool = seed_pool_split(v_train_ds[0], v_train_ds[1], v_train_ds[2], seed_size=batch_size,
                                             random_state=RANDOM_STATE)
            a_seed, a_pool = seed_pool_split(a_train_ds[0], a_train_ds[1], a_train_ds[2], seed_size=batch_size,
                                             random_state=RANDOM_STATE)

            # download valence and arousal base models, both seeded with
            # RANDOM_STATE instead of a repeated literal 42
            print("Downloading Valence model...")
            v_model = _load_bert_regressor(RANDOM_STATE)

            print("Downloading Arousal model...")
            a_model = _load_bert_regressor(RANDOM_STATE)

            # initialize Active Learning datasets
            v_seed_ds = ALDataset(v_seed[0], v_seed[1], v_seed[2])
            v_pool_ds = ALDataset(v_pool[0], v_pool[1], v_pool[2])

            a_seed_ds = ALDataset(a_seed[0], a_seed[1], a_seed[2])
            a_pool_ds = ALDataset(a_pool[0], a_pool[1], a_pool[2])

            # For a quick smoke test the pools can be subsampled here, e.g.
            #   v_pool_ds = v_pool_ds.subsample(12, 42)
            #   a_pool_ds = a_pool_ds.subsample(12, 42)

            # initiate testsets
            v_test_set = TensorDataset(v_test_ds[0], v_test_ds[1], v_test_ds[2])
            a_test_set = TensorDataset(a_test_ds[0], a_test_ds[1], a_test_ds[2])

            # start the experiments
            print("--" * 20)
            print("RUNNING VALENCE EXPERIMENT")
            print("--" * 20)
            v_train_rmse_curve, v_test_loss_curve = experiment_AL(v_seed_ds, v_pool_ds, v_test_set, v_model, method, lr,
                                                                  batch_size, device)

            print("--" * 20)
            print("RUNNING AROUSAL EXPERIMENT")
            print("--" * 20)
            a_train_rmse_curve, a_test_loss_curve = experiment_AL(a_seed_ds, a_pool_ds, a_test_set, a_model, method, lr,
                                                                  batch_size, device)

            # store results only once both runs have finished, so an
            # interrupted run never leaves an empty, half-initialised entry
            v_results[batch_size][lr][method] = {'train': v_train_rmse_curve, 'test': v_test_loss_curve}
            a_results[batch_size][lr][method] = {'train': a_train_rmse_curve, 'test': a_test_loss_curve}

----------------------------------------
METHOD: random. BATCH SIZE: 16. LEARNING RATE: 1e-05 
----------------------------------------
Downloading Valence model...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading Arousal model...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

----------------------------------------
RUNNING VALENCE EXPERIMENT
----------------------------------------
Total number of batches in the pool:  44
 Finding batch 1/44...
Current pool size:  710
Seed Size: 16 instances
Average RMSE:  0.032703203649900175
Average test RMSE: 0.22882304425170882
Method: random. Chosen indices: [654 114  25 281 250 228 142 104 692 558  89 604 432  32  30  95]
 Finding batch 2/44...
Current pool size:  694
Seed Size: 32 instances
Average RMSE:  0.017188646334003667
Average test RMSE: 0.1170248987089914
Method: random. Chosen indices: [223 238 517 616  27 574 203 665 558 429 225 459 603 284   6 163]
 Finding batch 3/44...
Current pool size:  678
Seed Size: 48 instances
Average RMSE:  0.013905941718869644
Average test RMSE: 0.1156854411456149
Method: random. Chosen indices: [432 348 284 159 220 344 104  94 389  99 367 352 618 270  44 470]
 Finding batch 4/44...
Current pool size:  662
Seed Size: 64 instances
Average RMSE:  0.011341417462253686
Average test 

Average RMSE:  0.003540247315137427
Average test RMSE: 0.08010095794103733
Method: random. Chosen indices: [113 172  54 130 121  43 168  21  72 131 169 162 158  85  23  60]
 Finding batch 35/44...
Current pool size:  166
Seed Size: 560 instances
Average RMSE:  0.0034888134870448926
Average test RMSE: 0.07965407524975093
Method: random. Chosen indices: [ 79  57  50  37   6  11  62 121 156  18 116 106 161 147  49  98]
 Finding batch 36/44...
Current pool size:  150
Seed Size: 576 instances
Average RMSE:  0.003643141592874176
Average test RMSE: 0.0824617288118272
Method: random. Chosen indices: [126 102  62  37   1  27 108  56  45 132 118  12 142  63  31 116]
 Finding batch 37/44...
Current pool size:  134
Seed Size: 592 instances
Average RMSE:  0.0033014481956015846
Average test RMSE: 0.0790524099417503
Method: random. Chosen indices: [ 34 118  81 113 129 109 114  40 121 115  66  63  70 133 124  61]
 Finding batch 38/44...
Current pool size:  118
Seed Size: 608 instances
Average RMSE:  0

Average RMSE:  0.004540312432662307
Average test RMSE: 0.06935910567151941
Method: random. Chosen indices: [193 233 164  96 122 292 196 119 210  22 162 242 195 197 339 333]
 Finding batch 25/44...
Current pool size:  326
Seed Size: 400 instances
Average RMSE:  0.004249278910789217
Average test RMSE: 0.06989103074302777
Method: random. Chosen indices: [ 77 253  18  64 257 302 169  51 225 269 233   7  73 209  79  38]
 Finding batch 26/44...
Current pool size:  310
Seed Size: 416 instances
Average RMSE:  0.004495855308014469
Average test RMSE: 0.07501978245057166
Method: random. Chosen indices: [240 135 173 203  41 168 273 194 162 249 277  18  35 120 147 116]
 Finding batch 27/44...
Current pool size:  294
Seed Size: 432 instances
Average RMSE:  0.004146007841130305
Average test RMSE: 0.06833360723138454
Method: random. Chosen indices: [ 46 222  50  51 227  85 153  14  23 166  28 150 183 191 220  74]
 Finding batch 28/44...
Current pool size:  278
Seed Size: 448 instances
Average RMSE:  0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading Arousal model...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

----------------------------------------
RUNNING VALENCE EXPERIMENT
----------------------------------------
Total number of batches in the pool:  44
 Finding batch 1/44...
Current pool size:  710
Seed Size: 16 instances
Average RMSE:  0.032703203649900175
Average test RMSE: 0.22882304425170882
Number of embedding vectors:  16
Number of embedding vectors:  710
Method: farthest-first. Chosen indices: [562 671 351 283 644 302 159 584 343 185 598 589  16   7  59 534]
 Finding batch 2/44...
Current pool size:  694
Seed Size: 32 instances
Average RMSE:  0.013527304549315246
Average test RMSE: 0.12392798596240662
Number of embedding vectors:  32
Number of embedding vectors:  694
Method: farthest-first. Chosen indices: [208 123 364 441  33 302 608 489 673 677 223  37 215  85 348 504]
 Finding batch 3/44...
Current pool size:  678
Seed Size: 48 instances
Average RMSE:  0.013291367239658956
Average test RMSE: 0.12617847259759352
Number of embedding vectors:  48
Number of embedding vectors:  678

Average RMSE:  0.004642408651813929
Average test RMSE: 0.08234557657441323
Number of embedding vectors:  416
Number of embedding vectors:  310
Method: farthest-first. Chosen indices: [186 300  93  48 178  37 236 127  77 279  66 248 140 230 208 234]
 Finding batch 27/44...
Current pool size:  294
Seed Size: 432 instances
Average RMSE:  0.0042706273429462555
Average test RMSE: 0.08203346218316032
Number of embedding vectors:  432
Number of embedding vectors:  294
Method: farthest-first. Chosen indices: [ 44 255  27  41  96  36  81 146 154  40 231 168  53 132  15 215]
 Finding batch 28/44...
Current pool size:  278
Seed Size: 448 instances
Average RMSE:  0.004346381138586849
Average test RMSE: 0.08129171074347308
Number of embedding vectors:  448
Number of embedding vectors:  278
Method: farthest-first. Chosen indices: [227 266  13 256 129 117 109  47 239  45  79 223  63 216 139 202]
 Finding batch 29/44...
Current pool size:  262
Seed Size: 464 instances
Average RMSE:  0.0041312334962813

Method: farthest-first. Chosen indices: [141 345 563 186 179 417 519 545 271 423 578 180 246  76  97 604]
 Finding batch 8/44...
Current pool size:  598
Seed Size: 128 instances
Average RMSE:  0.009227689209733306
Average test RMSE: 0.10064376918791339
Number of embedding vectors:  128
Number of embedding vectors:  598
Method: farthest-first. Chosen indices: [382 121 539  80 312  87 564  46  89 361 227 550 311 353 410 128]
 Finding batch 9/44...
Current pool size:  582
Seed Size: 144 instances
Average RMSE:  0.008294472399839688
Average test RMSE: 0.09093443701312809
Number of embedding vectors:  144
Number of embedding vectors:  582
Method: farthest-first. Chosen indices: [308  92 491 292 441  55  84   8 229 387  77 451 231 101 244  78]
 Finding batch 10/44...
Current pool size:  566
Seed Size: 160 instances
Average RMSE:  0.007856633118936508
Average test RMSE: 0.09809660525277625
Number of embedding vectors:  160
Number of embedding vectors:  566
Method: farthest-first. Chosen indic

Average RMSE:  0.0035692023886117865
Average test RMSE: 0.07201072444047138
Number of embedding vectors:  528
Number of embedding vectors:  198
Method: farthest-first. Chosen indices: [151 128  61  33 177 139  37   7 175 163  53 188 130  24  72   5]
 Finding batch 34/44...
Current pool size:  182
Seed Size: 544 instances
Average RMSE:  0.003536833330193956
Average test RMSE: 0.06819229473904588
Number of embedding vectors:  544
Number of embedding vectors:  182
Method: farthest-first. Chosen indices: [143 159 155  14  55 123 178 134  37  89   4  93 169  74 179  73]
 Finding batch 35/44...
Current pool size:  166
Seed Size: 560 instances
Average RMSE:  0.003405158906408164
Average test RMSE: 0.0647525391732408
Number of embedding vectors:  560
Number of embedding vectors:  166
Method: farthest-first. Chosen indices: [ 51 120 107  70   4  30 100  96  45 132  53  89  11  78 139 161]
 Finding batch 36/44...
Current pool size:  150
Seed Size: 576 instances
Average RMSE:  0.00319560389779137

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading Arousal model...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

----------------------------------------
RUNNING VALENCE EXPERIMENT
----------------------------------------
Total number of batches in the pool:  44
 Finding batch 1/44...
Current pool size:  710
Seed Size: 16 instances
Average RMSE:  0.032703203649900175
Average test RMSE: 0.22882304425170882
Method: mc-dropout. Chosen indices: [621 577 375 141 297 470 524 409 525 569 218  47 250 622 404  11]
 Finding batch 2/44...
Current pool size:  694
Seed Size: 32 instances
Average RMSE:  0.01265757120451506
Average test RMSE: 0.12300634715254573
Method: mc-dropout. Chosen indices: [285 408 154 174 517 632  51 252 598 584 391 291 609 167  70   3]
 Finding batch 3/44...
Current pool size:  678
Seed Size: 48 instances
Average RMSE:  0.01546830805792195
Average test RMSE: 0.119586608953323
Method: mc-dropout. Chosen indices: [325 142 457 572 511 619 391  47 180  96 459 121 346 623 642 415]
 Finding batch 4/44...
Current pool size:  662
Seed Size: 64 instances
Average RMSE:  0.01083530135545749
Aver

Average RMSE:  0.0036533457344314325
Average test RMSE: 0.08282451119064244
Method: mc-dropout. Chosen indices: [ 15 125  43  81  11 145 165 143 105 110 181  35  99  42 129  38]
 Finding batch 34/44...
Current pool size:  182
Seed Size: 544 instances
Average RMSE:  0.0034972837021538684
Average test RMSE: 0.08248875145225029
Method: mc-dropout. Chosen indices: [143 153 157 155  27  64 125  60  56 142 152  69 137 124  24 171]
 Finding batch 35/44...
Current pool size:  166
Seed Size: 560 instances
Average RMSE:  0.003231581321168597
Average test RMSE: 0.07996849111715625
Method: mc-dropout. Chosen indices: [105  70 147 157  89 125  72  26 103 109  32  58  48  14  20 114]
 Finding batch 36/44...
Current pool size:  150
Seed Size: 576 instances
Average RMSE:  0.003288954908214455
Average test RMSE: 0.07980536717118705
Method: mc-dropout. Chosen indices: [ 94  61  51 101  84  14 108 126  22  20 106  85  57  55  93  52]
 Finding batch 37/44...
Current pool size:  134
Seed Size: 592 instance

Average RMSE:  0.005542470678952397
Average test RMSE: 0.06984386109654495
Method: mc-dropout. Chosen indices: [253   9 307 347 159 218 189  60 198  65 232  29 248 372 326 221]
 Finding batch 23/44...
Current pool size:  358
Seed Size: 368 instances
Average RMSE:  0.005462182944545887
Average test RMSE: 0.06589803198502324
Method: mc-dropout. Chosen indices: [ 64 224 211 169 279  98 277  76  37 260 313 217 317  22 333  35]
 Finding batch 24/44...
Current pool size:  342
Seed Size: 384 instances
Average RMSE:  0.005218782312080526
Average test RMSE: 0.07336769279088703
Method: mc-dropout. Chosen indices: [ 48  36 324 305 331 288 199  96  55 229   6 181  22 285 338 341]
 Finding batch 25/44...
Current pool size:  326
Seed Size: 400 instances
Average RMSE:  0.004678032833922303
Average test RMSE: 0.10460390390349414
Method: mc-dropout. Chosen indices: [ 14 188 115 109 121 170 160  99  31 239 182 283  48 255  89 272]
 Finding batch 26/44...
Current pool size:  310
Seed Size: 416 instances


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Downloading Arousal model...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

----------------------------------------
RUNNING VALENCE EXPERIMENT
----------------------------------------
Total number of batches in the pool:  44
 Finding batch 1/44...
Current pool size:  710
Seed Size: 16 instances
Average RMSE:  0.032703203649900175
Average test RMSE: 0.47196166598611106
Method: random. Chosen indices: [ 97 320 437 681 261 383 156 703 485  68  93  87  95 442  98 381]
 Finding batch 2/44...
Current pool size:  694
Seed Size: 32 instances
Average RMSE:  0.02864648107709744
Average test RMSE: 0.41418016422382015
Method: random. Chosen indices: [133 569  61 600 575 337 686 125 420 362 681 433  52 294 614 319]
 Finding batch 3/44...
Current pool size:  678
Seed Size: 48 instances
Average RMSE:  0.0257328845949954
Average test RMSE: 0.3216341387563798
Method: random. Chosen indices: [360 106 591 519 217 158 672 493 229 110 358 569 376 117 285 587]
 Finding batch 4/44...
Current pool size:  662
Seed Size: 64 instances
Average RMSE:  0.020932060654450663
Average test RM

In [None]:
# inspect the raw valence results: batch_size -> lr -> method -> {'train', 'test'} curves
v_results

In [None]:
# produce graph


def plot_al_results(v_results, a_results, batch_size, lr):
    """Plot and save the active-learning curves of one (batch size, learning rate) run.

    Draws a 2x2 grid: the left column shows valence and the right column
    arousal; the top row plots every method's 'train' curve and the bottom row
    its 'test' curve. The figure is saved as a PNG under
    ``files/results/active_learning_experiments/`` (created if missing), shown,
    and then closed.

    Args:
        v_results: nested dict ``[batch_size][lr][method] -> {'train': ..., 'test': ...}``
            holding the valence curves.
        a_results: dict with the same layout holding the arousal curves.
        batch_size: first-level key selecting the run to plot.
        lr: second-level key selecting the run to plot.

    Raises:
        KeyError: if ``batch_size``/``lr`` are not present in either results
            dict, or a method entry lacks a 'train' or 'test' curve.
    """
    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(nrows=2, ncols=2, hspace=0.05, wspace=0.05)
    axs = gs.subplots(sharex=True)

    # column 0 -> valence, column 1 -> arousal
    panels = (('Valence', v_results), ('Arousal', a_results))
    for col, (title, results) in enumerate(panels):
        train_ax, test_ax = axs[0, col], axs[1, col]
        curves_by_method = results[batch_size][lr]

        for method, curves in curves_by_method.items():
            train_ax.plot(curves['train'], label=method)
            test_ax.plot(curves['test'], label=method)

        # Decorate each axis exactly once, after all curves are drawn.
        train_ax.set_title(title)
        for ax in (train_ax, test_ax):
            if curves_by_method:
                # skip the legend when nothing was plotted
                ax.legend()
            # grid() without an argument toggles visibility, so calling it
            # once per method made the result depend on the number of methods.
            ax.grid(True)
            if col == 1:
                # keep the right column's ticks away from the shared middle edge
                ax.yaxis.tick_right()

    axs[0, 0].set_ylabel('Training')
    axs[1, 0].set_ylabel('Testing')

    fig.text(0.5, 0.07, 'Number of Epochs', ha='center')
    fig.text(0.05, 0.5, 'RMSE', ha='center', va='center', rotation='vertical')

    # make sure the output directory exists before saving
    out_dir = Path('files/results/active_learning_experiments')
    out_dir.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_dir / f'al_exp_batch_size_{batch_size}_learning_rate_{lr}_full.png')

    plt.show()
    # release the figure; this function is called once per configuration
    plt.close(fig)


In [None]:
# one figure per (batch size, learning rate) configuration, batch size varying slowest
plot_configs = [(batch_size, lr) for batch_size in batch_sizes for lr in lrs]

for batch_size, lr in plot_configs:
    plot_al_results(v_results, a_results, batch_size, lr)

In [None]:
# show the raw valence results again before summarising them
v_results

In [None]:
# structure the results: for each selection method, collect the mean of the
# 'test' curve of every (batch size, learning rate) run, in storage order

v_res_summ = {
    method_name: [] for method_name in ('random', 'farthest-first', 'mc-dropout')
}

for lr_results in v_results.values():
    for method_results in lr_results.values():
        for method, curves in method_results.items():
            v_res_summ[method].append(np.mean(curves['test']))

In [None]:
# inspect the per-method lists of mean 'test' values
v_res_summ

In [None]:
# make a table to summarize the results

# Derive the (batch_size, learning_rate) row labels from the results that are
# actually stored, instead of a hard-coded copy of the parameter grid. The
# nested dict is walked in the same order in which v_res_summ was filled, so
# every row label lines up with its values even if the grid changes.
idx_tuples = [
    (batch_size_key, lr_key)
    for batch_size_key, lr_results in v_results.items()
    for lr_key in lr_results
]

# per-level label lists, kept under the original name for any later use
idx_arrs = [list(level) for level in zip(*idx_tuples)]

index = pd.MultiIndex.from_tuples(idx_tuples, names=["batch_size", "learning_rate"])

# construct the results dataframe: one column per method, one row per configuration

v_res_summ_df = pd.DataFrame(v_res_summ, index=index)


In [None]:
# render the summary table as LaTeX source and print it for copy-pasting
latex_table = v_res_summ_df.to_latex()
print(latex_table)