In [1]:
import datasets
import pickle
import transformers
import torch
import wandb
import numpy as np
import pandas as pd

from sklearn.metrics import precision_recall_fscore_support, accuracy_score, confusion_matrix, \
                            multilabel_confusion_matrix, matthews_corrcoef, average_precision_score
from sklearn.model_selection import train_test_split

from torch.nn import BCEWithLogitsLoss
from torch.utils.data import Dataset, DataLoader, random_split

from tqdm.auto import tqdm

from transformers import AutoTokenizer, BigBirdForSequenceClassification, Trainer, TrainingArguments, \
                            RobertaForSequenceClassification

## Preprocessing the data

In [None]:
labelizer = torch.load("saved/labelizer.torch")
l_df = labelizer.df

In [None]:
wanted_labels = ['19_desire_x', '19_intent_x', '19_capability_x', '19_timeframe_x', 
                 '18_substance', '18_depressed', '18_self_harm', '18_anxiety', 
                 '64_Yes']
df = l_df[wanted_labels].dropna()

In [None]:
texts = pd.read_pickle("saved/selected_messages.pickle")

In [13]:
%cd /

tokenizer = AutoTokenizer.from_pretrained("roberta-base")

/


In [7]:
%cd /

tokenizer = AutoTokenizer.from_pretrained("google/bigbird-roberta-base")

/


normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.


In [None]:
texts = texts.set_index('conversation_id')
texts = texts.loc[texts.interaction=='texter', ['interaction', 'message']]
texts['message'] = texts.message.str.strip().apply(lambda a:" "+a)
texts['encoded_message'] = " [" + texts.interaction + "] " + texts.message

In [7]:
encoded_conversations = texts.groupby(texts.index).encoded_message.agg(" ".join).apply(
    lambda a : f"{tokenizer.bos_token}{a}{tokenizer.eos_token}"
)

In [8]:
## adding labels to the text 
train_data = pd.DataFrame(encoded_conversations, index=encoded_conversations.index)
train_data.columns=['text']
train_data = pd.concat([train_data, df], axis=1, join='inner')

In [9]:
pickle.dump(train_data, open("saved/texter_texts_with_multilabels.pickle", "wb"))

## Tokenizing

In [3]:
train_data = pickle.load(open("saved/texts_with_multilabels.pickle","rb"))

In [4]:
## truncating longer sequences 
max_token_length = 512
train_data['text'] = train_data.text.apply(lambda x: x if len(x.split()) < max_token_length else " ".join(x.split()[:max_token_length]) )

In [5]:
%cd /
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

/


In [13]:
## padding to minimum length
min_token_length = 704
train_data['text'] = train_data.text.apply(lambda x: x if len(x.split()) >= min_token_length 
                                           else x + ((" " + tokenizer.pad_token)*(min_token_length - len(x.split()))))

In [6]:
## sorting the dataset by length
train_data['length'] = train_data.text.apply(lambda x: len(x.split()))
train_data = train_data.sort_values('length', axis=0)

In [7]:
train_data = pd.DataFrame(train_data.set_index("text"))
train_data = train_data.astype('int32')
train_data['labels'] = train_data[train_data.columns[:-1]].values.tolist()

In [8]:
## split into train and test data 
train_data, test_data = train_test_split(train_data, test_size=0.1)

train_data = datasets.Dataset.from_pandas(train_data)
test_data = datasets.Dataset.from_pandas(test_data)

In [9]:
cols = train_data.column_names
cols.remove('labels')

## tokenizing the text
def tokenization(batched_text):
    return tokenizer(batched_text['text'], padding = True, truncation=True, max_length = 512)

train_data = train_data.map(tokenization, batched = True, batch_size = len(train_data), remove_columns=cols)
test_data = test_data.map(tokenization, batched = True, batch_size = len(test_data), remove_columns=cols)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [10]:
train_data.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
test_data.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

In [13]:
%cd /data-imperial
pickle.dump(train_data, open("saved/roberta/ml_train_ds.pickle", "wb"))
pickle.dump(test_data, open("saved/roberta/ml_test_ds.pickle", "wb"))

/data-imperial


## Training

In [2]:
%cd /data-imperial/

train_data = pickle.load(open('saved/ml_train_ds.pickle', 'rb'))
test_data = pickle.load(open('saved/ml_test_ds.pickle', 'rb'))

/data-imperial


In [3]:
checkpoint = 'bigbird/output/finetune-ml/epoch-4'
model = BigBirdForSequenceClassification.from_pretrained(checkpoint, 
                                                        num_labels=9)

# model.save_pretrained("data-imperial/bigbird")

In [4]:
tokenizer = AutoTokenizer.from_pretrained("bigbird")

In [5]:
# metrics for training evaluation
# def compute_metrics(pred):
#     labels = pred.label_ids
#     pred_proba = torch.from_numpy(pred.predictions).sigmoid()
#     preds = (pred_proba>0.5).detach().numpy().astype('int32')
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
#     acc = accuracy_score(labels, preds)
#     auprc = average_precision_score(labels, preds, average='weighted')
#     cm = multilabel_confusion_matrix(labels, preds)
#     print([b for i in cm for b in i[0]])
#     print([b for i in cm for b in i[1]])
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall,
#         'auprc': auprc
#     }

# metrics for final evaluation
def compute_metrics(pred):
    labels = pred.label_ids
    pred_proba = torch.from_numpy(pred.predictions).sigmoid()
    preds = (pred_proba>0.5).detach().numpy().astype('int32')
    df = []
    for i in range(9):
        label = labels[:,i]
        pred = preds[:,i]
        precision, recall, f1, _ = precision_recall_fscore_support(label, pred, average='binary')
        acc = accuracy_score(label, pred)
        auprc = average_precision_score(label, pred)
        mcc = matthews_corrcoef(label, pred)
        df.append([precision, recall, f1, acc, auprc, mcc])
        print(confusion_matrix(label, pred, labels=[0,1]))
    df = pd.DataFrame(df, columns=['precision', 'recall', 'f1', 'acc', 'auprc', 'mcc'])
    print(df)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'auprc': auprc
    }

In [6]:
# define the training arguments
training_args = TrainingArguments(
    output_dir = 'bigbird/output/finetune-ml',
    num_train_epochs = 5,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 32,    
    per_device_eval_batch_size= 2,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    disable_tqdm = False, 
    load_best_model_at_end=False,
    warmup_steps=160,
    weight_decay=0.01,
    logging_steps = 4,
    learning_rate = 1e-5,
    log_level = 'warning', 
    fp16 = True,
    logging_dir='bigbird/logs/finetune-ml',
    dataloader_num_workers = 0,
    run_name = 'bigbird_ml_classification_test'
)

In [7]:
## custom trainer class for multilabel training
class MultilabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = BCEWithLogitsLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels),
                        labels.float().view(-1, self.model.config.num_labels))
        return (loss, outputs) if return_outputs else loss

In [8]:
%cd /data-imperial

# instantiate the trainer class and check for available devices
trainer = MultilabelTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=test_data
)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
device

/data-imperial


device(type='cuda')

In [9]:
# train the model
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auprc
0,0.1724,0.204964,0.44722,0.798429,0.790415,0.807258,0.715852


  nn.utils.clip_grad_norm_(


[3783, 294, 4624, 164, 4903, 146, 5261, 67, 5460, 4, 3564, 590, 4809, 168, 3426, 580, 395, 325]
[306, 1102, 120, 577, 83, 353, 73, 84, 16, 5, 542, 789, 147, 361, 509, 970, 286, 4479]


KeyboardInterrupt: 

## Experiment on Texters

In [9]:
trainer.evaluate(eval_dataset=train_data)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[39958   590]
 [10182  4112]]
[[47784    73]
 [ 6165   820]]
[[50275    22]
 [ 4272   273]]
[[53246     2]
 [ 1563    31]]
[[54489   112]
 [  171    70]]
[[37872  3869]
 [ 7277  5824]]
[[47830  2273]
 [ 1508  3231]]
[[34198  5935]
 [ 5249  9460]]
[[ 5886  1088]
 [10733 37135]]
   precision    recall        f1       acc     auprc       mcc
0   0.874521  0.287673  0.432933  0.803581  0.437237  0.428238
1   0.918253  0.117394  0.208175  0.886255  0.220212  0.305215
2   0.925424  0.060066  0.112810  0.921702  0.133483  0.224748
3   0.939394  0.019448  0.038107  0.971463  0.046769  0.132968
4   0.384615  0.290456  0.330969  0.994840  0.114832  0.331697
5   0.600846  0.444546  0.511012  0.796762  0.399794  0.393319
6   0.587028  0.681789  0.630870  0.931056  0.427726  0.595101
7   0.614485  0.643144  0.628488  0.796069  0.490914  0.488291
8   0.971535  0.775779  0.862692  0.784454  0.949405  0.449294


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.32863667607307434,
 'eval_accuracy': 0.7844535210240327,
 'eval_f1': 0.8626918028597647,
 'eval_precision': 0.9715354629411611,
 'eval_recall': 0.7757792262053982,
 'eval_auprc': 0.9494046989761449,
 'eval_runtime': 7168.104,
 'eval_samples_per_second': 7.651,
 'eval_steps_per_second': 3.825}

## Experiment for Helpfulness

In [9]:
trainer.evaluate(eval_dataset=train_data)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[37969  2579]
 [ 3554 10740]]
[[46758  1099]
 [ 1710  5275]]
[[49449   848]
 [ 1485  3060]]
[[52984   264]
 [ 1188   406]]
[[54475   126]
 [  154    87]]
[[34348  7393]
 [ 5109  7992]]
[[48360  1743]
 [ 1652  3087]]
[[34372  5761]
 [ 5082  9627]]
[[ 2738  4236]
 [ 3360 44508]]
   precision    recall        f1       acc     auprc       mcc
0   0.806367  0.751364  0.777894  0.888170  0.670680  0.704075
1   0.827581  0.755190  0.789730  0.948780  0.656161  0.761671
2   0.783009  0.673267  0.724003  0.957460  0.554252  0.703445
3   0.605970  0.254705  0.358657  0.973524  0.176006  0.381919
4   0.408451  0.360996  0.383260  0.994894  0.150257  0.381438
5   0.519467  0.610030  0.561118  0.772036  0.410049  0.410889
6   0.639130  0.651403  0.645208  0.938095  0.446455  0.611334
7   0.625617  0.654497  0.639732  0.802287  0.502131  0.503830
8   0.913097  0.929807  0.921376  0.861493  0.910271  0.341677


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.23033231496810913,
 'eval_accuracy': 0.8614930163013749,
 'eval_f1': 0.9213762265557074,
 'eval_precision': 0.9130969965534219,
 'eval_recall': 0.9298069691652043,
 'eval_auprc': 0.9102708631397491,
 'eval_runtime': 7071.7171,
 'eval_samples_per_second': 7.755,
 'eval_steps_per_second': 3.878}

## Evaluation for Epoch 3

In [9]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[3711  366]
 [ 261 1147]]
[[4618  170]
 [ 109  588]]
[[4889  160]
 [  85  351]]
[[5274   54]
 [  98   59]]
[[5464    0]
 [  21    0]]
[[3784  370]
 [ 677  654]]
[[4831  146]
 [ 159  349]]
[[3724  282]
 [ 729  750]]
[[ 274  446]
 [ 149 4616]]
   precision    recall        f1       acc     auprc       mcc
0   0.758096  0.814631  0.785347  0.885688  0.665153  0.708437
1   0.775726  0.843615  0.808247  0.949134  0.674287  0.779896
2   0.686888  0.805046  0.741288  0.955333  0.568474  0.719718
3   0.522124  0.375796  0.437037  0.972288  0.214079  0.429243
4   0.000000  0.000000  0.000000  0.996171  0.003829  0.000000
5   0.638672  0.491360  0.555414  0.809116  0.437245  0.442583
6   0.705051  0.687008  0.695912  0.944394  0.513363  0.665385
7   0.726744  0.507099  0.597372  0.815679  0.501439  0.495863
8   0.911893  0.968730  0.939453  0.891522  0.910543  0.442127


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.20128501951694489,
 'eval_accuracy': 0.8915223336371924,
 'eval_f1': 0.9394525287473288,
 'eval_precision': 0.9118925325958119,
 'eval_recall': 0.9687303252885624,
 'eval_auprc': 0.9105429451718668,
 'eval_runtime': 813.6131,
 'eval_samples_per_second': 6.742,
 'eval_steps_per_second': 3.371}

## Epoch 4

In [10]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[3637  440]
 [ 199 1209]]
[[4571  217]
 [  75  622]]
[[4823  226]
 [  47  389]]
[[5248   80]
 [  79   78]]
[[5464    0]
 [  21    0]]
[[3749  405]
 [ 655  676]]
[[4867  110]
 [ 201  307]]
[[3796  210]
 [ 778  701]]
[[ 245  475]
 [ 118 4647]]
   precision    recall        f1       acc     auprc       mcc
0   0.733172  0.858665  0.790972  0.883500  0.665829  0.715176
1   0.741359  0.892396  0.809896  0.946764  0.675259  0.783780
2   0.632520  0.892202  0.740247  0.950228  0.572905  0.726529
3   0.493671  0.496815  0.495238  0.971012  0.259666  0.480318
4   0.000000  0.000000  0.000000  0.996171  0.003829  0.000000
5   0.625347  0.507889  0.560531  0.806746  0.437023  0.442268
6   0.736211  0.604331  0.663784  0.943300  0.481560  0.636830
7   0.769484  0.473969  0.586611  0.819872  0.506553  0.502665
8   0.907263  0.975236  0.940022  0.891887  0.906309  0.428591


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.20445317029953003,
 'eval_accuracy': 0.8918869644484959,
 'eval_f1': 0.9400222514412865,
 'eval_precision': 0.9072627879734478,
 'eval_recall': 0.9752360965372507,
 'eval_auprc': 0.9063086377436383,
 'eval_runtime': 815.736,
 'eval_samples_per_second': 6.724,
 'eval_steps_per_second': 3.363}

## Epoch 5

In [9]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[3765  312]
 [ 319 1089]]
[[4652  136]
 [ 150  547]]
[[4931  118]
 [ 108  328]]
[[5289   39]
 [ 101   56]]
[[5454   10]
 [  16    5]]
[[3493  661]
 [ 499  832]]
[[4830  147]
 [ 165  343]]
[[3484  522]
 [ 545  934]]
[[ 380  340]
 [ 329 4436]]
   precision    recall        f1       acc     auprc       mcc
0   0.777302  0.773438  0.775365  0.884959  0.659353  0.698051
1   0.800878  0.784792  0.792754  0.947858  0.655870  0.762986
2   0.735426  0.752294  0.743764  0.958797  0.572946  0.721419
3   0.589474  0.356688  0.444444  0.974476  0.228672  0.446539
4   0.333333  0.238095  0.277778  0.995260  0.082282  0.279399
5   0.557267  0.625094  0.589235  0.788514  0.439320  0.448802
6   0.700000  0.675197  0.687375  0.943118  0.502720  0.656228
7   0.641484  0.631508  0.636457  0.805469  0.504464  0.503702
8   0.928811  0.930955  0.929882  0.878031  0.924663  0.461745


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.2092665731906891,
 'eval_accuracy': 0.8780309936189608,
 'eval_f1': 0.9298815637773818,
 'eval_precision': 0.9288107202680067,
 'eval_recall': 0.9309548793284366,
 'eval_auprc': 0.9246626404654953,
 'eval_runtime': 810.754,
 'eval_samples_per_second': 6.765,
 'eval_steps_per_second': 3.383}

## MultiLabel RoBERTa Evaluation

In [10]:
trainer.evaluate()

[[3729  328]
 [ 401 1027]]
[[4661  151]
 [ 195  478]]
[[4894  152]
 [ 137  302]]
[[5259   62]
 [  98   66]]
[[5462    1]
 [  20    2]]
[[3872  288]
 [ 797  528]]
[[4878  133]
 [ 235  239]]
[[3745  332]
 [ 702  706]]
[[  45  673]
 [  35 4732]]
   precision    recall        f1       acc     auprc       mcc
0   0.757934  0.719188  0.738052  0.867092  0.618205  0.649492
1   0.759936  0.710253  0.734255  0.936919  0.575298  0.699027
2   0.665198  0.687927  0.676372  0.947311  0.482585  0.647810
3   0.515625  0.402439  0.452055  0.970830  0.225375  0.440852
4   0.666667  0.090909  0.160000  0.996171  0.064252  0.245261
5   0.647059  0.398491  0.493227  0.802188  0.403152  0.396037
6   0.642473  0.504219  0.565012  0.932908  0.366792  0.533798
7   0.680154  0.501420  0.577269  0.811486  0.469029  0.468356
8   0.875486  0.992658  0.930397  0.870921  0.875439  0.155675


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.23441153764724731,
 'eval_accuracy': 0.8709206927985415,
 'eval_f1': 0.9303971686983877,
 'eval_precision': 0.8754856614246068,
 'eval_recall': 0.9926578560939795,
 'eval_auprc': 0.875438758908582,
 'eval_runtime': 194.799,
 'eval_samples_per_second': 28.157,
 'eval_steps_per_second': 14.081}

## Finetuned Epoch 4

In [9]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


  _warn_prf(average, modifier, msg_start, len(result))


[3661, 416, 4617, 171, 4897, 152, 5285, 43, 5464, 0, 3716, 438, 4808, 169, 3726, 280, 242, 478]
[204, 1204, 91, 606, 75, 361, 93, 64, 21, 0, 640, 691, 154, 354, 714, 765, 102, 4663]


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.20020583271980286,
 'eval_accuracy': 0.4641750227894257,
 'eval_f1': 0.7942772972955352,
 'eval_precision': 0.7918829878640782,
 'eval_recall': 0.8061470098129976,
 'eval_auprc': 0.706824153451631,
 'eval_runtime': 820.5507,
 'eval_samples_per_second': 6.685,
 'eval_steps_per_second': 3.343}

In [None]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[3661  416]
 [ 204 1204]]
[[4617  171]
 [  91  606]]
[[4897  152]
 [  75  361]]
[[5285   43]
 [  93   64]]
[[5464    0]
 [  21    0]]
[[3716  438]
 [ 640  691]]
[[4808  169]
 [ 154  354]]
[[3726  280]
 [ 714  765]]
[[ 242  478]
 [ 102 4663]]
   precision    recall        f1       acc     auprc       mcc
0   0.743210  0.855114  0.795244  0.886964  0.672721  0.721073
1   0.779923  0.869440  0.822252  0.952233  0.694687  0.796321
2   0.703704  0.827982  0.760801  0.958614  0.596327  0.741238
3   0.598131  0.407643  0.484848  0.975205  0.260779  0.481754
4   0.000000  0.000000  0.000000  0.996171  0.003829  0.000000
5   0.612046  0.519159  0.561789  0.803464  0.434431  0.438668
6   0.676864  0.696850  0.686712  0.941112  0.499750  0.654306
7   0.732057  0.517241  0.606181  0.818778  0.508824  0.505515
8   0.907022  0.978594  0.941450  0.894257  0.906202  0.438327


  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


## Epoch 5

In [9]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[3783, 294, 4624, 164, 4903, 146, 5261, 67, 5460, 4, 3564, 590, 4809, 168, 3426, 580, 395, 325]
[306, 1102, 120, 577, 83, 353, 73, 84, 16, 5, 542, 789, 147, 361, 509, 970, 286, 4479]


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.20496368408203125,
 'eval_accuracy': 0.4472196900638104,
 'eval_f1': 0.7984294759274478,
 'eval_precision': 0.7904147591073108,
 'eval_recall': 0.8072579152008887,
 'eval_auprc': 0.7158517737385894,
 'eval_runtime': 818.7782,
 'eval_samples_per_second': 6.699,
 'eval_steps_per_second': 3.35}

In [9]:
trainer.evaluate()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448238472/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[[3783  294]
 [ 306 1102]]
[[4624  164]
 [ 120  577]]
[[4903  146]
 [  83  353]]
[[5261   67]
 [  73   84]]
[[5460    4]
 [  16    5]]
[[3564  590]
 [ 542  789]]
[[4809  168]
 [ 147  361]]
[[3426  580]
 [ 509  970]]
[[ 395  325]
 [ 286 4479]]
   precision    recall        f1       acc     auprc       mcc
0   0.789398  0.782670  0.786020  0.890611  0.673627  0.712558
1   0.778677  0.827834  0.802503  0.948222  0.666493  0.773221
2   0.707415  0.809633  0.755080  0.958250  0.587879  0.734366
3   0.556291  0.535032  0.545455  0.974476  0.310943  0.532436
4   0.555556  0.238095  0.333333  0.996354  0.135192  0.362181
5   0.572154  0.592787  0.582288  0.793619  0.437980  0.445422
6   0.682420  0.710630  0.696239  0.942571  0.511748  0.664707
7   0.625806  0.655849  0.640475  0.801459  0.503233  0.503707
8   0.932348  0.939979  0.936148  0.888605  0.928530  0.500342


[34m[1mwandb[0m: Currently logged in as: [33mkiatann[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


{'eval_loss': 0.20496368408203125,
 'eval_accuracy': 0.8886052871467639,
 'eval_f1': 0.9361479778451248,
 'eval_precision': 0.9323480432972523,
 'eval_recall': 0.9399790136411332,
 'eval_auprc': 0.9285298001252001,
 'eval_runtime': 818.4317,
 'eval_samples_per_second': 6.702,
 'eval_steps_per_second': 3.352}