# Ensemble - RoBERTa + Logistic Regression

This notebook evaluates Logistic Regression, RoBERTa, and two ensembles on the test set, reporting both the classification performance and the prediction time of each model.

>**Note:** This notebook was run in Google Colab, so the data is copied from a mounted Google Drive folder instead of being referenced directly. The data is the same as in the repository.

## Imports

In [2]:
from google.colab import drive
import glob

drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!pip install simpletransformers -q

[K     |████████████████████████████████| 204kB 13.2MB/s 
[K     |████████████████████████████████| 1.1MB 32.8MB/s 
[K     |████████████████████████████████| 71kB 9.3MB/s 
[K     |████████████████████████████████| 317kB 33.9MB/s 
[K     |████████████████████████████████| 7.4MB 54.0MB/s 
[K     |████████████████████████████████| 1.8MB 50.1MB/s 
[K     |████████████████████████████████| 2.9MB 56.3MB/s 
[K     |████████████████████████████████| 1.4MB 49.3MB/s 
[K     |████████████████████████████████| 51kB 9.2MB/s 
[K     |████████████████████████████████| 102kB 14.3MB/s 
[K     |████████████████████████████████| 163kB 52.9MB/s 
[K     |████████████████████████████████| 112kB 53.2MB/s 
[K     |████████████████████████████████| 4.5MB 49.1MB/s 
[K     |████████████████████████████████| 133kB 59.6MB/s 
[K     |████████████████████████████████| 102kB 14.3MB/s 
[K     |████████████████████████████████| 890kB 46.5MB/s 
[K     |████████████████████████████████| 71kB 10.7MB/s 
[

In [3]:
import pandas as pd
import numpy as np
import torch 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
from sklearn.model_selection import KFold
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

## Load Data

In [9]:
# CHANGE TO YOUR PATH
colab_resources_path = "/content/drive/My Drive/Machine Learning/Project/colab_resources"

In [10]:
data_files = glob.glob(colab_resources_path + "/*.csv")
data_files += glob.glob(colab_resources_path + "/*.py")
for data_file in data_files:
    print('Copying file {} to colab root.'.format(data_file))
    !cp "$data_file" .

Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/nam.csv to colab root.
Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/am.csv to colab root.
Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/test.csv to colab root.
Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/am_additional.csv to colab root.
Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/random.csv to colab root.
Copying file /content/drive/My Drive/Machine Learning/Project/colab_resources/data_preprocess.py to colab root.


In [11]:
from data_preprocess import getTrainData, getTestData

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
# Load three text variants of the train and test sets; the cells below read the
# 'text' and 'label' columns of these frames.
# NOTE(review): include_random=True presumably adds the rows of random.csv to the
# training set - confirm in data_preprocess.py.
train_data_all = getTrainData(include_random=True) # article title + body
train_data_title = getTrainData(include_random=True, n_sentences=0) # article title
train_data_body = getTrainData(include_random=True, no_title=True) # article body

# Matching test-set variants (same keyword arguments, minus include_random).
test_data_all = getTestData() # article title + body
test_data_title = getTestData(n_sentences=0) # article title
test_data_body = getTestData(no_title=True) # article body

## Test

In [43]:
def getResults(model, labels, predictions, time):
    """Build a one-row results table for a single model.

    Parameters
    ----------
    model : str
        Name shown in the ``model`` column.
    labels : array-like
        Ground-truth labels.
    predictions : array-like
        Predicted labels, aligned with ``labels``.
    time : str
        Human-readable prediction time (e.g. ``"67.9 ms"``); stored verbatim.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``model, accuracy, precision, recall,
        f1, mcc, time``. The five metrics are computed with the scikit-learn
        defaults and stored as floats rounded to 4 decimals.
    """
    row = [
        model,
        float(np.round(accuracy_score(labels, predictions), 4)),
        float(np.round(precision_score(labels, predictions), 4)),
        float(np.round(recall_score(labels, predictions), 4)),
        float(np.round(f1_score(labels, predictions), 4)),
        float(np.round(matthews_corrcoef(labels, predictions), 4)),
        time,
    ]
    # Build the frame from a plain list: routing the row through np.array()
    # would coerce every value to a string because the row mixes str and float,
    # leaving the metric columns non-numeric.
    return pd.DataFrame(
        [row],
        columns=['model', 'accuracy', 'precision', 'recall', 'f1', 'mcc', 'time'],
    )

### Logistic Regression - Title + Body

In [28]:
def logreg_predict(vectorizer, logreg, X_test):
    """Vectorize raw texts with the fitted vectorizer and return the classifier's predicted labels."""
    return logreg.predict(vectorizer.transform(X_test))

In [35]:
# --- Features: TF-IDF fitted on the full (title + body) training text ---
vectorizer = TfidfVectorizer(stop_words='english', lowercase=True, strip_accents='ascii')
X_train_v = vectorizer.fit_transform(train_data_all['text'].array)
y_train = train_data_all['label'].array

# --- Classifier: L2-penalised logistic regression, constructed and fitted in one step ---
logreg = LogisticRegression(penalty='l2', C=17, max_iter=1000, random_state=0).fit(X_train_v, y_train)

# --- Test-set predictions, consumed by the results cell below ---
predictions = logreg_predict(vectorizer, logreg, test_data_all['text'].array)

In [39]:
%%timeit
logreg_predict(vectorizer, logreg, test_data_all['text'].array)

10 loops, best of 3: 67.9 ms per loop


In [44]:
# Score the Logistic Regression predictions against the test labels.
# The time string is copied by hand from the %%timeit output of the previous cell;
# update it whenever that benchmark is re-run.
labels = test_data_all['label'].array
result_logreg = getResults("logreg", labels, predictions, "67.9 ms")
result_logreg

Unnamed: 0,model,accuracy,precision,recall,f1,mcc,time
0,logreg,0.9308,0.9778,0.9072,0.9412,0.861,67.9 ms


### RoBERTa

In [49]:
# Shared simpletransformers configuration, reused by every RoBERTa model in this notebook.
model_args = ClassificationArgs(sliding_window=True)
model_args.num_train_epochs = 4
model_args.save_best_model = True
model_args.tie_value = 1
# ClassificationArgs has no `batch_size` field: the former `model_args.batch_size = 16`
# only created an unused attribute, so training and prediction silently ran with the
# library default. Set the real fields explicitly. 8 is the value the recorded runs
# actually used (1594 titles -> 200 steps per epoch, 159 test titles -> 20 eval steps);
# raise both to 16 if that was the intent, and expect the reported numbers to change.
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.learning_rate = 2e-5
model_args.overwrite_output_dir = True
model_args.max_seq_length = 512
model_args.max_grad_norm = 1
model_args.use_multiprocessing = True
model_args.manual_seed = 4
model_args.reprocess_input_data = True
model_args.labels_list = [0, 1]

In [50]:
# Train
train_data_all_r = train_data_all.rename(columns={"label": "labels"})
roberta_all = ClassificationModel('roberta', 'roberta-base', args=model_args)
roberta_all.train_model(train_data_all_r, acc=matthews_corrcoef)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1594.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=588.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=588.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=588.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=588.0), HTML(value='')))





(2352, 0.13145058261540238)

In [55]:
def roberta_predict(roberta, X_test):
    """Predict one binary label per text by majority vote over its sliding windows.

    Parameters
    ----------
    roberta : object
        Model whose ``predict(X_test)`` returns ``(predictions, model_outputs)``,
        where ``model_outputs`` holds one 2-D array of raw scores per text
        (rows = windows, columns = classes 0 and 1).
    X_test : sequence of str
        Raw texts to classify.

    Returns
    -------
    numpy.ndarray
        Integer array with one label per text.

    Notes
    -----
    An even split of window votes resolves to ``roberta.args.tie_value`` (1 when
    the model exposes no such setting). The previous ``np.rint(mean)`` rounded
    0.5 to the nearest even integer, i.e. 0, which contradicted the configured
    ``tie_value = 1`` and the ensembles, where a 0.5 probability yields class 1.
    """
    _, model_outputs = roberta.predict(X_test)
    tie_value = getattr(getattr(roberta, "args", None), "tie_value", 1)

    voted = []
    for window_scores in model_outputs:
        # Share of windows voting for class 1 (labels are binary).
        positive_share = np.mean(np.argmax(window_scores, axis=1))
        if positive_share == 0.5:
            voted.append(tie_value)
        else:
            voted.append(int(positive_share > 0.5))
    return np.array(voted).astype(int)

In [92]:
predictions_roberta = roberta_predict(roberta_all, test_data_all['text'].array)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [57]:
%%timeit
roberta_predict(roberta_all, test_data_all['text'].array)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))


1 loop, best of 3: 9.21 s per loop


In [93]:
# Score the RoBERTa (title + body) predictions against the test labels.
# The time string is copied by hand from the %%timeit output of the previous cell;
# update it whenever that benchmark is re-run.
labels = test_data_all['label'].array
result_roberta_all = getResults("roberta_all", labels, predictions_roberta, "9.21 s")
result_roberta_all

Unnamed: 0,model,accuracy,precision,recall,f1,mcc,time
0,roberta_all,0.9434,0.949,0.9588,0.9538,0.8808,9.21 s


### Ensemble: RoBERTa + LR

In [65]:
def logreg_predict_proba(vectorizer, logreg, X_test):
    """Vectorize raw texts with the fitted vectorizer and return the classifier's class probabilities."""
    return logreg.predict_proba(vectorizer.transform(X_test))

In [70]:
from scipy.special import softmax
def getProbabilitiesRoberta(pred):
    """Average the per-window softmax probabilities of each example.

    ``pred`` is an iterable holding one 2-D array of raw scores per example
    (rows = windows, columns = classes). Each row is turned into probabilities
    with softmax and the rows are then averaged, giving one probability
    vector per example. Returns the vectors stacked in a NumPy array.
    """
    averaged = []
    for window_scores in pred:
        window_probs = softmax(window_scores, axis=1)
        averaged.append(np.sum(window_probs, axis=0) / len(window_scores))
    return np.array(averaged)

In [75]:
def roberta_predic_proba(roberta, X_test):
    """Run the RoBERTa model on raw texts and return the window-averaged class probabilities per text."""
    _, raw_outputs = roberta.predict(X_test)
    return getProbabilitiesRoberta(raw_outputs)

In [125]:
def ensemble_roberta_lr_predict(roberta, vectorizer, logreg, X_test, w_lr=1, w_rb=1):
    """Predict labels from a weighted average of RoBERTa and Logistic Regression.

    Parameters
    ----------
    roberta : object
        Trained RoBERTa model accepted by ``roberta_predic_proba``.
    vectorizer, logreg : object
        Fitted vectorizer and classifier accepted by ``logreg_predict_proba``.
    X_test : sequence of str
        Raw texts to classify.
    w_lr, w_rb : float, optional
        Weights of the Logistic Regression and RoBERTa probabilities. Both
        default to 1, i.e. a plain average (the values previously hard-coded).

    Returns
    -------
    numpy.ndarray
        One label per text: 0 when the weighted probability of column 0
        exceeds 0.5, otherwise 1 (so an exact 0.5 yields 1).

    Raises
    ------
    ValueError
        If the weights sum to zero.
    """
    weight_sum = w_lr + w_rb
    if weight_sum == 0:
        raise ValueError("Ensemble weights must not sum to zero.")

    # Column 0 of each probability matrix is used as the score of label 0.
    prob_rb = roberta_predic_proba(roberta, X_test)[:, 0]
    prob_lr = logreg_predict_proba(vectorizer, logreg, X_test)[:, 0]

    prob = (prob_lr * w_lr + prob_rb * w_rb) / weight_sum

    return np.where(prob > 0.5, 0, 1)

In [126]:
predictions_ensemble_roberta_and_lr = ensemble_roberta_lr_predict(roberta_all, vectorizer, logreg, test_data_all['text'].array)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [80]:
%%timeit
ensemble_roberta_lr_predict(roberta_all, vectorizer, logreg, test_data_all['text'].array)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))


1 loop, best of 3: 9.48 s per loop


In [127]:
# Score the RoBERTa + LR ensemble predictions against the test labels.
# The time string is copied by hand from the %%timeit output of the previous cell;
# update it whenever that benchmark is re-run.
labels = test_data_all['label'].array
result_ensemble_roberta_and_lr = getResults("ensemble_roberta_and_lr", labels, predictions_ensemble_roberta_and_lr, "9.48 s")
result_ensemble_roberta_and_lr

Unnamed: 0,model,accuracy,precision,recall,f1,mcc,time
0,ensemble_roberta_and_lr,0.9434,0.9583,0.9485,0.9534,0.8814,9.48 s


### Ensemble: RoBERTa-Title + RoBERTa-Body + LR

In [106]:
# Train RoBERTa-Title
# Fresh roberta-base classifier fine-tuned on the title-only frame; it shares
# model_args with the other RoBERTa models. The target column is renamed to
# "labels" before the frame is handed to simpletransformers.
train_data_title_r = train_data_title.rename(columns={"label": "labels"})
roberta_title = ClassificationModel('roberta', 'roberta-base', args=model_args)
roberta_title.train_model(train_data_title_r, acc=matthews_corrcoef)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1594.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=200.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=200.0), HTML(value='')))





(800, 0.22571574255125598)

In [107]:
# Train RoBERTa-Body
# Fresh roberta-base classifier fine-tuned on the body-only frame; it shares
# model_args with the other RoBERTa models. The target column is renamed to
# "labels" before the frame is handed to simpletransformers.
train_data_body_r = train_data_body.rename(columns={"label": "labels"})
roberta_body = ClassificationModel('roberta', 'roberta-base', args=model_args)
roberta_body.train_model(train_data_body_r, acc=matthews_corrcoef)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1594.0), HTML(value='')))

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f4bc09fc0b8>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f4baeccb4e0>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    sel




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=581.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=581.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=581.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=581.0), HTML(value='')))





(2324, 0.12713231279851928)

In [110]:
def ensemble_roberta_title_and_body_and_lr_predict(roberta_title, roberta_body, vectorizer, logreg, X_test_all, X_test_title, X_test_body,
                                                   w_lr=0.877, w_rb_title=0.863, w_rb_body=0.901):
    """Predict labels from a weighted average of three models.

    The title RoBERTa model scores ``X_test_title``, the body RoBERTa model
    scores ``X_test_body`` and Logistic Regression scores ``X_test_all``; the
    three inputs must be aligned row by row.

    Parameters
    ----------
    roberta_title, roberta_body : object
        Trained RoBERTa models accepted by ``roberta_predic_proba``.
    vectorizer, logreg : object
        Fitted vectorizer and classifier accepted by ``logreg_predict_proba``.
    X_test_all, X_test_title, X_test_body : sequence of str
        Full, title-only and body-only texts of the same examples.
    w_lr, w_rb_title, w_rb_body : float, optional
        Weights of the three probability vectors. The defaults are the values
        previously hard-coded. 0.877 and 0.901 match the mean cross-validation
        MCC printed for Logistic Regression and RoBERTa at the end of the
        notebook; 0.863 is presumably the equivalent score of the title model,
        from a run not shown here.

    Returns
    -------
    numpy.ndarray
        One label per example: 0 when the weighted probability of column 0
        exceeds 0.5, otherwise 1 (so an exact 0.5 yields 1).

    Raises
    ------
    ValueError
        If the weights sum to zero.
    """
    weight_sum = w_lr + w_rb_title + w_rb_body
    if weight_sum == 0:
        raise ValueError("Ensemble weights must not sum to zero.")

    # Column 0 of each probability matrix is used as the score of label 0.
    prob_rb_title = roberta_predic_proba(roberta_title, X_test_title)[:, 0]
    prob_rb_body = roberta_predic_proba(roberta_body, X_test_body)[:, 0]
    prob_lr = logreg_predict_proba(vectorizer, logreg, X_test_all)[:, 0]

    prob = (prob_lr * w_lr + prob_rb_body * w_rb_body + prob_rb_title * w_rb_title) / weight_sum

    return np.where(prob > 0.5, 0, 1)

In [112]:
# Raw text arrays of the three test-set variants, one per model of the ensemble.
# NOTE(review): the ensemble combines them element-wise, so row i of each array is
# assumed to describe the same article - confirm against data_preprocess.py.
X_test_all = test_data_all['text'].array
X_test_title = test_data_title['text'].array
X_test_body = test_data_body['text'].array
predictions_ensemble_roberta_title_and_body_and_lr = ensemble_roberta_title_and_body_and_lr_predict(roberta_title, roberta_body, vectorizer, logreg, X_test_all, X_test_title, X_test_body)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=56.0), HTML(value='')))




In [116]:
%%timeit
ensemble_roberta_title_and_body_and_lr_predict(roberta_title, roberta_body, vectorizer, logreg, X_test_all, X_test_title, X_test_body)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=56.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=56.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=56.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=159.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=56.0), HTML(value='')))


1 loop, best of 3: 12.1 s per loop


In [119]:
# Score the three-model ensemble. `labels` is the test-label array assigned in the
# earlier result cells; the time string is copied by hand from the %%timeit output above.
result_ensemble_roberta_title_and_body_and_lr = getResults("ensemble_roberta_title_and_body_and_lr", labels, predictions_ensemble_roberta_title_and_body_and_lr, "12.1 s")

## Results

In [122]:
# Stack the four one-row result frames into a single comparison table;
# ignore_index=True replaces the repeated 0 index with 0..3.
results = pd.concat([result_logreg, result_roberta_all, result_ensemble_roberta_and_lr, result_ensemble_roberta_title_and_body_and_lr], ignore_index=True)
results

Unnamed: 0,model,accuracy,precision,recall,f1,mcc,time
0,logreg,0.9308,0.9778,0.9072,0.9412,0.861,67.9 ms
1,roberta_all,0.9434,0.949,0.9588,0.9538,0.8808,9.21 s
2,ensemble_roberta_and_lr,0.9434,0.9583,0.9485,0.9534,0.8814,9.48 s
3,ensemble_roberta_title_and_body_and_lr,0.956,0.9787,0.9485,0.9634,0.909,12.1 s


In [123]:
!lscpu |grep 'Model name'

Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz


In [114]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-d79d8ced-daed-ec0c-41d4-b24514fd8ea3)


In [None]:
# Blocks forever, so "Run all" never reaches the cells below; presumably this is
# deliberate (it also keeps the Colab session busy). Interrupt the kernel to continue.
# Sleeping between wake-ups avoids pinning a CPU core the way `while True: pass` does.
import time as _time

while True:
    _time.sleep(60)

# Unimportant (exploratory 6-fold cross-validation)

In [None]:
# 6-fold cross-validation of Logistic Regression, RoBERTa and their weighted ensemble
# on the full (title + body) training data. Per-fold scores are appended to the lists
# below and averaged in the results cell.

# The loop reads `train_data` with a `labels` target column. That name was never
# defined in this notebook, so the cell only ran with leftover kernel state and
# raised NameError on a fresh kernel. Derive it explicitly from the training frame.
train_data = train_data_all.rename(columns={"label": "labels"})

n = 6
seed = 42
kf = KFold(n_splits=n, random_state=seed, shuffle=True)
mcc_lr, f1_lr = [], []
mcc_rb, f1_rb = [], []
acc, prec, rec, f1, mcc = [], [], [], [], []

for train_index, val_index in kf.split(train_data):
    train_df = train_data.iloc[train_index]
    val_df = train_data.iloc[val_index]

    ### LogReg
    cv = TfidfVectorizer(strip_accents='ascii', lowercase=True, stop_words='english')
    X_train_cv = cv.fit_transform(train_df.text)
    X_val_cv = cv.transform(val_df.text)

    lr = LogisticRegression(random_state=0, C=17, penalty='l2', max_iter=1000)
    lr.fit(X_train_cv, train_df.labels)
    predictions_lr = lr.predict(X_val_cv)

    f1_lr.append(f1_score(val_df.labels, predictions_lr))
    mcc_lr.append(matthews_corrcoef(val_df.labels, predictions_lr))

    #### RoBERTa
    # A fresh model per fold, so no weights leak between folds.
    model = ClassificationModel('roberta', 'roberta-base', args=model_args)
    model.train_model(train_df, eval_df=val_df, acc=matthews_corrcoef)
    result, model_outputs, wrong_predictions = model.eval_model(val_df, acc=matthews_corrcoef)

    # Majority vote over the sliding windows of each text.
    # NOTE(review): np.rint rounds an exact 0.5 (even vote split) to 0.
    predictions_rb = np.array([np.rint(np.mean(np.argmax(j, axis=1))) for j in model_outputs]).astype(int)

    f1_rb.append(f1_score(val_df.labels, predictions_rb))
    mcc_rb.append(matthews_corrcoef(val_df.labels, predictions_rb))

    ##### ENSEMBLE
    w_lr = 0.94 # LR F1 score
    w_rf = 0.95 # RoBERTa F1 score

    prob_lr = np.array(lr.predict_proba(X_val_cv))
    prob_rb = getProbabilitiesRoberta(model_outputs)

    # Column 0 of each probability matrix is used as the score of label 0.
    prob_lr = prob_lr[:, 0]
    prob_rb = prob_rb[:, 0]

    prob = (prob_lr*w_lr + prob_rb*w_rf)/(w_lr+w_rf)

    predictions = np.where(prob > 0.5, 0, 1)

    acc.append(accuracy_score(val_df.labels, predictions))
    prec.append(precision_score(val_df.labels, predictions))
    rec.append(recall_score(val_df.labels, predictions))
    f1.append(f1_score(val_df.labels, predictions))
    mcc.append(matthews_corrcoef(val_df.labels, predictions))


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1328.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=494.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=494.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=494.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=494.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=266.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=94.0), HTML(value='')))




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1328.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=482.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=482.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=482.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=482.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=266.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=107.0), HTML(value='')))




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1328.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=493.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=493.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=493.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=493.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=266.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=96.0), HTML(value='')))




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1328.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=499.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=499.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=499.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=499.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=266.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=90.0), HTML(value='')))




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1329.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=488.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=488.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=488.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=488.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=265.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0), HTML(value='')))




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1329.0), HTML(value='')))




HBox(children=(HTML(value='Epoch'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Running Epoch 0 of 4'), FloatProgress(value=0.0, max=485.0), HTML(value='')))






HBox(children=(HTML(value='Running Epoch 1 of 4'), FloatProgress(value=0.0, max=485.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 2 of 4'), FloatProgress(value=0.0, max=485.0), HTML(value='')))




HBox(children=(HTML(value='Running Epoch 3 of 4'), FloatProgress(value=0.0, max=485.0), HTML(value='')))





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=265.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=104.0), HTML(value='')))




### Results

In [None]:
# Mean score over the cross-validation folds, rounded to 4 decimals.
# np.round is used throughout: the `np.round_` alias previously used for the LR MCC
# line is deprecated and was removed in NumPy 2.0.
print('Logistic regression score: ')
print('F1 LR: ', np.round(np.mean(f1_lr), 4))
print('MCC LR: ', np.round(np.mean(mcc_lr), 4))
print()

print('RoBERTa score: ')
print('F1 RoBERTa: ', np.round(np.mean(f1_rb), 4))
print('MCC RoBERTa: ', np.round(np.mean(mcc_rb), 4))
print()

print('Ensemble score: ')
print('Accuracy Ensemble: ', np.round(np.mean(acc), 4))
print('Precision Ensemble: ', np.round(np.mean(prec), 4))
print('Recall Ensemble: ', np.round(np.mean(rec), 4))
print('F1 Ensemble: ', np.round(np.mean(f1), 4))
print('MCC Ensemble: ', np.round(np.mean(mcc), 4))

Logistic regression score: 
F1 LR:  0.9398
MCC LR:  0.8771

RoBERTa score: 
F1 RoBERTa:  0.9511
MCC RoBERTa:  0.9011

Ensemble score: 
Accuracy Ensemble:  0.9573
Precision Ensemble:  0.9552
Recall Ensemble:  0.9619
F1 Ensemble:  0.9583
MCC Ensemble:  0.915


**Conclusion:** In 6-fold cross-validation the ensemble **outperforms** RoBERTa alone on both F1 (**0.9583** vs. **0.9511**) and MCC (**0.915** vs. **0.9011**).
