Install library

In [None]:
!pip install simpletransformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting simpletransformers
  Downloading simpletransformers-0.63.11-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.7/250.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting transformers>=4.6.0 (from simpletransformers)
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets (from simpletransformers)
  Downloading datasets-2.13.0-py3-none-any.whl (485 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.6/485.6 kB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Pr

Mount Google Drive and switch to the project directory

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')
import os
os.chdir('/content/gdrive/My Drive/mads_thesis')
!pwd

Mounted at /content/gdrive
/content/gdrive/My Drive/mads_thesis


Load Libraries

In [None]:
import pandas as pd
import numpy as np
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from scipy.special import softmax

Set environment variable (disable tokenizer parallelism)

In [None]:
# Set TOKENIZERS_PARALLELISM to "false" for this process (the variable is read
# by the Hugging Face tokenizers library).
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

Load and format data

In [None]:
# Read the five pickled review sets: the Video Games train/dev/test splits and
# the Office Products / Toys & Games halves.
train_data, dev_data, test_data, op_data, tg_data = (
    pd.read_pickle(pickle_name)
    for pickle_name in (
        'Video_Games_final_train.pkl.gz',
        'Video_Games_final_dev.pkl.gz',
        'Video_Games_final_test.pkl.gz',
        'Office_Products_final_half.pkl.gz',
        'Toys_&_Games_final_half.pkl.gz',
    )
)

# Integer class id for each value of the 'helpfulness' column.
labels = {'Unhelpful': 0, 'Helpful': 1}

# Columns that are stringified and appended to the review text.
_scaled_cols = ['review_time_scl', 'rating_scl', 'richness_scl']


def _add_model_columns(frame):
    """Add 'label' and 'text' columns to ``frame`` in place; return those two columns.

    'label' is the integer id looked up in ``labels`` for each 'helpfulness'
    value (an unknown value raises KeyError). The scaled feature columns are
    converted to ``str`` and joined to 'review_text' with ' [SEP] ' to form 'text'.
    """
    frame['label'] = [labels[value] for value in frame['helpfulness']]
    frame[_scaled_cols] = frame[_scaled_cols].astype(str)
    frame['text'] = frame[['review_text'] + _scaled_cols].agg(' [SEP] '.join, axis=1)
    return frame[['text', 'label']]


train_df = _add_model_columns(train_data)
dev_df = _add_model_columns(dev_data)
test_df = _add_model_columns(test_data)
op_df = _add_model_columns(op_data)
tg_df = _add_model_columns(tg_data)

Model Parameters

In [None]:
# Hyper-parameters and model selection consumed by bert_model() through its
# `config` argument.
model_args = dict(
    model='bert-base-uncased',
    type='bert',
    train_batch_size=128,
    learning_rate=4e-5,
    epochs=5,
    use_early_stopping=True,
    eval_during_training=True,
)

Function to create and train the BERT model

In [None]:
def bert_model(train_df, dev_df, config):
    """Build a simpletransformers ``ClassificationModel`` and train it.

    Args:
        train_df: Training DataFrame handed to ``train_model``. It must contain
            a ``label`` column; the number of distinct labels is used as
            ``num_labels``.
        dev_df: DataFrame handed to ``train_model`` as ``eval_df``.
        config: Dict of settings. Required keys: ``type``, ``model``,
            ``train_batch_size``, ``learning_rate``, ``epochs``,
            ``use_early_stopping`` and ``eval_during_training``.
            Optional keys:
              * ``use_cuda`` (bool) - force GPU on/off. When absent or None,
                the GPU is used only if ``torch.cuda.is_available()``.
              * ``manual_seed`` (int) - forwarded to ``ClassificationArgs``;
                absent/None keeps the library default.

    Returns:
        The ``ClassificationModel`` after ``train_model`` has been called.
    """
    # Named train_args (not model_args) so the notebook-level `model_args`
    # dict is not shadowed inside this function.
    train_args = ClassificationArgs(
        overwrite_output_dir=True,
        do_lower_case=True,
        save_steps=-1,
        train_batch_size=config['train_batch_size'],
        learning_rate=config['learning_rate'],
        num_train_epochs=config['epochs'],
        # NOTE(review): relative path, so it resolves under the current working
        # directory rather than Colab's /content - confirm this is intended.
        output_dir='content/outputs',
        use_early_stopping=config['use_early_stopping'],
        evaluate_during_training=config['eval_during_training'],
        use_multiprocessing=False,
        use_multiprocessing_for_evaluation=False,
        manual_seed=config.get('manual_seed'),
    )

    # A hard-coded use_cuda=True makes model construction fail on a runtime
    # without a GPU, so fall back to CPU unless the caller asked explicitly.
    use_cuda = config.get('use_cuda')
    if use_cuda is None:
        import torch  # local import: only needed for the auto-detect path
        use_cuda = torch.cuda.is_available()

    model = ClassificationModel(
        config['type'],
        config['model'],
        num_labels=train_df['label'].nunique(),
        args=train_args,
        use_cuda=use_cuda,
    )
    model.train_model(train_df, eval_df=dev_df)

    return model

Train model

In [None]:
# Train on the training split; the dev split is passed along as the evaluation set.
model = bert_model(train_df=train_df, dev_df=dev_df, config=model_args)

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/198 [00:00<?, ?it/s]



Running Epoch 1 of 5:   0%|          | 0/198 [00:00<?, ?it/s]



Running Epoch 2 of 5:   0%|          | 0/198 [00:00<?, ?it/s]



Running Epoch 3 of 5:   0%|          | 0/198 [00:00<?, ?it/s]



Running Epoch 4 of 5:   0%|          | 0/198 [00:00<?, ?it/s]



Function to evaluate trained BERT model

In [None]:
def evaluate_model(model, test_df):
    """Print a classification report, top-decile lift (TDL) and Gini for ``model``.

    Args:
        model: Object whose ``predict(list_of_texts)`` returns a pair whose
            second item is a 2-D array of per-class raw outputs.
        test_df: DataFrame with a ``text`` column (model input) and a
            ``label`` column (observed class).

    Returns:
        None. All metrics are written to stdout.
    """
    observed = test_df['label']
    raw_outputs = model.predict(test_df['text'].tolist())[1]

    # Softmax over the class axis; column 1 is used as the ranking score and
    # the arg-max class as the hard prediction.
    class_probs = softmax(raw_outputs, axis=1)
    hard_labels = np.argmax(class_probs, axis=1)
    positive_prob = class_probs[:, 1]
    print(classification_report(observed, hard_labels))

    # Bucket the scores into ten quantile bins and compare each bin's observed
    # rate with the overall rate.
    scored = pd.DataFrame(list(zip(positive_prob, observed)), columns=['Predicted', 'Observed'])
    scored['Decile'] = pd.qcut(scored['Predicted'], 10, labels=False)
    overall_rate = scored['Observed'].mean()
    by_decile = (
        scored.groupby(['Decile'])
        .agg(Predicted=('Predicted', 'mean'), Observed=('Observed', 'mean'))
        .sort_values('Decile', ascending=False)
    )
    by_decile['Lift'] = by_decile['Observed'] / overall_rate

    # Rows are sorted with the highest-scored decile first, so iloc[0] is the
    # top decile; its lift is normalised by the mean lift across deciles.
    lift = by_decile['Lift']
    tdl = lift.iloc[0] / lift.mean()
    print(f'\nTDL is {tdl}\n')

    # Gini coefficient derived from ROC AUC: 2 * AUC - 1.
    gini = roc_auc_score(observed, positive_prob) * 2 - 1
    print(f'\nGINI is {gini}\n')

Run Evaluation

In [13]:
# Report the same metrics on every evaluation set, in order: dev, test, then
# the two other product categories.
for banner, eval_frame in (
    ('#### DEV EVALUATION ####', dev_df),
    ('#### TEST EVALUATION ####', test_df),
    ('#### ROBUST EVALUATION: Office Products ####', op_df),
    ('#### ROBUST EVALUATION: Toys & Games ####', tg_df),
):
    print(banner)
    evaluate_model(model, eval_frame)

#### DEV EVALUATION ####


  0%|          | 0/351 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.81      0.67      0.73      1266
           1       0.76      0.87      0.81      1542

    accuracy                           0.78      2808
   macro avg       0.78      0.77      0.77      2808
weighted avg       0.78      0.78      0.78      2808


TDL is 1.7166327919811486


GINI is 0.7117523455924992

#### TEST EVALUATION ####


  0%|          | 0/878 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.80      0.65      0.72      3164
           1       0.75      0.87      0.81      3855

    accuracy                           0.77      7019
   macro avg       0.78      0.76      0.76      7019
weighted avg       0.78      0.77      0.77      7019


TDL is 1.6878881017082141


GINI is 0.7040452660524283

#### ROBUST EVALUATION: Office Products ####


  0%|          | 0/871 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.57      0.42      0.48      1071
           1       0.90      0.94      0.92      5891

    accuracy                           0.86      6962
   macro avg       0.74      0.68      0.70      6962
weighted avg       0.85      0.86      0.85      6962


TDL is 1.1696781219646715


GINI is 0.6783548818157943

#### ROBUST EVALUATION: Toys & Games ####


  0%|          | 0/2707 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.52      0.53      0.53      3587
           1       0.91      0.90      0.90     18065

    accuracy                           0.84     21652
   macro avg       0.71      0.72      0.72     21652
weighted avg       0.84      0.84      0.84     21652


TDL is 1.1772126499343714


GINI is 0.6692901782438365

