In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import datasets
from datasets import load_dataset, load_metric
import numpy as np
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [41]:
# training info
training_dataset = "social_bias_frames"
training_dataset_split = "train"

datasets_dir = "/scratch/pw1329/datasets"

train_labels_column = "offensiveYN"
train_features_column = "post" # the column tokenized for eval

num_labels = 2

relabel_training = None

def relabel_training(offensive):
    if offensive:
        return float(offensive)
    else:
        return None

In [29]:
# evaluate info
evaluate_dataset = "cc_news"
evaluate_input_column = 'text' # the column tokenized for eval

In [30]:
# trainer
model_checkpoint = "bert-base-cased"
batch_size = 8
metric=load_metric('bertscore')
trials = 10

In [31]:
dataset = load_dataset(training_dataset, split=training_dataset_split, cache_dir=datasets_dir)

Using custom data configuration default
Reusing dataset social_bias_frames (/scratch/pw1329/datasets/social_bias_frames/default/0.0.0/7ccf5e07dabdba6791693ea27289996d4771f586aa88f1ff05c52645f2cfd41d)


In [32]:
dataset.features

{'whoTarget': Value(dtype='string', id=None),
 'intentYN': Value(dtype='string', id=None),
 'sexYN': Value(dtype='string', id=None),
 'sexReason': Value(dtype='string', id=None),
 'offensiveYN': Value(dtype='string', id=None),
 'annotatorGender': Value(dtype='string', id=None),
 'annotatorMinority': Value(dtype='string', id=None),
 'sexPhrase': Value(dtype='string', id=None),
 'speakerMinorityYN': Value(dtype='string', id=None),
 'WorkerId': Value(dtype='string', id=None),
 'HITId': Value(dtype='string', id=None),
 'annotatorPolitics': Value(dtype='string', id=None),
 'annotatorRace': Value(dtype='string', id=None),
 'annotatorAge': Value(dtype='string', id=None),
 'post': Value(dtype='string', id=None),
 'targetMinority': Value(dtype='string', id=None),
 'targetCategory': Value(dtype='string', id=None),
 'targetStereotype': Value(dtype='string', id=None),
 'dataSource': Value(dtype='string', id=None)}

In [33]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [34]:
d = dataset.train_test_split(test_size=0.1)

In [None]:
d = d.map(lambda x: {'labels': relabel_training(x[train_labels_column])})

HBox(children=(FloatProgress(value=0.0, max=101610.0), HTML(value='')))

In [19]:
d = d.map(lambda x: tokenizer(x[train_features_column], truncation=True, padding=True), batched=True)

HBox(children=(FloatProgress(value=0.0, max=102.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




In [20]:
metric_name = "accuracy"

args = TrainingArguments(
    "test-bert",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    logging_dir="./logs"
)

In [21]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
    }


In [22]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [26]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=d['train'],
    eval_dataset=d['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [27]:
best_run = trainer.hyperparameter_search(n_trials=trials, direction="maximize")

[32m[I 2021-04-10 15:14:37,987][0m A new study created in memory with name: no-name-8b978121-6f55-444e-8e16-b951cfe821d6[0m
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForS

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length.

In [None]:
trainer.evaluate()

In [None]:
# load evaluate dataset

eval_dataset = load_dataset(evaluate_dataset, cache_dir=dataset_dir)

In [None]:
eval_dataset = eval_dataset.map(lambda x: tokenizer(x[evaluate_input_column]), batched=True)

In [None]:
type(sbf['train'])

In [None]:
res = trainer.predict(sbf['validation'])

In [None]:
res[0][:20]

In [None]:
#uncomment to see posts
#sbf['train']['post'][:20]

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs/Mar28_22-25-53_gr004.nyu.cluster