In [26]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import datasets
from datasets import load_dataset, load_metric
import numpy as np
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [4]:
dataset = load_dataset("peixian/equity_evaluation_corpus", split="train")

Reusing dataset equity_evaluation_corpus (/home/pw1329/.cache/huggingface/datasets/equity_evaluation_corpus/first_domain/1.1.0/697923ba8aa88f727772216e9fdc5fe17f04e9ea530aa691c0fed82207aa4c1a)


In [5]:
dataset.features

{'sentence': Value(dtype='string', id=None),
 'template': Value(dtype='string', id=None),
 'person': Value(dtype='string', id=None),
 'gender': Value(dtype='string', id=None),
 'race': Value(dtype='string', id=None),
 'emotion': Value(dtype='string', id=None),
 'emotion word': Value(dtype='string', id=None)}

In [6]:
model_checkpoint = "bert-base-cased"
batch_size = 8
metric=load_metric('bertscore')

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [8]:
d = dataset.train_test_split(test_size=0.1)

In [9]:
num_labels = 2
def relabel(gender):
    return 0 if gender == "male" else 1

In [10]:
tokenizer("aaa", truncation=True, padding=True)

{'input_ids': [101, 170, 22118, 102], 'token_type_ids': [0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1]}

In [11]:
d = d.map(lambda x: {'labels': relabel(x["gender"])})

HBox(children=(FloatProgress(value=0.0, max=7776.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=864.0), HTML(value='')))




In [12]:
d = d.map(lambda x: tokenizer(x["sentence"], truncation=True, padding=True), batched=True)

HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [13]:
d['train']

Dataset({
    features: ['attention_mask', 'emotion', 'emotion word', 'gender', 'input_ids', 'labels', 'person', 'race', 'sentence', 'template', 'token_type_ids'],
    num_rows: 7776
})

In [22]:
metric_name = "accuracy"

args = TrainingArguments(
    "test-bert",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    logging_dir="./logs"
)

In [23]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
    }


In [24]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [27]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=d['train'],
    eval_dataset=d['test'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [28]:
best_run = trainer.hyperparameter_search(n_trials=1, direction="maximize")

[32m[I 2021-03-28 22:49:04,536][0m A new study created in memory with name: no-name-28db1488-2b65-4c73-8453-b9587a8bbf98[0m
Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForS

Epoch,Training Loss,Validation Loss,Accuracy,Runtime,Samples Per Second
1,No log,0.339017,0.991898,1.4588,592.278


[32m[I 2021-03-28 22:49:31,890][0m Trial 0 finished with value: 0.9918981481481481 and parameters: {'learning_rate': 1.4283636029349264e-06, 'num_train_epochs': 1, 'seed': 27, 'per_device_train_batch_size': 32}. Best is trial 0 with value: 0.9918981481481481.[0m


In [29]:
trainer.evaluate()

{'eval_loss': 0.33901703357696533,
 'eval_accuracy': 0.9918981481481481,
 'eval_runtime': 1.4297,
 'eval_samples_per_second': 604.311,
 'epoch': 1.0,
 'eval_mem_cpu_alloc_delta': 68455,
 'eval_mem_gpu_alloc_delta': 0,
 'eval_mem_cpu_peaked_delta': 69143,
 'eval_mem_gpu_peaked_delta': 5080576}

In [53]:
# load sbf

sbf = load_dataset("social_bias_frames")

Using custom data configuration default
Reusing dataset social_bias_frames (/home/pw1329/.cache/huggingface/datasets/social_bias_frames/default/0.0.0/7ccf5e07dabdba6791693ea27289996d4771f586aa88f1ff05c52645f2cfd41d)


In [55]:
sbf = sbf.map(lambda x: tokenizer(x['post']), batched=True)

HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=113.0), HTML(value='')))

Token indices sequence length is longer than the specified maximum sequence length for this model (534 > 512). Running this sequence through the model will result in indexing errors





DatasetDict({
    test: Dataset({
        features: ['HITId', 'WorkerId', 'annotatorAge', 'annotatorGender', 'annotatorMinority', 'annotatorPolitics', 'annotatorRace', 'attention_mask', 'dataSource', 'input_ids', 'intentYN', 'offensiveYN', 'post', 'sexPhrase', 'sexReason', 'sexYN', 'speakerMinorityYN', 'targetCategory', 'targetMinority', 'targetStereotype', 'token_type_ids', 'whoTarget'],
        num_rows: 17501
    })
    validation: Dataset({
        features: ['HITId', 'WorkerId', 'annotatorAge', 'annotatorGender', 'annotatorMinority', 'annotatorPolitics', 'annotatorRace', 'attention_mask', 'dataSource', 'input_ids', 'intentYN', 'offensiveYN', 'post', 'sexPhrase', 'sexReason', 'sexYN', 'speakerMinorityYN', 'targetCategory', 'targetMinority', 'targetStereotype', 'token_type_ids', 'whoTarget'],
        num_rows: 16738
    })
    train: Dataset({
        features: ['HITId', 'WorkerId', 'annotatorAge', 'annotatorGender', 'annotatorMinority', 'annotatorPolitics', 'annotatorRace', 'attent

In [65]:
type(sbf['train'])

datasets.arrow_dataset.Dataset

In [67]:
res = trainer.predict(sbf['validation'])

In [75]:
res[0][:20]

array([[ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [ 0.05472107, -1.0563152 ],
       [-0.25995144, -0.28236598],
       [-0.25995144, -0.28236598],
       [-0.25995144, -0.28236598],
       [-0.17341861,  0.19545026],
       [-0.17341861,  0.19545026],
       [-0.17341861,  0.19545026],
       [-0.35052088,  0.36407298],
       [-0.35052088,  0.36407298],
       [-0.35052085,  0.36407265],
       [-0.28386626,  0.0601012 ],
       [-0.28386626,  0.0601012 ],
       [-0.28386626,  0.0601012 ]], dtype=float32)

In [79]:
#uncomment to see posts
#sbf['train']['post'][:20]

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir runs/Mar28_22-25-53_gr004.nyu.cluster