In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from datasets import Dataset

from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import transformers

import torch

from ailignment.datasets.util import get_accuracy_metric
from ailignment.datasets.moral_stories import make_action_classification_dataframe, get_random_value_dataset
import ailignment.datasets.moral_stories_clustered as msc
from ailignment.training import sequence_classification

# Let pandas display up to 400 characters per column cell when rendering frames.
pd.set_option('display.max_colwidth', 400)
# NOTE(review): read_pickle can execute arbitrary code while unpickling, so only load this
# file if it comes from a trusted source. `dataframe` is not referenced by any cell visible
# below — confirm it is still needed.
dataframe = pd.read_pickle("../data/moral_stories_proto_l2s.dat")

In [2]:
import json
def load_json(path):
    """Read a JSON Lines file into a DataFrame with an int32 ``label`` column.

    Each non-blank line of the file must hold one JSON object; blank lines
    (for example a trailing empty line) are skipped instead of failing to parse.

    Args:
        path: Path of the JSON Lines file to read.

    Returns:
        pandas.DataFrame with one row per JSON object and ``label`` cast to int32.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If the records contain no ``label`` field.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    frame = pd.DataFrame.from_records(records)
    frame["label"] = frame["label"].astype("int32")
    return frame

def load_splits(folder="action/"):
    """Load the ``train.json``, ``dev.json`` and ``test.json`` files found in ``folder``.

    Args:
        folder: Directory holding the three split files. A trailing path
            separator is optional.

    Returns:
        Tuple ``(train, dev, test)`` of whatever ``load_json`` returns for each file.
    """
    import os  # local import so this definition does not depend on another cell's imports

    # os.path.join only inserts a separator when one is missing, so "action/" and
    # "action" both resolve to the same files (plain concatenation required the slash).
    train = load_json(os.path.join(folder, "train.json"))
    dev = load_json(os.path.join(folder, "dev.json"))
    test = load_json(os.path.join(folder, "test.json"))
    return train, dev, test

def _load_tokenized_splits(tokenizer, folder, text_columns, seed=None):
    """Load the splits stored in ``folder`` and tokenize the given text columns.

    Args:
        tokenizer: Callable tokenizer; it receives one batch of strings per entry
            of ``text_columns`` (positionally, in order).
        folder: Directory handed to ``load_splits``.
        text_columns: Column names whose values are passed to the tokenizer.
        seed: Optional seed for shuffling the training split. ``None`` keeps the
            unseeded shuffle.

    Returns:
        Tuple ``(train, dev, test)`` of tokenized ``Dataset`` objects. Only the
        training split is shuffled.
    """
    def tokenize(samples):
        texts = [samples[column] for column in text_columns]
        return tokenizer(*texts, padding="max_length",
                         truncation=True, return_token_type_ids=True)

    train, dev, test = load_splits(folder)
    train = Dataset.from_pandas(train).shuffle(seed=seed).map(tokenize, batched=True)
    dev = Dataset.from_pandas(dev).map(tokenize, batched=True)
    test = Dataset.from_pandas(test).map(tokenize, batched=True)
    return train, dev, test


def load_action(tokenizer, seed=None):
    """Load the ``action/`` splits, tokenizing the ``action`` column only.

    Args:
        tokenizer: Callable tokenizer applied to batches of actions.
        seed: Optional seed for the training-split shuffle (default: unseeded).

    Returns:
        Tuple ``(train, dev, test)`` of tokenized ``Dataset`` objects.
    """
    return _load_tokenized_splits(tokenizer, "action/", ("action",), seed=seed)


def load_action_norm(tokenizer, seed=None):
    """Load the ``action_norm/`` splits, tokenizing ``(action, norm)`` text pairs.

    Args:
        tokenizer: Callable tokenizer applied to batches of (action, norm) pairs.
        seed: Optional seed for the training-split shuffle (default: unseeded).

    Returns:
        Tuple ``(train, dev, test)`` of tokenized ``Dataset`` objects.
    """
    return _load_tokenized_splits(tokenizer, "action_norm/", ("action", "norm"), seed=seed)


def load_action_context(tokenizer, seed=None):
    """Load the ``action_context/`` splits, tokenizing ``(action, context)`` text pairs.

    Args:
        tokenizer: Callable tokenizer applied to batches of (action, context) pairs.
        seed: Optional seed for the training-split shuffle (default: unseeded).

    Returns:
        Tuple ``(train, dev, test)`` of tokenized ``Dataset`` objects.
    """
    return _load_tokenized_splits(tokenizer, "action_context/", ("action", "context"), seed=seed)

In [3]:
# Pretrained checkpoint to fine-tune. "roberta-large" was the other candidate that used to be
# assigned here and immediately overwritten; swap the string below to use it instead.
name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(name)
# num_labels=2 requests a two-class sequence classification head.
model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [4]:
# Load the train/dev/test splits from "action_norm/" and tokenize each (action, norm) pair
# with `tokenizer`.
train, dev, test = load_action_norm(tokenizer)

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [5]:
def _keep_example(example):
    """Return True for every example whose label is not 2."""
    return example["label"] != 2

# Apply the same row filter to all three splits and rebind each name to its filtered version.
train, dev, test = [split.filter(_keep_example) for split in (train, dev, test)]

  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [6]:
training_args = TrainingArguments(
    # NOTE(review): machine-specific absolute path; consider a configurable output directory.
    output_dir="/data/kiehne/results/shuffled_values/random/trash/",
    num_train_epochs=7,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=1,
    # NOTE(review): the run logged below has 714 optimization steps in total, so 500 warmup
    # steps cover most of training — confirm this is intended.
    warmup_steps=500,
    #weight_decay=0.01,
    logging_dir='logs/',
    log_level="info",
    logging_steps=500,
    evaluation_strategy="epoch",
    # Checkpoint once per epoch. The former `save_steps=30000000` was dropped: it is only
    # consulted when save_strategy="steps", so it had no effect next to this setting.
    save_strategy="epoch",
    learning_rate=1e-5
)
# Metric callable handed to the Trainer as `compute_metrics`.
acc_metric = get_accuracy_metric()

In [7]:
# Build a Trainer that fits `model` on `train`, evaluates on `dev` according to
# `training_args`, and scores predictions with the callable from get_accuracy_metric().
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=dev,
    compute_metrics=acc_metric,
)
# NOTE(review): the same `model` object is trained here, so re-running this cell without
# re-running the model-loading cell presumably continues from already fine-tuned weights.
logs = trainer.train()
# Evaluate on the test split; the returned metrics dict is the cell's displayed output.
trainer.evaluate(test)

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: norm, action.
***** Running training *****
  Num examples = 1628
  Num Epochs = 7
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 714


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.767691,0.473118
2,No log,0.711081,0.483871
3,No log,0.671362,0.607527
4,No log,0.615831,0.677419
5,0.632600,0.54016,0.752688
6,0.632600,0.562581,0.774194
7,0.632600,0.598864,0.758065


The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: norm, action.
***** Running Evaluation *****
  Num examples = 186
  Batch size = 16
Saving model checkpoint to /data/kiehne/results/shuffled_values/random/trash/checkpoint-102
Configuration saved in /data/kiehne/results/shuffled_values/random/trash/checkpoint-102/config.json
Model weights saved in /data/kiehne/results/shuffled_values/random/trash/checkpoint-102/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: norm, action.
***** Running Evaluation *****
  Num examples = 186
  Batch size = 16
Saving model checkpoint to /data/kiehne/results/shuffled_values/random/trash/checkpoint-204
Configuration saved in /data/kiehne/results/shuffled_values/random/trash/checkpoint-204/config.json
Model weights saved in /data/kiehne/results/shuf

{'eval_loss': 0.7107114791870117,
 'eval_accuracy': 0.6875,
 'eval_runtime': 1.8635,
 'eval_samples_per_second': 103.032,
 'eval_steps_per_second': 6.44,
 'epoch': 7.0}