In [13]:
!pip install pandas
!pip install numpy
!pip install transformers
!pip install torch torchvision
!pip install sentencepiece



Imports

In [14]:
import json
import torch
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from tqdm import tqdm
from transformers import BertTokenizer, RobertaTokenizer, RobertaForSequenceClassification, AdamW, BertForSequenceClassification
from transformers import DistilBertForTokenClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
import statistics
import numpy as np

Read Data

In [15]:
def read_data_entity_marker(tags):
    context_text = []
    context_tags = []

    for tagg in tags:
        context_text.append(tagg[1])
    return context_text


def read_data_tag_sentence(texts, tags):
    context_text = []
    context_tags = []

    for textt in texts:
        for tagg in tags:
            if int(textt[0]) == int(tagg[0]):
                context_text.append(textt[1])
                context_tags.append(tagg[1])
            else:
                continue
    return context_text, context_tags


def read_labels(dataa):
    labels = []
    count = 0
    for dat in dataa:
        for ent in dat["entities"]:
            labels.append(int(ent["label"]))
            count += 1
    return labels

Tokenize Data

In [16]:
def tokenizeData_entity_marker(tokenizer, text, max_length=256):
    input_ids = []
    attention_masks = []

    for tx in text:
        tokenizedData = tokenizer.encode_plus(tx, max_length=max_length,
                                              padding='max_length', truncation="longest_first")
        tokenizedQP = tokenizedData["input_ids"]
        attentionMask = tokenizedData["attention_mask"]

        input_ids.append(tokenizedQP)
        attention_masks.append(attentionMask)

    return np.array(input_ids), np.array(attention_masks)

def tokenizeData_tag_sentence(tokenizer, text, tags, max_length=256):
    input_ids = []
    attention_masks = []

    for tx, tg in zip(text, tags):
        tokenizedData = tokenizer.encode_plus(tx, tg, max_length=max_length,
                                              padding='max_length', truncation="longest_first")
        tokenizedQP = tokenizedData["input_ids"]
        attentionMask = tokenizedData["attention_mask"]

        input_ids.append(tokenizedQP)
        attention_masks.append(attentionMask)

    return np.array(input_ids), np.array(attention_masks)

Data Loaders

In [17]:
def buildDataLoaders(batchSize, trainFeatures, testFeatures):
    trainTensors = [torch.tensor(feature, dtype=torch.long) for feature in trainFeatures]
    testTensors = [torch.tensor(feature, dtype=torch.long) for feature in testFeatures]

    trainDataset = TensorDataset(*trainTensors)
    testDataset = TensorDataset(*testTensors)

    trainSampler = RandomSampler(trainDataset)
    testSampler = SequentialSampler(testDataset)

    trainDataloader = DataLoader(trainDataset, sampler=trainSampler, batch_size=batchSize)
    testDataloader = DataLoader(testDataset, sampler=testSampler, batch_size=batchSize)

    return trainDataloader, testDataloader

Training

In [18]:
def train(numEpochs, gradSteps, model, optimizer, trainDataLoader):
    trainLossHistory = []

    for _ in tqdm(range(numEpochs), desc="Training Epoch's"):

        # Train the model for fine-tuning
        epochTrainLoss = 0  # Cumulative loss
        model.train()
        model.zero_grad()

        for step, batch in enumerate(trainDataLoader):
            input_ids = batch[0].to(device)
            attention_masks = batch[1].to(device)
            label = batch[2].to(device)
            outputs = model(input_ids, token_type_ids=None, attention_mask=attention_masks, labels=label)

            loss = outputs[0]
            loss = loss / gradSteps
            epochTrainLoss += loss.item()
            loss.backward()

            if (step + 1) % gradSteps == 0:  # Gradient accumulation is over
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Clipping gradients
                optimizer.step()
                model.zero_grad()

        epochTrainLoss = epochTrainLoss / len(trainDataLoader)
        trainLossHistory.append(epochTrainLoss)

Predict and Evaluate

In [19]:
def predict_tag_sentence(tokenizer, model, text, tag, max_length=256):
    sequence = tokenizer.encode_plus(tag, text, max_length=max_length,
                                     padding='max_length', truncation="longest_first"
                                     , return_tensors="pt")['input_ids'].to(device)

    logits = model(sequence)[0]
    probabilities = torch.softmax(logits, dim=1).detach().cpu().tolist()[0]
    if probabilities[1] > 0.5:
        return 1
    return 0

def predict_entity_marker(tokenizer, model, text, max_length=256):
    sequence = tokenizer.encode_plus(text, max_length=max_length,
                                     padding='max_length', truncation="longest_first"
                                     , return_tensors="pt")['input_ids'].to(device)

    logits = model(sequence)[0]
    probabilities = torch.softmax(logits, dim=1).detach().cpu().tolist()[0]
    if probabilities[1] > 0.5:
        return 1
    return 0


def evaluate(pred_labels, test_labels):
    pred_labels1 = np.array_split(pred_labels, 5)
    test_labels1 = np.array_split(test_labels, 5)
    accuracy = []
    f1 = []
    for test, pred in zip(test_labels1, pred_labels1):
        accuracy.append(accuracy_score(test, pred))
        f1.append(f1_score(test, pred, average="weighted"))

    print("Accuracy: " + str(sum(accuracy) / len(accuracy)))
    print("Standard Deviation: " + str(statistics.stdev(accuracy)))

    print("F1 Score: " + str(sum(f1) / len(f1)))
    print("Standard Deviation: " + str(statistics.stdev(f1)))

BERT Entity Marker

In [20]:
def bert_entity_marker_a(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "[e]" + ent["arg1"] + "[\e]").replace(" " + ent["arg2"] + " ",
                                                                                            "[e]" + ent[
                                                                                                "arg2"] + "[\e]").strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = BertForSequenceClassification.from_pretrained('bert-base-cased').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)


def bert_entity_marker_b(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "[e] " + ent["arg1"] + " [\e]").replace(" " + ent["arg2"] + " ",
                                                                                            "[e] " + ent[
                                                                                                "arg2"] + " [\e]").strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = BertForSequenceClassification.from_pretrained('bert-base-cased').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)


def bert_entity_marker_c(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "ENTITY1" + ent["arg1"]).replace(" " + ent["arg2"] + " ",
                                                                                            "ENTITY2" + ent[
                                                                                                "arg2"]).strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = BertForSequenceClassification.from_pretrained('bert-base-cased').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)

BERT Tag Sentence

In [21]:
def bert_tag_sentence(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        text_list.append([d["id"], d["text"].strip("\n")])
        for ent in d["entities"]:
            tag_list.append([d["id"], d["text"].replace(ent["arg1"] + " ", "ENTITY1 ").replace(" " + ent["arg2"] + " ",
                                                                                               " ENTITY2 ").strip(
                "\n")])

    text, tags = read_data_tag_sentence(text_list, tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_tags, test_tags, train_labels, test_labels = train_test_split(text, tags,
                                                                                                 labels, test_size=.2)
    
    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    train_ids, train_attn = tokenizeData_tag_sentence(tokenizer, train_texts, train_tags)
    test_ids, test_attn = tokenizeData_tag_sentence(tokenizer, test_texts, test_tags)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = BertForSequenceClassification.from_pretrained('bert-base-cased').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text, tags in zip(test_texts, test_tags):
        pred_labels.append(predict_tag_sentence(tokenizer, model, text, tags))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)


Roberta Entity Marker

In [22]:
def roberta_entity_marker_a(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "[e]" + ent["arg1"] + "[\e]").replace(" " + ent["arg2"] + " ",
                                                                                            "[e]" + ent[
                                                                                                "arg2"] + "[\e]").strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = RobertaForSequenceClassification.from_pretrained('roberta-base').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)


def roberta_entity_marker_b(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "[e] " + ent["arg1"] + " [\e]").replace(" " + ent["arg2"] + " ",
                                                                                            "[e] " + ent[
                                                                                                "arg2"] + " [\e]").strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = Read.buildDataLoaders(8, trainFeatures, testFeatures)

    model = RobertaForSequenceClassification.from_pretrained('roberta-base').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)


def roberta_entity_marker_c(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        for ent in d["entities"]:
            tag_list.append(
                [d["id"],
                 d["text"].replace(ent["arg1"] + " ", "ENTITY1" + ent["arg1"]).replace(" " + ent["arg2"] + " ",
                                                                                            "ENTITY2" + ent[
                                                                                                "arg2"]).strip(
                     "\n")])

    text = read_data_entity_marker(tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_labels, test_labels = train_test_split(text, labels, test_size=.2)

    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    train_ids, train_attn = tokenizeData_entity_marker(tokenizer, train_texts)
    test_ids, test_attn = tokenizeData_entity_marker(tokenizer, test_texts)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = RobertaForSequenceClassification.from_pretrained('roberta-base').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text in test_texts:
        pred_labels.append(predict_entity_marker(tokenizer, model, text))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)

Roberta Tag Sentence

In [23]:
def roberta_tag_sentence(file):
    with open(file) as f:
        data = json.load(f)

    text_list = []
    tag_list = []

    for d in data:
        text_list.append([d["id"], d["text"].strip("\n")])
        for ent in d["entities"]:
            tag_list.append([d["id"], d["text"].replace(ent["arg1"] + " ", "ENTITY1 ").replace(" " + ent["arg2"] + " ",
                                                                                               " ENTITY2 ").strip(
                "\n")])

    text, tags = read_data_tag_sentence(text_list, tag_list)
    labels = read_labels(data)

    sentences = []
    args1 = []
    args2 = []

    for d in data:
      sentences.append([d["id"], d["text"]])
      for ent in d["entities"]:
        args1.append([d["id"], ent["arg1"]])
        args2.append([d["id"], ent["arg2"]])

    train_texts, test_texts, train_tags, test_tags, train_labels, test_labels = train_test_split(text, tags,
                                                                                                 labels, test_size=.2)
    
    train_arg1, test_arg1, train_arg2, test_arg2 = train_test_split(args1, args2, test_size=.2)

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    train_ids, train_attn = tokenizeData_tag_sentence(tokenizer, train_texts, train_tags)
    test_ids, test_attn = tokenizeData_tag_sentence(tokenizer, test_texts, test_tags)

    trainFeatures = (train_ids, train_attn, train_labels)
    testFeatures = (test_ids, test_attn)

    trainDataLoader, testDataLoader = buildDataLoaders(8, trainFeatures, testFeatures)

    model = RobertaForSequenceClassification.from_pretrained('roberta-base').to(device)
    optimizer = AdamW(model.parameters(), lr=5e-5)

    train(3, 3, model, optimizer, trainDataLoader)

    pred_labels = []
    for text, tags in zip(test_texts, test_tags):
        pred_labels.append(predict_tag_sentence(tokenizer, model, text, tags))

    evaluate(pred_labels, test_labels)

    pred_args = []
    for arg1, arg2, label in zip(test_arg1, test_arg2, pred_labels):
      pred_args.append({"arg1": arg1[1], "arg2": arg2[1], "label": label})
    
    with open('predictions.json', 'w') as f:
      json.dump(pred_args, f, indent=4)

Main

In [24]:
if __name__ == '__main__':
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    file = input("Enter Dataset: ")

    print("Select a model to train:")
    print("1.) BERT")
    print("2.) RoBERTa")

    model = int(input())

    if model == 1:
        print("Select a method:")
        print("1.) Entity Marker A")
        print("2.) Entity Marker B")
        print("3.) Entity Marker C")
        print("4.) Tag Sentence")

        method = int(input())

        if method == 1:
            bert_entity_marker_a(file)
        elif method == 2:
            bert_entity_marker_b(file)
        elif method == 3:
            bert_entity_marker_c(file)
        else:
            bert_tag_sentence(file)
    

    elif model == 2:
        print("Select a method:")
        print("1.) Entity Marker A")
        print("2.) Entity Marker B")
        print("3.) Entity Marker C")
        print("4.) Tag Sentence")

        method = int(input())

        if method == 1:
            roberta_entity_marker_a(file)
        elif method == 2:
            roberta_entity_marker_b(file)
        elif method == 3:
            roberta_entity_marker_c(file)
        else:
            roberta_tag_sentence(file)

    else:
        print("Please select 1 or 2.")

cuda
Enter Dataset: NYT.json
Select a model to train:
1.) BERT
2.) RoBERTa
2
Select a method:
1.) Entity Marker A
2.) Entity Marker B
3.) Entity Marker C
4.) Tag Sentence
4


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=481.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=501200538.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Accuracy: 0.5630555555555555
Standard Deviation: 0.044273935614427
F1 Score: 0.5538263782545404
Standard Deviation: 0.04353255955049212
