In [79]:
import json
from json import loads,dumps
import ast
import re

import pickle
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

import os
import time
import math
from sklearn.metrics import roc_auc_score, confusion_matrix, matthews_corrcoef, roc_curve, auc, average_precision_score
from scipy.special import softmax
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from torch.nn import CrossEntropyLoss

from transformers import BeitForImageClassification, BeitFeatureExtractor, Trainer, TrainingArguments
from transformers import FlavaConfig, FlavaModel, FlavaForPreTraining
from transformers import VisionEncoderDecoderModel, AutoTokenizer
from transformers import CLIPProcessor, CLIPModel, CLIPFeatureExtractor
from transformers import DebertaForSequenceClassification, DebertaConfig
from transformers import TrOCRProcessor, DebertaV2ForSequenceClassification, DebertaV2Config
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor
from transformers import AutoProcessor, GitVisionModel, AutoModelForCausalLM
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from transformers import BertPreTrainedModel, RobertaConfig, RobertaTokenizerFast

from transformers.optimization import AdamW, get_linear_schedule_with_warmup

from transformers.models.roberta.modeling_roberta import RobertaClassificationHead, RobertaConfig, RobertaModel

In [2]:
print(torch.version.cuda)

11.8


In [None]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

In [None]:
caption_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
caption_tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
caption_feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
caption_model.to(device)

In [None]:
max_length = 32
num_beams = 4
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
def predict_step(image_paths):
    images = []
    for image_path in image_paths:
        i_image = Image.open(image_path)
        if i_image.mode != "RGB":
            i_image = i_image.convert(mode="RGB")

        images.append(i_image)

    pixel_values = caption_feature_extractor(images=images, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    output_ids = caption_model.generate(pixel_values, **gen_kwargs)

    preds = caption_tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    preds = [pred.strip() for pred in preds]
    return preds

In [None]:
predict_step(['hm_data/img/01236.png'])

In [None]:
processor = AutoProcessor.from_pretrained("microsoft/git-base-coco")

model = AutoModelForCausalLM.from_pretrained("microsoft/git-base-coco")

image = Image.open('hm_data/img/01236.png')

pixel_values = processor(images=image, return_tensors="pt").pixel_values

generated_ids = model.generate(pixel_values=pixel_values, max_length=50)

generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

print(generated_caption)

In [None]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer_sent = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
# PT
model_sent = AutoModelForSequenceClassification.from_pretrained(MODEL)
    
sentiment_task = pipeline("sentiment-analysis", model=model_sent, tokenizer=tokenizer_sent)


In [None]:
print(sentiment_task("a man with his head turned to the side of the road."))

In [None]:
img_dir = "hm_data/img"

with open("train.jsonl",encoding='utf8') as f:
    data = [json.loads(line) for line in f]

inc = 1

for img in data:
    img['text-sentiment'] = sentiment_task(img['text'])[0]['label']
    image = Image.open('hm_data/'+img['img'])
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    img['caption'] = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    img['caption-sentiment'] = sentiment_task(img['caption'])[0]['label']
    inc += 1
    if inc%250 == 0:
        print(inc)
#     print(img)
with open('train_sentiment.jsonl','w',encoding='utf8') as f:
    for item in data:
        json.dump(item,f)
        print(file=f)

In [None]:
with open("train_dev_all.entity.jsonl",encoding='utf8') as m:
    datah = [json.loads(line) for line in m]

dfdata = pd.DataFrame(data)
dfdatah = pd.DataFrame(datah)

In [None]:
dfdata.head()

In [None]:
dfdata.shape

In [None]:
dfdatah.head()

In [None]:
dfmerge = dfdata.merge(dfdatah[['img', 'partition_description']], how = 'left', on='img')

In [None]:
dfmerge.shape

In [None]:
dfmerge.head()

In [None]:
# https://stackoverflow.com/questions/17864466/flatten-a-list-of-strings-and-lists-of-strings-and-lists-in-python
def flatten_to_strings(listOfLists):
    """Flatten a list of (lists of (lists of strings)) for any level 
    of nesting"""
    result = []

    for i in listOfLists:
        # Only append if i is a basestring (superclass of string)
        if isinstance(i, str):
            result.append(i)
        # Otherwise call this function recursively
        else:
            result.extend(flatten_to_strings(i))
    return result


In [None]:
dfmerge['partition_description'] = dfmerge['partition_description'].apply(flatten_to_strings)

In [None]:
dfmerge.head()

In [None]:
dfmerge['partition_description'] = ",".join(dfmerge['partition_description'][0])
dfmerge.head()

In [None]:
dfmerge['texty'] = dfmerge['text'] + "[SEP]" + dfmerge['text-sentiment'] + "[SEP]" + dfmerge['caption'] + "[SEP]" + dfmerge['caption-sentiment'] + "[SEP]" + dfmerge['partition_description']

In [None]:
dfmerge.head()

In [None]:
outs = dfmerge[['id','img','label','texty']].to_dict('records')

with open('train_text.jsonl', "w",encoding='utf8') as f:
    for i in range(len(outs)):
        f.write(str(outs[i])+"\n")

# print(outs)

In [None]:
img_dir = "hm_data/img"

with open("dev_seen.jsonl",encoding='utf8') as f:
    data = [json.loads(line) for line in f]

inc = 1

for img in data:
    img['text-sentiment'] = sentiment_task(img['text'])[0]['label']
    image = Image.open('hm_data/'+img['img'])
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    img['caption'] = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    img['caption-sentiment'] = sentiment_task(img['caption'])[0]['label']
    inc += 1
    if inc%250 == 0:
        print(inc)
#     print(img)
with open('dev_seen_sentiment.jsonl','w',encoding='utf8') as f:
    for item in data:
        json.dump(item,f)
        print(file=f)

In [None]:
dfdata = pd.DataFrame(data)
dfmerge = dfdata.merge(dfdatah[['img', 'partition_description']], how = 'left', on='img')
dfmerge['partition_description'] = dfmerge['partition_description'].apply(flatten_to_strings)
dfmerge['partition_description'] = ",".join(dfmerge['partition_description'][0])
dfmerge['texty'] = dfmerge['text'] + "[SEP]" + dfmerge['text-sentiment'] + "[SEP]" + dfmerge['caption'] + "[SEP]" + dfmerge['caption-sentiment'] + "[SEP]" + dfmerge['partition_description']
outs = dfmerge[['id','img','label','texty']].to_dict('records')

with open('dev_seen_text.jsonl', "w",encoding='utf8') as f:
    for i in range(len(outs)):
        f.write(str(outs[i])+"\n")

In [None]:
img_dir = "hm_data/img"

with open("dev_unseen.jsonl",encoding='utf8') as f:
    data = [json.loads(line) for line in f]

inc = 1

for img in data:
    img['text-sentiment'] = sentiment_task(img['text'])[0]['label']
    image = Image.open('hm_data/'+img['img'])
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    img['caption'] = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    img['caption-sentiment'] = sentiment_task(img['caption'])[0]['label']
    inc += 1
    if inc%250 == 0:
        print(inc)
#     print(img)
with open('dev_unseen_sentiment.jsonl','w',encoding='utf8') as f:
    for item in data:
        json.dump(item,f)
        print(file=f)

In [None]:
dfdata = pd.DataFrame(data)
dfmerge = dfdata.merge(dfdatah[['img', 'partition_description']], how = 'left', on='img')
dfmerge['partition_description'] = dfmerge['partition_description'].apply(flatten_to_strings)
dfmerge['partition_description'] = ",".join(dfmerge['partition_description'][0])
dfmerge['texty'] = dfmerge['text'] + "[SEP]" + dfmerge['text-sentiment'] + "[SEP]" + dfmerge['caption'] + "[SEP]" + dfmerge['caption-sentiment'] + "[SEP]" + dfmerge['partition_description']
outs = dfmerge[['id','img','label','texty']].to_dict('records')

with open('dev_unseen_text.jsonl', "w",encoding='utf8') as f:
    for i in range(len(outs)):
        f.write(str(outs[i])+"\n")

In [3]:
# Parts of fine tuning referenced: https://github.com/pchanda/pchanda.github.io/blob/master/_posts/2021-04-15-Roberta-FineTuning-for-Classification.md

In [162]:
with open("train_text.jsonl", "r", encoding='utf8') as f:
    train_df = pd.DataFrame([ast.literal_eval(line) for line in f.readlines()])
train_df.head()

Unnamed: 0,id,img,label,texty
0,42953,img/42953.png,0,its their character not their color that matte...
1,23058,img/23058.png,0,don't be afraid to love again everyone is not ...
2,13894,img/13894.png,0,putting bows on your pet[SEP]neutral[SEP]a gra...
3,37408,img/37408.png,0,i love everything and everybody! except for sq...
4,82403,img/82403.png,0,"everybody loves chocolate chip cookies, even h..."


In [163]:
def county(st):
    return st.count(' ')

In [164]:
max(train_df['texty'].apply(county))

88

In [165]:
train_df.shape

(8500, 4)

In [166]:
def resplit(s):
    ns = s.split('[SEP]')
    while len(ns)<5:
        ns.append('*')
#     dns = pd.DataFrame(ns).T.reindex(range(5), axis=1).fillna('')
    dns = pd.Series(ns)
#     print(ns)
    return dns

In [168]:
train_df[['text','tsent','caption','csent','extra']] = train_df['texty'].apply(resplit)#.reindex(range(5), axis=1).fillna('')#.str.split('[SEP]',expand=True).reindex(range(5), axis=1).fillna('')
train_df.head()

Unnamed: 0,id,img,label,texty,text,tsent,caption,csent,extra
0,42953,img/42953.png,0,its their character not their color that matte...,its their character not their color that matters,neutral,a man with a bald head and a black suit.,negative,"Facial hair,head,models male bald,man"
1,23058,img/23058.png,0,don't be afraid to love again everyone is not ...,don't be afraid to love again everyone is not ...,neutral,a bride and groom are not going to love.,negative,"Facial hair,head,models male bald,man"
2,13894,img/13894.png,0,putting bows on your pet[SEP]neutral[SEP]a gra...,putting bows on your pet,neutral,a gray cat with a red bow,neutral,"Facial hair,head,models male bald,man"
3,37408,img/37408.png,0,i love everything and everybody! except for sq...,i love everything and everybody! except for sq...,positive,person is a black and brown dog with brown spots.,neutral,"Facial hair,head,models male bald,man"
4,82403,img/82403.png,0,"everybody loves chocolate chip cookies, even h...","everybody loves chocolate chip cookies, even h...",positive,a man in a suit and tie is dancing with cookies.,neutral,"Facial hair,head,models male bald,man"


In [169]:
#for no-caption experiment
train_df['texty'] = train_df[['text','tsent','extra']].apply(lambda x: ' [SEP] '.join(x.values.tolist()), axis=1)
train_df.head()

Unnamed: 0,id,img,label,texty,text,tsent,caption,csent,extra
0,42953,img/42953.png,0,its their character not their color that matte...,its their character not their color that matters,neutral,a man with a bald head and a black suit.,negative,"Facial hair,head,models male bald,man"
1,23058,img/23058.png,0,don't be afraid to love again everyone is not ...,don't be afraid to love again everyone is not ...,neutral,a bride and groom are not going to love.,negative,"Facial hair,head,models male bald,man"
2,13894,img/13894.png,0,putting bows on your pet [SEP] neutral [SEP] F...,putting bows on your pet,neutral,a gray cat with a red bow,neutral,"Facial hair,head,models male bald,man"
3,37408,img/37408.png,0,i love everything and everybody! except for sq...,i love everything and everybody! except for sq...,positive,person is a black and brown dog with brown spots.,neutral,"Facial hair,head,models male bald,man"
4,82403,img/82403.png,0,"everybody loves chocolate chip cookies, even h...","everybody loves chocolate chip cookies, even h...",positive,a man in a suit and tie is dancing with cookies.,neutral,"Facial hair,head,models male bald,man"


In [170]:
# use test_seenunseen_text.jsonl for final test
with open("dev_seenunseen_text.jsonl", "r", encoding='utf8') as f:
    test_df = pd.DataFrame([ast.literal_eval(line) for line in f.readlines()])
test_df.head()

Unnamed: 0,id,img,label,texty
0,8291,img/08291.png,1,white people is this a shooting range[SEP]nega...
1,46971,img/46971.png,1,bravery at its finest[SEP]negative[SEP]a man w...
2,3745,img/03745.png,1,your order comes to $37.50 and your white priv...
3,83745,img/83745.png,1,it is time.. to send these parasites back to t...
4,80243,img/80243.png,1,mississippi wind chime[SEP]neutral[SEP]a poste...


In [172]:
#for no caption experiment
test_df[['text','tsent','caption','csent','extra']] = test_df['texty'].apply(resplit)
test_df.head()
test_df['texty'] = test_df[['text','tsent','extra']].apply(lambda x: ' [SEP] '.join(x.values.tolist()), axis=1)
test_df.head()

Unnamed: 0,id,img,label,texty,text,tsent,caption,csent,extra
0,8291,img/08291.png,1,white people is this a shooting range [SEP] ne...,white people is this a shooting range,negative,people that are people in a car,neutral,"Peanut allergy,Family,Parent,Health,Gender ide..."
1,46971,img/46971.png,1,bravery at its finest [SEP] negative [SEP] Pea...,bravery at its finest,negative,a man with a beard and glasses eating a spoon ...,neutral,"Peanut allergy,Family,Parent,Health,Gender ide..."
2,3745,img/03745.png,1,your order comes to $37.50 and your white priv...,your order comes to $37.50 and your white priv...,neutral,"person, a professional chef, is selling her ne...",neutral,"Peanut allergy,Family,Parent,Health,Gender ide..."
3,83745,img/83745.png,1,it is time.. to send these parasites back to t...,it is time.. to send these parasites back to t...,negative,a knight in a full armor,neutral,"Peanut allergy,Family,Parent,Health,Gender ide..."
4,80243,img/80243.png,1,mississippi wind chime [SEP] neutral [SEP] Pea...,mississippi wind chime,neutral,a poster for the documentary,neutral,"Peanut allergy,Family,Parent,Health,Gender ide..."


In [173]:
model_name = "microsoft/deberta-large"
num_labels = 2
device = torch.device("cuda")

tokenizer_name = model_name

max_seq_length = 100 
train_batch_size = 24
test_batch_size = 24
warmup_ratio = 0.06
weight_decay= 6.3e-05#1e-04 # from .0
gradient_accumulation_steps = 1
num_train_epochs = 20
learning_rate = 4.8e-07 
adam_epsilon = 1e-05#1e-04 # from 1e-08

num_hidden_layers = 12
num_attention_heads = 12

In [174]:
config_class = DebertaConfig
model_class = DebertaForSequenceClassification

tokenizer_class = AutoTokenizer

config = config_class.from_pretrained(model_name, num_labels=num_labels)

model = model_class.from_pretrained(model_name, config=config)
print('Model=\n',model,'\n')

tokenizer = tokenizer_class.from_pretrained(tokenizer_name, do_lower_case=False)
print('Tokenizer=',tokenizer,'\n')


Some weights of the model checkpoint at microsoft/deberta-large were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-large and are newly initialized: ['classifier.bias', 'pooler.d

Model=
 DebertaForSequenceClassification(
  (deberta): DebertaModel(
    (embeddings): DebertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=0)
      (LayerNorm): DebertaLayerNorm()
      (dropout): StableDropout()
    )
    (encoder): DebertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x DebertaLayer(
          (attention): DebertaAttention(
            (self): DisentangledSelfAttention(
              (in_proj): Linear(in_features=1024, out_features=3072, bias=False)
              (pos_dropout): StableDropout()
              (pos_proj): Linear(in_features=1024, out_features=1024, bias=False)
              (pos_q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): StableDropout()
            )
            (output): DebertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): DebertaLayerNorm()
              (dropout): StableDropout()
            )
       

In [175]:
class MyClassificationDataset(Dataset):
    
    def __init__(self, data, tokenizer):
        text, labels = data
        self.examples = tokenizer(text=text,text_pair=None,truncation=True,padding="max_length",
                                  max_length=max_seq_length,return_tensors="pt")
        self.labels = torch.tensor(labels, dtype=torch.long)
        

    def __len__(self):
        return len(self.examples["input_ids"])

    def __getitem__(self, index):
        return {key: self.examples[key][index] for key in self.examples}, self.labels[index]


train_examples = (train_df.iloc[:, 3].astype(str).replace('[SEP]',' [SEP] ').tolist(), train_df.iloc[:, 2].tolist())
train_dataset = MyClassificationDataset(train_examples,tokenizer)

test_examples = (test_df.iloc[:, 3].astype(str).replace('[SEP]',' [SEP] ').tolist(), test_df.iloc[:, 2].tolist())
test_dataset = MyClassificationDataset(test_examples,tokenizer)


In [176]:
def get_inputs_dict(batch):
    inputs = {key: value.squeeze(1).to(device) for key, value in batch[0].items()}
    inputs["labels"] = batch[1].to(device)
    return inputs

train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset,sampler=train_sampler,batch_size=train_batch_size)

test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=test_batch_size)

#Extract a batch as sanity-check
batch = get_inputs_dict(next(iter(train_dataloader)))
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)

print(batch)

{'input_ids': tensor([[    1,  4656,  7506,  ...,     0,     0,     0],
        [    1,  1794,  2828,  ...,     0,     0,     0],
        [    1,   627,  1457,  ...,     0,     0,     0],
        ...,
        [    1,  2977,    28,  ...,     0,     0,     0],
        [    1, 14746,    47,  ...,     0,     0,     0],
        [    1,  9178,    47,  ...,     0,     0,     0]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0'), 'labels': tensor([1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0],
       

In [177]:
t_total = len(train_dataloader) // gradient_accumulation_steps * num_train_epochs
optimizer_grouped_parameters = []
custom_parameter_names = set()
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters.extend(
    [
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if n not in custom_parameter_names and not any(nd in n for nd in no_decay)
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p
                for n, p in model.named_parameters()
                if n not in custom_parameter_names and any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
)

warmup_steps = math.ceil(t_total * warmup_ratio)
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=adam_epsilon)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)



In [178]:
def compute_metrics(preds, model_outputs, labels, eval_examples=None, multi_label=False):
    assert len(preds) == len(labels)
    mismatched = labels != preds
    wrong = [i for (i, v) in zip(eval_examples, mismatched) if v.any()]
    mcc = matthews_corrcoef(labels, preds)
    tn, fp, fn, tp = confusion_matrix(labels, preds, labels=[0, 1]).ravel()
    scores = np.array([softmax(element)[1] for element in model_outputs])
    fpr, tpr, thresholds = roc_curve(labels, scores)
    auroc = auc(fpr, tpr)
    auprc = average_precision_score(labels, scores)
    return (
        {
            **{"mcc": mcc, "tp": tp, "tn": tn, "fp": fp, "fn": fn, "auroc": auroc, "auprc": auprc},
        },
        wrong,
    )

def print_confusion_matrix(result):
    print('confusion matrix:')
    print('            predicted    ')
    print('          0     |     1')
    print('    ----------------------')
    print('   0 | ',format(result['tn'],'5d'),' | ',format(result['fp'],'5d'))
    print('gt -----------------------')
    print('   1 | ',format(result['fn'],'5d'),' | ',format(result['tp'],'5d'))
    print('---------------------------------------------------')

In [179]:
model.to(device)

model.zero_grad()

for epoch in range(num_train_epochs):

    model.train()
    epoch_loss = []
    
    for batch in train_dataloader:
        batch = get_inputs_dict(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        loss.backward()
        optimizer.step()
        scheduler.step()
        model.zero_grad()
        epoch_loss.append(loss.item())
        
    #evaluate model with test_df at the end of the epoch.
    eval_loss = 0.0
    nb_eval_steps = 0
    n_batches = len(test_dataloader)
    preds = np.empty((len(test_dataset), num_labels))
    out_label_ids = np.empty((len(test_dataset)))
    model.eval()
    
    for i,test_batch in enumerate(test_dataloader):
        with torch.no_grad():
            test_batch = get_inputs_dict(test_batch)
            input_ids = test_batch['input_ids'].to(device)
            attention_mask = test_batch['attention_mask'].to(device)
            labels = test_batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.item()
            
        nb_eval_steps += 1
        start_index = test_batch_size * i
        end_index = start_index + test_batch_size if i != (n_batches - 1) else len(test_dataset)
        preds[start_index:end_index] = logits.detach().cpu().numpy()
        out_label_ids[start_index:end_index] = test_batch["labels"].detach().cpu().numpy()
        
    eval_loss = eval_loss / nb_eval_steps
    model_outputs = preds
    preds = np.argmax(preds, axis=1)
    result, wrong = compute_metrics(preds, model_outputs, out_label_ids, test_examples)
    
    print('epoch',epoch,'Training avg loss',np.mean(epoch_loss))
    print('epoch',epoch,'Testing  avg loss',eval_loss)
    print(result)
    print_confusion_matrix(result)
    print('---------------------------------------------------\n')

epoch 0 Training avg loss 0.6731157044289817
epoch 0 Testing  avg loss 0.6872977512803945
{'mcc': 0.0, 'tp': 0, 'tn': 593, 'fp': 0, 'fn': 447, 'auroc': 0.48192371100573056, 'auprc': 0.41998784119818866}
confusion matrix:
            predicted    
          0     |     1
    ----------------------
   0 |    593  |      0
gt -----------------------
   1 |    447  |      0
---------------------------------------------------
---------------------------------------------------

epoch 1 Training avg loss 0.6224236540391411
epoch 1 Testing  avg loss 0.6771883869713003
{'mcc': 0.102666584413626, 'tp': 36, 'tn': 573, 'fp': 20, 'fn': 411, 'auroc': 0.5903361740816611, 'auprc': 0.5160740138380092}
confusion matrix:
            predicted    
          0     |     1
    ----------------------
   0 |    573  |     20
gt -----------------------
   1 |    411  |     36
---------------------------------------------------
---------------------------------------------------

epoch 2 Training avg loss 0.57

epoch 17 Training avg loss 0.40796115662850124
epoch 17 Testing  avg loss 0.7839373668486421
{'mcc': 0.13466731850121552, 'tp': 107, 'tn': 513, 'fp': 80, 'fn': 340, 'auroc': 0.6274884842174361, 'auprc': 0.541375626050528}
confusion matrix:
            predicted    
          0     |     1
    ----------------------
   0 |    513  |     80
gt -----------------------
   1 |    340  |    107
---------------------------------------------------
---------------------------------------------------

epoch 18 Training avg loss 0.4087685693317736
epoch 18 Testing  avg loss 0.7778351144357161
{'mcc': 0.12684854501017648, 'tp': 116, 'tn': 500, 'fp': 93, 'fn': 331, 'auroc': 0.6284089923077214, 'auprc': 0.5414981743495166}
confusion matrix:
            predicted    
          0     |     1
    ----------------------
   0 |    500  |     93
gt -----------------------
   1 |    331  |    116
---------------------------------------------------
---------------------------------------------------

epoch 

In [82]:
train_loss = [0.678385621,0.632376498,0.592045852,0.556179642,0.530158635,0.506936796,0.492930673,0.47866862,0.466178885,0.456818814,0.44298748,0.43295857,0.424309122,0.412495723,0.405447978,0.398407371,0.389174767,0.390648836,0.381018993,0.377208777]
validation_loss = [0.693061956,0.675622712,0.70049012,0.706153017,0.699088515,0.722747884,0.703253298,0.73466689,0.713488277,0.707019135,0.746260223,0.738967306,0.753794444,0.783485738,0.769607162,0.778863441,0.788041065,0.796995666,0.797464624,0.79262726]

plt.figure()

plt.plot(range(20),train_loss, label='Training Loss')
plt.plot(range(20),validation_loss, label='Validation Loss')

ax = plt.gca()
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
plt.xlim(left=0)
plt.legend()
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.savefig(f"TextAnalysis/text_loss_vs_epoch.png")
plt.close()

