In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModel
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import DataCollatorWithPadding
import string
import re

In [55]:
# read Iron Man 3.txt line by line
with open('Iron Man 3.txt', 'r') as f:
    lines = f.readlines()
    # print(lines)

# print(lines[0])
# print(lines[1])
# print(lines[2])


In [56]:
# iterate through lines, if line contains a colon, split the line into speaker and dialogue
speaker_dialogue_rows = []
for line in lines:
    if ':' in line:
        # print(line)
        speaker, dialogue = line.split(':', 1)
        # print(speaker)
        # print(dialogue.strip())
        # add the speaker and dialogue to the dataframe
        speaker_dialogue_rows.append({'Speaker': speaker, 'Dialogue': dialogue.strip()})

speaker_dialogue_mapping = pd.DataFrame(speaker_dialogue_rows)



In [57]:
speaker_dialogue_mapping.head()

Unnamed: 0,Speaker,Dialogue
0,Tony Stark,We create our own demons. Who said that? What ...
1,Tony Stark,I'm gonna start again. Let's track this from t...
2,Happy Hogan,(to Maya.) Half hour till the ball drops.
3,Tony Stark,"Hey, do you want...?"
4,Party Guest,"Tony Stark? Great speech, man!"


In [58]:
# make a new dataframe with key as speaker and all the speakers dialogues as values(the values are joined by a space)
speaker_dialogue_mapping_grouped = speaker_dialogue_mapping.groupby('Speaker')['Dialogue'].apply(lambda x: ' '.join(x)).reset_index()


In [59]:
# print all the speakers and their dialogues
# for index, row in speaker_dialogue_mapping_grouped.iterrows():
    # print(row['Speaker'])
    # print(row['Dialogue'])
    # print('\n')

print(list(speaker_dialogue_mapping_grouped['Speaker']))

['Aldrich Killian', 'Announcer', 'Bill Maher', 'Both', 'Brandt', 'Bruce Banner', 'Cameraman', 'Chad Davis', 'Colonel James Rhodes', 'Computer Voice', 'Crew', 'EMCEE', 'Elk Ridge', 'Erin', 'Gary', 'Government Employee', 'Government Official #1', 'Government Official #2', 'Guard', 'Guard #1', 'Guard #2', 'Happy Hogan', "Happy's Nurse", 'Harley Keener', 'Heather', 'Ho Yinsen', 'Hospital News Reporter', 'JARVIS', 'Jarvis', 'Joan Rivers', 'Little Boy', 'Man', 'Mandarin Look-Out', 'Maya Hansen', 'Military Aide', 'Mrs. Davis', 'News Reporter #1', 'News Reporter #2', 'News Reporter #3', 'News Reporter #4', 'Nurse', 'Officer', 'Officer #2', 'Party Guest', 'Pepper Potts', "Pepper's Assistant", 'President Ellis', 'Pushy Tabloid Reporter', 'Rhodey', 'Rose Hill Christmas Tree Shopper', 'Rose Hills Sheriff', 'Savin', 'Taggert', 'The Mandarin', 'Thomas Richards', 'Tony Stark', 'Trevor Slattery', 'Vanessa', 'Vice President', 'Woman']


In [60]:
for index, row in speaker_dialogue_mapping_grouped.iterrows():
    print(row['Speaker'])
    print(row['Dialogue'])
    print('\n')

Aldrich Killian
Mr. Stark! Oh, wow! Hey, Tony! Aldrich Killian. (Stuttering to Maya) I'm a big fan of your work! Well, of course. But, Miss Hansen, my organization has been tracking your research since year two of MIT. Oh, now, that is an appropriate question. The ground floor, actually. I've got a proposal I'm putting together with myself. It's a privately-funded think tank called, Advanced Idea Mechanics. [He holds out two business cards towards Tony and Maya] Advanced Idea Mechanics, or AIM for short. (Points to the logo on his shirt) Do you get it? Aw! Yeah? I'll see you up there. Pepper. You look great. You look really great. Nothing fancy, just five years in the hands of physical therapists. And please, call me Aldrich. After years dodging the President's ban on "immoral biotech research", my think tank now has a little something in the pipeline. It's an idea we like to call Extremis. I'm gonna turn your lights down. Regard the human brain. Uh...wait. Hold on, hold on. That's...t

In [61]:
speaker_dialogue_mapping_grouped.head()

Unnamed: 0,Speaker,Dialogue
0,Aldrich Killian,"Mr. Stark! Oh, wow! Hey, Tony! Aldrich Killian..."
1,Announcer,(On PA) Broadcast will commence shortly. Take ...
2,Bill Maher,And how is President Ellis responding? By taki...
3,Both,...Genetic operating system... Human application.
4,Brandt,Thank you. [Tony notices the woman has burn ma...


### non-mcu dataset

In [87]:
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score

In [60]:
df = pd.read_csv('archive/mbti_1.csv')

In [61]:
df.head()
print(len(df))

8675


In [62]:
personality_types = df['type'].unique() 
print(personality_types)

['INFJ' 'ENTP' 'INTP' 'INTJ' 'ENTJ' 'ENFJ' 'INFP' 'ENFP' 'ISFP' 'ISTP'
 'ISFJ' 'ISTJ' 'ESTP' 'ESFP' 'ESTJ' 'ESFJ']


In [63]:
def clean_text(text):
    regex = re.compile('[%s]' % re.escape('|'))
    text = regex.sub(" ", text)
    words = str(text).split()
    words = [i.lower() + " " for i in words]
    words = [i for i in words if not "http" in i]
    words = " ".join(words)
    words = words.translate(words.maketrans('', '', string.punctuation))
    return words

In [64]:
df['cleaned_text'] = df['posts'].apply(clean_text)

In [65]:
df.head()

Unnamed: 0,type,posts,cleaned_text
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,enfp and intj moments sportscenter not t...
1,ENTP,'I'm finding the lack of me in these posts ver...,im finding the lack of me in these pos...
2,INTP,'Good one _____ https://www.youtube.com/wat...,good one of course to which i say i ...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",dear intp i enjoyed our conversation the...
4,ENTJ,'You're fired.|||That's another silly misconce...,youre fired thats another silly misconcep...


In [66]:
df.drop(columns=['posts'], inplace=True)

In [67]:
df.rename(columns={'cleaned_text': 'posts'}, inplace=True)
df.head()

Unnamed: 0,type,posts
0,INFJ,enfp and intj moments sportscenter not t...
1,ENTP,im finding the lack of me in these pos...
2,INTP,good one of course to which i say i ...
3,INTJ,dear intp i enjoyed our conversation the...
4,ENTJ,youre fired thats another silly misconcep...


In [68]:
#'INFJ' 'ENTP' 'INTP' 'INTJ' 'ENTJ' 'ENFJ' 'INFP' 'ENFP' 'ISFP' 'ISTP' 'ISFJ' 'ISTJ' 'ESTP' 'ESFP' 'ESTJ' 'ESFJ'
id2label = {0: 'INFJ', 1: 'ENTP', 2: 'INTP', 3: 'INTJ', 4: 'ENTJ', 5: 'ENFJ', 6: 'INFP', 7: 'ENFP', 8: 'ISFP', 9: 'ISTP', 10: 'ISFJ', 11: 'ISTJ', 12: 'ESTP', 13: 'ESFP', 14: 'ESTJ', 15: 'ESFJ'} 
label2id = {'INFJ': 0, 'ENTP': 1, 'INTP': 2, 'INTJ': 3, 'ENTJ': 4, 'ENFJ': 5, 'INFP': 6, 'ENFP': 7, 'ISFP': 8, 'ISTP': 9, 'ISFJ': 10, 'ISTJ': 11, 'ESTP': 12, 'ESFP': 13, 'ESTJ': 14, 'ESFJ': 15} # label rep

In [69]:
df['label'] = df['type'].map(label2id)
df.head()

Unnamed: 0,type,posts,label
0,INFJ,enfp and intj moments sportscenter not t...,0
1,ENTP,im finding the lack of me in these pos...,1
2,INTP,good one of course to which i say i ...,2
3,INTJ,dear intp i enjoyed our conversation the...,3
4,ENTJ,youre fired thats another silly misconcep...,4


In [70]:
# split df into train and test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [71]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [72]:
train_df.head()

Unnamed: 0,type,posts,label
4080,INFJ,i loved all the light we cannot see by...,0
2614,ENFP,it depends if i care about it i fight ...,7
5414,ENTP,welcome home sonny laughing just because ...,1
1039,ENFP,thats really cool of you i like it whe...,7
8294,ENTP,the duck is named zeus nope see traits ...,1


In [77]:
# encode the text

encoded_train_data = tokenizer.batch_encode_plus(
    tqdm(train_df['posts'].values), 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt')

  0%|          | 0/6940 [00:00<?, ?it/s]

100%|██████████| 6940/6940 [04:34<00:00, 25.33it/s]


In [78]:
encoded_test_data = tokenizer.batch_encode_plus(
    tqdm(test_df['posts'].values), 
    add_special_tokens=True, 
    return_attention_mask=True, 
    pad_to_max_length=True, 
    max_length=256, 
    return_tensors='pt')

100%|██████████| 1735/1735 [01:09<00:00, 24.87it/s]


In [79]:
encoded_train_data.keys()
encoded_train_data


{'input_ids': tensor([[  101,  1045,  3866,  ...,  2030,  8426,   102],
        [  101,  2009,  9041,  ...,  2189,  1999,   102],
        [  101,  6160,  2188,  ..., 27036,  1998,   102],
        ...,
        [  101,  4921,  2063,  ...,  3030,  3331,   102],
        [  101,  2130,  1996,  ...,  1999,  3622,   102],
        [  101,  2077,  3752,  ...,  2008,  2115,   102]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]])}

In [81]:
# inputs for BERT model
input_ids_train = encoded_train_data['input_ids']
attention_masks_train = encoded_train_data['attention_mask']
labels_train = torch.tensor(train_df['label'].values)

input_ids_test = encoded_test_data['input_ids']
attention_masks_test = encoded_test_data['attention_mask']
labels_test = torch.tensor(test_df['label'].values)

In [82]:
train_dataset = torch.utils.data.TensorDataset(input_ids_train, attention_masks_train, labels_train)
test_dataset = torch.utils.data.TensorDataset(input_ids_test, attention_masks_test, labels_test)

In [85]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=16, output_attentions=False, output_hidden_states=False)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
BATCH_SIZE = 32
EPOCHS = 10

In [91]:
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [92]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criteria = torch.nn.CrossEntropyLoss()

In [3]:
# training loop
for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    train_preds = []
    train_true = []
    train_iterator = tqdm(train_dataloader, desc = 'Training')
    for i, batch in enumerate(train_dataloader):
        input_ids, attention_masks, labels = tuple(t.to(device) for t in batch)
        model.zero_grad()
        outputs = model(input_ids, attention_masks, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        preds = torch.argmax(logits, dim=1)
        train_preds.extend(preds.cpu().numpy())
        train_true.extend(labels.cpu().numpy())
    train_loss = train_loss / len(train_dataloader)
    train_acc = accuracy_score(train_true, train_preds)
    train_f1 = f1_score(train_true, train_preds, average='weighted')
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print(f'Train loss: {train_loss}, Train acc: {train_acc}, Train f1: {train_f1}')

    model.eval()
    test_loss = 0
    test_preds = []
    test_true = []
    test_iterator = tqdm(test_dataloader, desc = 'Testing')
    for i, batch in enumerate(test_dataloader):
        input_ids, attention_masks, labels = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            outputs = model(input_ids, attention_masks, labels=labels)
            loss = outputs.loss
            logits = outputs.logits
            test_loss += loss.item()
            preds = torch.argmax(logits, dim=1)
            test_preds.extend(preds.cpu().numpy())
            test_true.extend(labels.cpu().numpy())
    test_loss = test_loss / len(test_dataloader)
    test_acc = accuracy_score(test_true, test_preds)
    test_f1 = f1_score(test_true, test_preds, average='weighted')
    print(f'Test loss: {test_loss}, Test acc: {test_acc}, Test f1: {test_f1}')

NameError: name 'model' is not defined

### Using Roberta with one linear layer for classification

In [175]:
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaModel
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
import string
import re

In [176]:
df = pd.read_csv('archive/mbti_1.csv')

In [177]:
df.head()
print(len(df))

8675


In [178]:
personality_types = df['type'].unique() 
print(personality_types)

['INFJ' 'ENTP' 'INTP' 'INTJ' 'ENTJ' 'ENFJ' 'INFP' 'ENFP' 'ISFP' 'ISTP'
 'ISFJ' 'ISTJ' 'ESTP' 'ESFP' 'ESTJ' 'ESFJ']


In [179]:
def clean_text(text):
    regex = re.compile('[%s]' % re.escape('|'))
    text = regex.sub(" ", text)
    words = str(text).split()
    words = [i.lower() + " " for i in words]
    words = [i for i in words if not "http" in i]
    words = " ".join(words)
    words = words.translate(words.maketrans('', '', string.punctuation))
    return words

In [180]:
df['cleaned_text'] = df['posts'].apply(clean_text)

In [181]:
df.head()

Unnamed: 0,type,posts,cleaned_text
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,enfp and intj moments sportscenter not t...
1,ENTP,'I'm finding the lack of me in these posts ver...,im finding the lack of me in these pos...
2,INTP,'Good one _____ https://www.youtube.com/wat...,good one of course to which i say i ...
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",dear intp i enjoyed our conversation the...
4,ENTJ,'You're fired.|||That's another silly misconce...,youre fired thats another silly misconcep...


In [182]:
df.drop(columns=['posts'], inplace=True)

In [183]:
df.rename(columns={'cleaned_text': 'posts'}, inplace=True)
df.head()

Unnamed: 0,type,posts
0,INFJ,enfp and intj moments sportscenter not t...
1,ENTP,im finding the lack of me in these pos...
2,INTP,good one of course to which i say i ...
3,INTJ,dear intp i enjoyed our conversation the...
4,ENTJ,youre fired thats another silly misconcep...


In [184]:
#'INFJ' 'ENTP' 'INTP' 'INTJ' 'ENTJ' 'ENFJ' 'INFP' 'ENFP' 'ISFP' 'ISTP' 'ISFJ' 'ISTJ' 'ESTP' 'ESFP' 'ESTJ' 'ESFJ'
id2label = {0: 'INFJ', 1: 'ENTP', 2: 'INTP', 3: 'INTJ', 4: 'ENTJ', 5: 'ENFJ', 6: 'INFP', 7: 'ENFP', 8: 'ISFP', 9: 'ISTP', 10: 'ISFJ', 11: 'ISTJ', 12: 'ESTP', 13: 'ESFP', 14: 'ESTJ', 15: 'ESFJ'} 
label2id = {'INFJ': 0, 'ENTP': 1, 'INTP': 2, 'INTJ': 3, 'ENTJ': 4, 'ENFJ': 5, 'INFP': 6, 'ENFP': 7, 'ISFP': 8, 'ISTP': 9, 'ISFJ': 10, 'ISTJ': 11, 'ESTP': 12, 'ESFP': 13, 'ESTJ': 14, 'ESFJ': 15} # label rep

In [185]:
df['label'] = df['type'].map(label2id)
df.head()

Unnamed: 0,type,posts,label
0,INFJ,enfp and intj moments sportscenter not t...,0
1,ENTP,im finding the lack of me in these pos...,1
2,INTP,good one of course to which i say i ...,2
3,INTJ,dear intp i enjoyed our conversation the...,3
4,ENTJ,youre fired thats another silly misconcep...,4


In [186]:
# check max length of a value in the posts column
max_len = df['posts'].apply(lambda x: len(x.split())).max()
print(max_len)


1929


In [204]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [188]:
# make a dataset class
class MBTIDataset(torch.utils.data.Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = self.data.iloc[index]['posts']
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
            truncation=True
        ) 
        # print (inputs)
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': torch.tensor(self.data.iloc[index]['label'], dtype=torch.long)
        }

In [189]:
max_length = 256

In [190]:
train_dataset = MBTIDataset(train_df, tokenizer, max_length)
test_dataset = MBTIDataset(test_df, tokenizer, max_length)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=2)

In [191]:
for batch in train_loader:
    print(batch)
    break

{'input_ids': tensor([[    0,   757,  1437,   202,  1437,    10,  1437, 33799,  1437,    89,
          1437,    32,  1437,    80,  1437,  2188,  1437,   269,  1437,     5,
          1437,    78,  1437,    74,  1437,    28,  1437,    14,  1437,    63,
          1437,   373,  1437,   442,  1437,   657,  1437,    13,  1437,    10,
          1437,  1219,  1437,     8,  1437,   939,  1437, 33976,  1437,   206,
          1437,    47,  1437,    64,  1437,   109,  1437,    24,  1437,  3099,
          6459,  1437,  3867,  1437,   110,  1437,    11,  1437,   657,  1437,
            61,  1437,  1437,  2088,  1437,   393,  1437,    57,  1437,  4356,
          1437,   182,  1437, 31201,  1437,    24,  1437,  1302,  1437,    65,
          1437,   183,  1437,  4356,  1437,    11,  1437,  3016,  4717,  1437,
             8,  1437, 12256,  1437, 10844,  1437,   172,  1437,   220,  1437,
          4356,  1437,    11,  1437,    70,  1437,   909,  1437,    19,  1437,
            10,  1437, 16576,  1437,  

In [192]:
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
criteria = torch.nn.CrossEntropyLoss()

In [193]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


In [197]:
# defining model architecture
class RobertaClassifier(torch.nn.Module):
    def __init__(self, model):
        super(RobertaClassifier, self).__init__()
        self.model = model
        self.fc = torch.nn.Linear(768, 16)
        
    def forward(self, input_ids, attention_mask):
        outputs = self.model(input_ids, attention_mask)
        last_hidden_state = outputs.last_hidden_state[:, 0, :]
        logits = self.fc(last_hidden_state)
        return logits
    
model = RobertaModel.from_pretrained('roberta-base')
model = RobertaClassifier(model)
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaClassifier(
  (model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): Lay

In [202]:
def train(model, train_loader, valid_loader, optimizer, criteria, epochs=1):
    model.train()
    train_losses = []
    valid_losses = []
    for epoch in range(epochs):
        total_loss = 0
        avg_f1_micro = 0
        avg_f1_macro = 0
        avg_f1_weighted = 0
        for i, batch in enumerate(tqdm(train_loader)):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = criteria(outputs, labels)
            optimizer.step()
            total_loss += loss.item()
            # calculate micro f1 score, macro f1 and weighted f1
            # labels = torch.argmax(labels, dim=1)
            preds = torch.argmax(outputs, dim=1)
            # acc = accuracy_score(labels.cpu(), preds.cpu())
            f1_micro = f1_score(labels.cpu(), preds.cpu(), average='micro')
            f1_macro = f1_score(labels.cpu(), preds.cpu(), average='macro')
            f1_weighted = f1_score(labels.cpu(), preds.cpu(), average='weighted')
            avg_f1_micro += f1_micro
            avg_f1_macro += f1_macro
            avg_f1_weighted += f1_weighted
        
        avg_f1_micro /= len(train_loader)
        avg_f1_macro /= len(train_loader)
        avg_f1_weighted /= len(train_loader)
        train_losses.append(total_loss/len(train_loader))
        print(f'Epoch {epoch + 1}, Training Loss: {total_loss/len(train_loader)}, Training F1 Micro: {avg_f1_micro}, Training F1 Macro: {avg_f1_macro}, Training F1 Weighted: {avg_f1_weighted}')

        model.eval()
        with torch.no_grad():
            val_loss = 0
            avg_f1_micro = 0
            avg_f1_macro = 0
            avg_f1_weighted = 0
            for i, batch in enumerate(tqdm(valid_loader)):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)
                loss, preds = model(input_ids, attention_mask)
                val_loss += loss.item()
                labels = torch.argmax(labels, dim=1)
                preds = torch.argmax(preds, dim=1)
                f1_micro = f1_score(labels.cpu(), preds.cpu(), average='micro')
                f1_macro = f1_score(labels.cpu(), preds.cpu(), average='macro')
                f1_weighted = f1_score(labels.cpu(), preds.cpu(), average='weighted')
                avg_f1_micro += f1_micro
                avg_f1_macro += f1_macro
                avg_f1_weighted += f1_weighted
            avg_f1_micro /= len(valid_loader)
            avg_f1_macro /= len(valid_loader)
            avg_f1_weighted /= len(valid_loader)
            valid_losses.append(val_loss/len(valid_loader))
            print(f'Epoch {epoch + 1}, Validation Loss: {val_loss/len(valid_loader)}, Validation F1 Micro: {avg_f1_micro}, Validation F1 Macro: {avg_f1_macro}, Validation F1 Weighted: {avg_f1_weighted}')
    return train_losses, valid_losses

    

In [203]:
train_losses, valid_losses = train(model, train_loader, test_loader, optimizer, criteria)

  0%|          | 13/3470 [00:15<1:06:36,  1.16s/it]


KeyboardInterrupt: 

In [107]:
# make a dataframe with colums as character and personality
true_personality_labels = pd.DataFrame(columns=['Character', 'Personality'])

# add elements to the dataframe
true_personality_labels = true_personality_labels.append({'Character': 'Tony Stark', 'Personality': 'ENTP'}, ignore_index=True)
true_personality_labels = true_personality_labels.append({'Character': 'Pepper Potts', 'Personality': 'ISTJ'}, ignore_index=True)
true_personality_labels = true_personality_labels.append({'Character': 'James Rhodes', 'Personality': 'ISTJ'}, ignore_index=True)
true_personality_labels = true_personality_labels.append({'Character': 'Aldrich Killian', 'Personality': 'ENTJ'}, ignore_index=True)
true_personality_labels = true_personality_labels.append({'Character': 'Maya Hansen', 'Personality': 'INFJ'}, ignore_index=True)
true_personality_labels = true_personality_labels.append({'Character': 'Ellen Brandt', 'Personality': 'ISTP'}, ignore_index=True)



Unnamed: 0,Character,Personality
