In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
import torch
import pandas as pd
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, classification_report
import numpy as np
import json
from collections import defaultdict

In [2]:
SEED = 123
torch.manual_seed(SEED)
np.random.seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
train_path = '/kaggle/input/conv-forecast/conv_derailment_data/datasets/cmv/base/train.json'
test_path = '/kaggle/input/conv-forecast/conv_derailment_data/datasets/cmv/base/test.json'
val_path = '/kaggle/input/conv-forecast/conv_derailment_data/datasets/cmv/base/valid.json'

train_data = []
with open(train_path,'r') as f:
  for line in f:
    train_data.append(json.loads(line))

test_data = []
with open(test_path,'r') as f:
  for line in f:
    test_data.append(json.loads(line))

val_data = []
with open(val_path,'r') as f:
  for line in f:
    val_data.append(json.loads(line))

In [None]:
# grouped_train = defaultdict(list)
# for item in train_data:
#     convo_id = item['convo_id']
#     grouped_train[convo_id].append(item)


# # duplicated convo_ids
# duplicated_convo_ids = [k for k, v in grouped_train.items() if len(v) > 1]
# print(duplicated_convo_ids)

In [14]:
train_data[1000]

{'src': " just say no, to the nanny state. we don't need government banning everything that can hurt someone. people enjoy football, let them enjoy it, and let children and parents who are afraid of their own shadows ban their kids from playing. [SEP]  yeah, and bring back the lead paint and asbestos! [SEP]  yeah, lead paint and asbesto's, exactly the same thing as a sport. [SEP]  yep, they're all things that have been proven to be dangerous, particularly to children.",
 'reply': [101, 100, 102],
 'tgt': True,
 'convo_id': 1543424,
 'comment_id': 4}

In [7]:
from transformers import BertTokenizer
tokenizer_bert = BertTokenizer.from_pretrained('bert-base-uncased')

In [8]:
reply = train_data[0]['reply']
tokenizer_bert.decode(reply)

2024-04-19 01:05:08.730385: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-19 01:05:08.730541: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-19 01:05:08.893112: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


'[CLS] [UNK] [SEP]'

In [16]:
for item in train_data:
    item['src'] = item['src'].replace('[SEP]', '[EOS]')
    item['src'] = item['src'].replace('[UNK]', ' ')
#     item['reply'] = tokenizer_bert.decode(item['reply'], skip_special_tokens=True)

for item in val_data:
    item['src'] = item['src'].replace('[SEP]', '[EOS]')
    item['src'] = item['src'].replace('[UNK]', ' ')
#     item['reply'] = tokenizer_bert.decode(item['reply'], skip_special_tokens=True)

for item in test_data:
    item['src'] = item['src'].replace('[SEP]', '[EOS]')
    item['src'] = item['src'].replace('[UNK]', ' ')
#     item['reply'] = tokenizer_bert.decode(item['reply'], skip_special_tokens=True)


In [17]:
def data_for_gpt2(data):
  src_text = data['src']
#   reply = data['reply']
  tgt = data['tgt']
  combined_text = f"<SRC>: {src_text}"
  return combined_text, tgt

In [18]:
train_data_gpt2 = []
tgt_train = []
for item in train_data:
  formatted_text, tgt = data_for_gpt2(item)
  train_data_gpt2.append(formatted_text)
  tgt_train.append(tgt)

In [19]:
tgt_train[0]

True

In [21]:
print(train_data_gpt2[0])

<SRC>:  ( okay, i've seen this view come up a few times before and i've always been unsuccessful in convincing people about why they're wrong. however, it seems that your view is based on studies, so maybe you'll respond well to evidence / u / carlosriccy. ) i cannot dispute the fact that there is a measurable iq gap b / w white students  it's not just black and white america though. sub - saharan africans and caribbeans of african decent by far score the lowest on average of ethnic groups, with american blacks only performing marginally better. and the tests don't measure anything that is taught. they measure abstract reasoning. that's why you can give an iq test to a child, and also why gifted children generally can score  abstract reasoning is a skill that can be nurtured or hindered. it doesn't simply depend on your genetics. twin studies show that although there is a genetic component to iq, it is far from a complete predictor of iq. ( see [ wikipedia ] ( https : / / en. wikipedia

In [22]:
val_data_gpt2 = []
tgt_val = []
for item in val_data:
  formatted_text, tgt = data_for_gpt2(item)
  val_data_gpt2.append(formatted_text)
  tgt_val.append(tgt)

In [23]:
val_data_gpt2[0]

'<SRC>:  i don\'t think your 4th point makes much sense. how exactly would extra pronouns make things less clear? under the current system, the singular " they " can refer to male, female, and other identities, which is about as unclear as things can get. even if you don\'t know the exact definition of " xe, " for example, it\'s at least clear that the individual is actively trying to avoid a more traditional word because it doesn\'t reflect who they consider themselves to be. this is more information, not less. [EOS]  but it isn\'t unclear who the pronoun is referring to : " they " had been both singular and plural long before this special snowflake syndrome phenomena. the only reason to change this is to ostensibly insert identity politics into casual language. eg : " have you met terry? they will be at the party tonight. " pronoun has successfully informed you that terry will be at the party. _ _ _ edit : additional edited text from deleted comment : i\'m talking about the word " th

In [24]:
tgt_val[0]

True

In [25]:
test_data_gpt2 = []
tgt_test = []
for item in test_data:
  formatted_text, tgt = data_for_gpt2(item)
  test_data_gpt2.append(formatted_text)
  tgt_test.append(tgt)

In [26]:
from transformers import AutoTokenizer
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from tqdm import tqdm

In [27]:
from transformers import GPT2ForSequenceClassification, GPT2Config
import torch
import torch.nn.functional as F

class GPT2ForSequenceClassificationMeanPooling(GPT2ForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.gpt2 = self.transformer  # Assuming the GPT2ForSequenceClassification model has a 'transformer' attribute
        self.score = torch.nn.Linear(config.hidden_size, config.num_labels)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        labels=None,
        **kwargs
    ):
        outputs = self.gpt2(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )

        # Get the hidden states from the last layer
        hidden_states = outputs.last_hidden_state

        # Apply mean pooling on the hidden states
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        sum_hidden_states = torch.sum(hidden_states * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        mean_hidden_states = sum_hidden_states / sum_mask

        # Pass the pooled hidden states through the classification head
        logits = self.score(mean_hidden_states)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    loss_fct = torch.nn.MSELoss()
                    loss = loss_fct(logits.view(-1), labels.view(-1))
                else:
                    loss_fct = torch.nn.CrossEntropyLoss()
                    loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            else:
                raise ValueError("You should supply an instance of `PreTrainedModel` or a `config`")

        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

In [28]:
### Loading Model and Tokenizers
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
tokenizer = GPT2Tokenizer.from_pretrained('gpt2',
                                         add_eos_token=True)
model = GPT2ForSequenceClassificationMeanPooling.from_pretrained('gpt2', num_labels=2)
print(model)
model.to(device)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassificationMeanPooling were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.bias', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForSequenceClassificationMeanPooling(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=2, bias=True)
  (gpt2): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop)

GPT2ForSequenceClassificationMeanPooling(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=2, bias=True)
  (gpt2): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop)

In [29]:
## Defining Special Tokens
tokenizer.pad_token = tokenizer.eos_token
tokenizer.sep_token = '[EOS]'
special_tokens = {'additional_special_tokens': ['<SRC>:', ' <REPLY>:', '[UNK]']}
num_added = tokenizer.add_special_tokens(special_tokens)
print(num_added)
model.resize_token_embeddings(len(tokenizer))

3


Embedding(50260, 768)

In [30]:
# Tokenizing Data
train_encodings = tokenizer(train_data_gpt2, truncation=False, padding=False)

In [31]:
max_length = max(len(x) for x in train_encodings.input_ids)

In [32]:
max_length

578

In [33]:
val_encodings = tokenizer(val_data_gpt2, truncation=False, padding=False)

In [34]:
val_max_length = max(len(x) for x in val_encodings.input_ids)
val_max_length

581

In [35]:
test_encodings = tokenizer(test_data_gpt2, truncation=False, padding=False)

In [36]:
test_max_length = max(len(x) for x in test_encodings.input_ids)
test_max_length

575

In [37]:
## Redefining Tokenizer with padding and max length
train_inputs = tokenizer(train_data_gpt2, truncation=True, padding=True, max_length=max_length, return_tensors='pt')

# Create torch dataset
input_ids = train_inputs['input_ids']
attention_mask = train_inputs['attention_mask']
labels = torch.tensor([0 if tgt == False else 1 for tgt in tgt_train])
train_dataset = TensorDataset(input_ids, attention_mask, labels)

In [38]:
len(input_ids[0])

578

In [39]:
val_inputs = tokenizer(val_data_gpt2, truncation=True, padding=True, max_length=val_max_length, return_tensors='pt')
val_input_ids = val_inputs['input_ids']
val_attention_mask = val_inputs['attention_mask']
val_labels = torch.tensor([0 if tgt == False else 1 for tgt in tgt_val])
val_dataset = TensorDataset(val_input_ids, val_attention_mask, val_labels)

In [40]:
test_inputs = tokenizer(test_data_gpt2, truncation=True, padding=True, max_length=test_max_length, return_tensors='pt')
test_input_ids = test_inputs['input_ids']
test_attention_mask = test_inputs['attention_mask']
test_labels = torch.tensor([0 if tgt == False else 1 for tgt in tgt_test])
test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)

In [41]:
## CREATING DATALOADERS
batch_size = 4
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [42]:
import copy

In [43]:
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau

optimizer = AdamW(model.parameters(), lr=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=1, verbose=True)


In [44]:
from sklearn.metrics import f1_score
from tqdm import tqdm

# Training and validation loop
num_epochs = 5
train_losses = []
val_losses = []
f1_scores = []
best_val_loss = float('inf')
best_model_state = None

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1} of {num_epochs}")
    train_loss = 0
    
    model.train()
    for batch in tqdm(train_dataloader, desc='Training Progress', miniters=10):
        input_ids, attention_mask, batch_labels = [b.to(device) for b in batch]
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs[0]
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    
    avg_train_loss = train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    print(f"Training loss: {avg_train_loss:.2f}")

    # Validation phase
    val_loss = 0
    model.eval()
    val_preds = []
    val_true_labels = []
    with torch.no_grad():
        for batch in tqdm(val_dataloader, desc='Validation Progress', miniters=10):
            input_ids, attention_mask, batch_labels = [b.to(device) for b in batch]
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)
            loss = outputs[0]
            val_loss += loss.item()
            logits = outputs[1]
            preds = torch.argmax(torch.nn.functional.softmax(logits, dim=1), dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_true_labels.extend(batch_labels.cpu().numpy())

        avg_val_loss = val_loss / len(val_dataloader)
        val_losses.append(avg_val_loss)
        f1 = f1_score(val_true_labels, val_preds, average='macro')
        f1_scores.append(f1)
        print(f"Validation loss: {avg_val_loss:.2f}, F1 score: {f1:.2f}")

    scheduler.step(avg_val_loss)

    # Check if the current model is the best one
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_model_state = model.state_dict()
        torch.save(best_model_state, 'CMV_Base_model_GPT2.pth')  # Save the best model
        print("Saved new best model")


Epoch 1 of 5


Training Progress: 100%|██████████| 1027/1027 [05:33<00:00,  3.08it/s]


Training loss: 0.74


Validation Progress: 100%|██████████| 342/342 [00:35<00:00,  9.74it/s]


Validation loss: 0.73, F1 score: 0.40
Saved new best model
Epoch 2 of 5


Training Progress: 100%|██████████| 1027/1027 [05:32<00:00,  3.09it/s]


Training loss: 0.65


Validation Progress: 100%|██████████| 342/342 [00:35<00:00,  9.74it/s]


Validation loss: 0.64, F1 score: 0.61
Saved new best model
Epoch 3 of 5


Training Progress: 100%|██████████| 1027/1027 [05:32<00:00,  3.09it/s]


Training loss: 0.61


Validation Progress: 100%|██████████| 342/342 [00:35<00:00,  9.73it/s]


Validation loss: 0.69, F1 score: 0.55
Epoch 4 of 5


Training Progress: 100%|██████████| 1027/1027 [05:32<00:00,  3.09it/s]


Training loss: 0.56


Validation Progress: 100%|██████████| 342/342 [00:35<00:00,  9.73it/s]


Validation loss: 0.65, F1 score: 0.63
Epoch 00004: reducing learning rate of group 0 to 1.0000e-06.
Epoch 5 of 5


Training Progress: 100%|██████████| 1027/1027 [05:32<00:00,  3.09it/s]


Training loss: 0.46


Validation Progress: 100%|██████████| 342/342 [00:35<00:00,  9.74it/s]


Validation loss: 0.64, F1 score: 0.65
Saved new best model


In [46]:
model.eval()
test_loss = 0
test_preds = []
test_true_labels = []
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc='Test Progress', miniters = 10):
        input_ids, attention_mask, batch_labels = batch
        input_ids, attention_mask, batch_labels = input_ids.to(device), attention_mask.to(device), batch_labels.to(device)
        model.config.pad_token_id = tokenizer.eos_token_id
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs[0]  
        test_loss += loss.item()
        logits = outputs[1] 
        probs = torch.nn.functional.softmax(logits, dim=1)
        preds = torch.argmax(probs, dim=1)
        test_preds.extend(preds.cpu().numpy())
        test_true_labels.extend(batch_labels.cpu().numpy())
        
avg_test_loss = test_loss / len(test_dataloader)
f1 = f1_score(test_true_labels, test_preds, average='macro')
print(f"Test loss: {avg_test_loss:.2f}, F1 score: {f1:.2f}")

Test Progress: 100%|██████████| 2117/2117 [03:23<00:00, 10.40it/s]

Test loss: 0.80, F1 score: 0.56





In [47]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

F1_score = f1_score(test_true_labels, test_preds, average='macro')
accuracy = accuracy_score(test_true_labels, test_preds)
precision = precision_score(test_true_labels, test_preds, average='macro')
recall = recall_score(test_true_labels, test_preds, average='macro')

print(f"F1 score: {F1_score}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

F1 score: 0.55792341779679
Accuracy: 0.5624852350578786
Precision: 0.5726520392064478
Recall: 0.568298208605316


In [48]:
conf_matrix = confusion_matrix(test_true_labels, test_preds)
classification_report = classification_report(test_true_labels, test_preds)

print(conf_matrix)
print(classification_report)

[[2811 1229]
 [2475 1951]]
              precision    recall  f1-score   support

           0       0.53      0.70      0.60      4040
           1       0.61      0.44      0.51      4426

    accuracy                           0.56      8466
   macro avg       0.57      0.57      0.56      8466
weighted avg       0.57      0.56      0.56      8466



In [49]:
data = {'True Labels': test_true_labels, 'Predictions': test_preds}
prediction_comparison_df = pd.DataFrame(data)

In [51]:
prediction_comparison_df[10:100]

Unnamed: 0,True Labels,Predictions
10,1,1
11,1,1
12,0,1
13,0,0
14,0,0
...,...,...
95,1,0
96,0,1
97,0,1
98,0,1


# LOADING SAVED MODEL

In [52]:
model_1 = GPT2ForSequenceClassificationMeanPooling.from_pretrained('gpt2', num_labels=2)
model_1.resize_token_embeddings(len(tokenizer)) 
model_1.load_state_dict(torch.load('/kaggle/working/CMV_Base_model_GPT2.pth', map_location=device))
model_1.to(device)

Some weights of GPT2ForSequenceClassificationMeanPooling were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.bias', 'score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForSequenceClassificationMeanPooling(
  (transformer): GPT2Model(
    (wte): Embedding(50260, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=2, bias=True)
  (gpt2): GPT2Model(
    (wte): Embedding(50260, 768)
    (wpe): Embedding(1024, 768)
    (drop)

In [53]:
model_1.eval()
test_loss = 0
test_preds = []
test_true_labels = []
with torch.no_grad():
    for batch in tqdm(test_dataloader, desc='Test Progress', miniters = 10):
        input_ids, attention_mask, batch_labels = batch
        input_ids, attention_mask, batch_labels = input_ids.to(device), attention_mask.to(device), batch_labels.to(device)
        model_1.config.pad_token_id = tokenizer.eos_token_id
        outputs = model_1(input_ids=input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs[0]  # Extract the loss value from the tuple
        test_loss += loss.item()
        logits = outputs[1] 
        probs = torch.nn.functional.softmax(logits, dim=1)
        preds = torch.argmax(probs, dim=1)
        test_preds.extend(preds.cpu().numpy())
        test_true_labels.extend(batch_labels.cpu().numpy())
        
avg_test_loss = test_loss / len(test_dataloader)
f1 = f1_score(test_true_labels, test_preds, average='macro')
print(f"Validation loss: {avg_test_loss:.2f}, F1 score: {f1:.2f}")

Test Progress: 100%|██████████| 2117/2117 [03:23<00:00, 10.40it/s]

Validation loss: 0.80, F1 score: 0.56





In [54]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

F1_score = f1_score(test_true_labels, test_preds, average='macro')
accuracy = accuracy_score(test_true_labels, test_preds)
precision = precision_score(test_true_labels, test_preds, average='macro')
recall = recall_score(test_true_labels, test_preds, average='macro')

print(f"F1 score: {F1_score}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

F1 score: 0.55792341779679
Accuracy: 0.5624852350578786
Precision: 0.5726520392064478
Recall: 0.568298208605316


In [55]:
conf_matrix = confusion_matrix(test_true_labels, test_preds)
classification_report = classification_report(test_true_labels, test_preds)

print(conf_matrix)
print(classification_report)

[[2811 1229]
 [2475 1951]]
              precision    recall  f1-score   support

           0       0.53      0.70      0.60      4040
           1       0.61      0.44      0.51      4426

    accuracy                           0.56      8466
   macro avg       0.57      0.57      0.56      8466
weighted avg       0.57      0.56      0.56      8466



In [56]:
data = {'True Labels': test_true_labels, 'Predictions': test_preds}
prediction_comparison_df = pd.DataFrame(data)

In [58]:
prediction_comparison_df[10:30]

Unnamed: 0,True Labels,Predictions
10,1,1
11,1,1
12,0,1
13,0,0
14,0,0
15,0,0
16,0,0
17,0,0
18,0,1
19,0,1


In [59]:
from IPython.display import FileLink
FileLink(r'CMV_Base_model_GPT2.pth')