In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import accuracy_score, classification_report
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Check if GPU is available and use it if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Load the cleaned training data
train_df = pd.read_csv("train_cleaned_consolidated.csv")

In [4]:
# Split the data into training and test sets (stratified)
train_data, test_data = train_test_split(train_df, test_size=0.2, stratify=train_df['label'])

# Initialize the LabelEncoder to convert labels from strings to integers
label_encoder = LabelEncoder()

# Fit and transform the training labels and transform the test labels
train_data['label'] = label_encoder.fit_transform(train_data['label'])
test_data['label'] = label_encoder.transform(test_data['label'])
print(label_encoder.classes_)

['False' 'Mostly False' 'Mostly True' 'True' 'Unverified/Mixed']


In [5]:
# Convert to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)

In [6]:
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [7]:
# Tokenize the text column
def tokenize_function(examples):
    return tokenizer(examples['claim'], padding="max_length", truncation=True)

In [8]:
train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 10540/10540 [00:04<00:00, 2515.79 examples/s]
Map: 100%|██████████| 2636/2636 [00:01<00:00, 2633.53 examples/s]


In [9]:
# Load the BERT model for classification (5 labels in this case)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    eval_strategy="epoch",           # evaluation strategy
    learning_rate=2e-5,              # learning rate
    per_device_train_batch_size=16,   # batch size for training
    per_device_eval_batch_size=16,    # batch size for evaluation
    num_train_epochs=10,              # number of epochs
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for logs
)

In [11]:
# Set up the Trainer
trainer = Trainer(
    model=model,                         # model to train
    args=training_args,                  # training arguments
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset (test set)
    tokenizer=tokenizer,                 # tokenizer for the model
    compute_metrics=lambda p: {"accuracy": accuracy_score(p.label_ids, p.predictions.argmax(-1))}
)

  trainer = Trainer(


In [12]:
# Train the model
trainer.train()# Make predictions on the test set
predictions = trainer.predict(test_dataset)
preds = predictions.predictions.argmax(axis=-1)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
  8%|▊         | 500/6590 [10:00<1:41:46,  1.00s/it]

{'loss': 1.3217, 'grad_norm': 4.394057273864746, 'learning_rate': 1.8482549317147193e-05, 'epoch': 0.76}


                                                    
 10%|█         | 659/6590 [13:35<1:32:00,  1.07it/s]

{'eval_loss': 1.259735107421875, 'eval_accuracy': 0.4264036418816389, 'eval_runtime': 54.3715, 'eval_samples_per_second': 48.481, 'eval_steps_per_second': 3.035, 'epoch': 1.0}


 15%|█▌        | 1000/6590 [19:16<1:33:02,  1.00it/s]

{'loss': 1.193, 'grad_norm': 6.948202610015869, 'learning_rate': 1.6965098634294384e-05, 'epoch': 1.52}


                                                     
 20%|██        | 1318/6590 [25:28<1:21:26,  1.08it/s]

{'eval_loss': 1.2287671566009521, 'eval_accuracy': 0.44840667678300455, 'eval_runtime': 53.7015, 'eval_samples_per_second': 49.086, 'eval_steps_per_second': 3.073, 'epoch': 2.0}


 23%|██▎       | 1500/6590 [28:30<1:24:19,  1.01it/s] 

{'loss': 1.0883, 'grad_norm': 7.995899200439453, 'learning_rate': 1.544764795144158e-05, 'epoch': 2.28}


                                                     
 30%|███       | 1977/6590 [37:28<1:11:10,  1.08it/s]

{'eval_loss': 1.3377177715301514, 'eval_accuracy': 0.44537177541729894, 'eval_runtime': 53.7602, 'eval_samples_per_second': 49.033, 'eval_steps_per_second': 3.069, 'epoch': 3.0}


 30%|███       | 2000/6590 [37:51<1:16:41,  1.00s/it] 

{'loss': 0.9403, 'grad_norm': 11.259115219116211, 'learning_rate': 1.3930197268588772e-05, 'epoch': 3.03}


 38%|███▊      | 2500/6590 [46:11<1:08:11,  1.00s/it]

{'loss': 0.6672, 'grad_norm': 18.331859588623047, 'learning_rate': 1.2412746585735965e-05, 'epoch': 3.79}


                                                     
 40%|████      | 2636/6590 [49:21<1:01:08,  1.08it/s]

{'eval_loss': 1.6749451160430908, 'eval_accuracy': 0.4002276176024279, 'eval_runtime': 53.532, 'eval_samples_per_second': 49.242, 'eval_steps_per_second': 3.082, 'epoch': 4.0}


 46%|████▌     | 3000/6590 [55:24<59:34,  1.00it/s]   

{'loss': 0.494, 'grad_norm': 25.059669494628906, 'learning_rate': 1.0895295902883156e-05, 'epoch': 4.55}


                                                     
 50%|█████     | 3295/6590 [1:01:12<50:30,  1.09it/s]

{'eval_loss': 1.8956022262573242, 'eval_accuracy': 0.4381638846737481, 'eval_runtime': 53.3824, 'eval_samples_per_second': 49.38, 'eval_steps_per_second': 3.091, 'epoch': 5.0}


 53%|█████▎    | 3500/6590 [1:04:35<50:57,  1.01it/s]   

{'loss': 0.3699, 'grad_norm': 13.685911178588867, 'learning_rate': 9.377845220030349e-06, 'epoch': 5.31}


                                                       
 60%|██████    | 3954/6590 [1:14:09<48:24,  1.10s/it]

{'eval_loss': 2.3681514263153076, 'eval_accuracy': 0.4165402124430956, 'eval_runtime': 71.4662, 'eval_samples_per_second': 36.885, 'eval_steps_per_second': 2.309, 'epoch': 6.0}


 61%|██████    | 4000/6590 [1:15:03<50:44,  1.18s/it]   

{'loss': 0.2654, 'grad_norm': 18.623281478881836, 'learning_rate': 7.860394537177543e-06, 'epoch': 6.07}


 68%|██████▊   | 4500/6590 [1:24:52<35:07,  1.01s/it]  

{'loss': 0.1933, 'grad_norm': 17.824872970581055, 'learning_rate': 6.3429438543247346e-06, 'epoch': 6.83}


                                                     
 70%|███████   | 4613/6590 [1:27:40<30:04,  1.10it/s]

{'eval_loss': 2.773129940032959, 'eval_accuracy': 0.4184370257966616, 'eval_runtime': 53.3163, 'eval_samples_per_second': 49.441, 'eval_steps_per_second': 3.095, 'epoch': 7.0}


 76%|███████▌  | 5000/6590 [1:34:03<26:10,  1.01it/s]  

{'loss': 0.1261, 'grad_norm': 13.820586204528809, 'learning_rate': 4.8254931714719275e-06, 'epoch': 7.59}


                                                     
 80%|████████  | 5272/6590 [1:39:32<20:34,  1.07it/s]

{'eval_loss': 3.179141044616699, 'eval_accuracy': 0.40477996965098634, 'eval_runtime': 55.0092, 'eval_samples_per_second': 47.919, 'eval_steps_per_second': 2.999, 'epoch': 8.0}


 83%|████████▎ | 5500/6590 [1:43:23<18:27,  1.02s/it]  

{'loss': 0.1023, 'grad_norm': 27.41055679321289, 'learning_rate': 3.3080424886191204e-06, 'epoch': 8.35}


                                                     
 90%|█████████ | 5931/6590 [1:51:35<10:13,  1.07it/s]

{'eval_loss': 3.4558138847351074, 'eval_accuracy': 0.4161608497723824, 'eval_runtime': 54.3788, 'eval_samples_per_second': 48.475, 'eval_steps_per_second': 3.034, 'epoch': 9.0}


 91%|█████████ | 6000/6590 [1:52:44<09:54,  1.01s/it]  

{'loss': 0.0806, 'grad_norm': 0.34029486775398254, 'learning_rate': 1.7905918057663127e-06, 'epoch': 9.1}


 99%|█████████▊| 6500/6590 [2:01:11<01:31,  1.01s/it]

{'loss': 0.0553, 'grad_norm': 0.9652820825576782, 'learning_rate': 2.7314112291350536e-07, 'epoch': 9.86}


                                                     
100%|██████████| 6590/6590 [2:03:37<00:00,  1.13s/it]


{'eval_loss': 3.5620431900024414, 'eval_accuracy': 0.41312594840667677, 'eval_runtime': 54.0115, 'eval_samples_per_second': 48.804, 'eval_steps_per_second': 3.055, 'epoch': 10.0}
{'train_runtime': 7417.0699, 'train_samples_per_second': 14.21, 'train_steps_per_second': 0.888, 'train_loss': 0.524141067148883, 'epoch': 10.0}


100%|██████████| 165/165 [00:53<00:00,  3.10it/s]


In [13]:
# Calculate accuracy
accuracy = accuracy_score(test_dataset['label'], preds)
print(f"Accuracy: {accuracy}")

Accuracy: 0.41312594840667677


In [14]:
# Calculate accuracy
accuracy = accuracy_score(test_dataset['label'], preds)
print(f"Accuracy: {accuracy}")

Accuracy: 0.41312594840667677


In [15]:
# Print a detailed classification report with class labels
report = classification_report(test_dataset['label'], preds, target_names=label_encoder.classes_)
print(f"Classification Report:\n{report}")

Classification Report:
                  precision    recall  f1-score   support

           False       0.52      0.41      0.46       769
    Mostly False       0.44      0.50      0.47       908
     Mostly True       0.27      0.36      0.31       480
            True       0.28      0.20      0.23       384
Unverified/Mixed       0.74      0.73      0.73        95

        accuracy                           0.41      2636
       macro avg       0.45      0.44      0.44      2636
    weighted avg       0.42      0.41      0.41      2636



In [16]:
# Print a detailed classification report
report = classification_report(test_dataset['label'], preds)
print(f"Classification Report:\n{report}")# Save the trained model
model.save_pretrained("bert_classifier")
tokenizer.save_pretrained("bert_classifier")

Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.41      0.46       769
           1       0.44      0.50      0.47       908
           2       0.27      0.36      0.31       480
           3       0.28      0.20      0.23       384
           4       0.74      0.73      0.73        95

    accuracy                           0.41      2636
   macro avg       0.45      0.44      0.44      2636
weighted avg       0.42      0.41      0.41      2636



('bert_classifier\\tokenizer_config.json',
 'bert_classifier\\special_tokens_map.json',
 'bert_classifier\\vocab.txt',
 'bert_classifier\\added_tokens.json')