### **Load the model and evaluate it on a different dataset**

In [2]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
import numpy as np
from sklearn.metrics import classification_report


# Load the evaluation CSV and keep only the two columns used below.
file_path = '/content/drive/MyDrive/notebooks/new_dataset.csv'
df = (
    pd.read_csv(file_path)[['text', 'label']]
    # A row without a label cannot be scored. Filling the label with '' (as a
    # blanket fillna('') would) turns the column into mixed str/int values.
    .dropna(subset=['label'])
    # Missing article text becomes an empty string so it can still be tokenized.
    .assign(text=lambda frame: frame['text'].fillna(''))
    # NaNs force a float label column on read; restore integer class ids.
    .astype({'label': 'int64'})
    # Keep a clean 0..n-1 index so positional masks line up with the rows.
    .reset_index(drop=True)
)


# Load the tokenizer stored in the saved DistilBERT directory.
tokenizer_dir = '/content/drive/MyDrive/DistilBERT'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)


def tokenize_data(examples):
    """Tokenize the "text" field of a batch of examples.

    Each text is truncated and padded to exactly 512 tokens.

    Args:
        examples: Mapping with a "text" entry holding the texts to encode.

    Returns:
        The output of the module-level `tokenizer` for those texts.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        max_length=512,
        padding='max_length',
        truncation=True,
    )
    return encoded

# Wrap the DataFrame in a `datasets.Dataset` and tokenize it batch by batch.
fakenews_dataset = Dataset.from_pandas(df)
tokenized_fakenews = fakenews_dataset.map(
    function=tokenize_data,
    batched=True,
)


# Load the sequence-classification model from the saved DistilBERT directory
# and wrap it in a Trainer, used here only for batched prediction.
checkpoint_dir = '/content/drive/MyDrive/DistilBERT'
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
trainer = Trainer(
    tokenizer=tokenizer,
    model=model,
)


# Ground-truth labels, in the same row order as the tokenized dataset.
true_labels = df['label'].values

# Run the model over the whole dataset and take the highest-scoring class per row.
predictions = trainer.predict(tokenized_fakenews)
logits = predictions.predictions
preds = np.argmax(logits, axis=1)


# Per-class precision / recall / F1. `labels` is pinned to [0, 1] so the names
# always map 0 -> 'True' and 1 -> 'Fake'. Without it, the call raises when
# either array is missing one of the classes, because the number of
# target_names no longer matches the classes found in the data.
report = classification_report(
    true_labels,
    preds,
    labels=[0, 1],
    target_names=['True', 'Fake'],
)
print(report)





Map:   0%|          | 0/44898 [00:00<?, ? examples/s]

              precision    recall  f1-score   support

        True       0.62      0.04      0.07     21417
        Fake       0.53      0.98      0.69     23481

    accuracy                           0.53     44898
   macro avg       0.58      0.51      0.38     44898
weighted avg       0.57      0.53      0.39     44898



#### Saving news articles the model misclassified for further analysis

In [6]:
# Convention: label 1 ("Fake") is the positive class and label 0 ("True") the
# negative class, matching target_names=['True', 'Fake'] in the report.
#   false positive = real article that the model flagged as fake
#   false negative = fake article that the model let through as real
false_positives = df[(preds == 1) & (true_labels == 0)]
false_negatives = df[(preds == 0) & (true_labels == 1)]

# Keep the first 10 examples of each error type; cast to str so join() cannot
# fail on a non-string cell.
false_positives_examples = false_positives['text'].head(10).astype(str).tolist()
false_negatives_examples = false_negatives['text'].head(10).astype(str).tolist()

# Five newlines between articles keep multi-paragraph texts visually distinct.
separator = '\n\n\n\n\n'
outputs = (
    ('/content/drive/MyDrive/notebooks/results/false_positives.txt', false_positives_examples),
    ('/content/drive/MyDrive/notebooks/results/false_negatives.txt', false_negatives_examples),
)
for out_path, examples in outputs:
    # Explicit UTF-8: news text regularly contains non-ASCII characters.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(separator.join(examples))