In [None]:
from google.colab import drive
drive.mount('/content/drive')


import importlib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
import pandas as pd
from nltk.tokenize import word_tokenize
import os
from collections import Counter
from tqdm import tqdm

# Environment setup for this notebook cell. The lines starting with `!` are
# IPython shell escapes, so this part only runs inside a notebook kernel.

# Check if NLTK is installed, if not install it
# NOTE(review): `from nltk.tokenize import word_tokenize` in the import list above
# runs before this check, so a missing nltk would already have failed there.
package = "nltk"
try:
    importlib.import_module(package)
except ImportError:
    !pip install {package}
    importlib.import_module(package)

import nltk
# Fetch the 'punkt' tokenizer data through NLTK's downloader.
nltk.download('punkt')

# Check if GloVe embeddings are present, if not download them
# Only the 300d file is tested for; the archive is downloaded and unpacked
# into the current working directory when that file is missing.
if not os.path.isfile('./glove.6B.300d.txt'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove*.zip

def load_glove(path):
    """Load GloVe word vectors from a whitespace-separated text file.

    Every non-blank line is read as a token followed by its vector
    components. Blank or whitespace-only lines are skipped.

    Args:
        path: Path to the GloVe text file; it is read as UTF-8.

    Returns:
        A tuple ``(words_to_index, index_to_words, word_to_vec_map)``:
        ``words_to_index`` maps each token to an integer index assigned in
        sorted token order starting at 1 (index 0 is never assigned),
        ``index_to_words`` is the inverse mapping, and ``word_to_vec_map``
        maps each token to a ``float64`` NumPy vector.
    """
    word_to_vec_map = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError on fields[0].
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # The dict keys are exactly the distinct tokens, so no separate set is needed.
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w
    return words_to_index, index_to_words, word_to_vec_map

def sentences_to_indices(X, word_to_index, max_len):
    """Convert an array of sentences into a zero-padded matrix of word indices.

    Each sentence is lower-cased and split on whitespace. Tokens that are not
    keys of ``word_to_index`` are dropped without taking up a position; the
    remaining indices fill the row from the left, truncated at ``max_len``.

    Args:
        X: Array of strings exposing ``.shape[0]`` and integer indexing.
        word_to_index: Mapping from token to integer index.
        max_len: Number of columns in the result.

    Returns:
        A float NumPy array of shape ``(X.shape[0], max_len)``; positions that
        are not filled stay 0.
    """
    n_rows = X.shape[0]
    index_matrix = np.zeros((n_rows, max_len))

    for row in range(n_rows):
        tokens = X[row].lower().split()
        known = (word_to_index[tok] for tok in tokens if tok in word_to_index)
        # zip stops once max_len columns are filled or the known tokens run out.
        for col, idx in zip(range(max_len), known):
            index_matrix[row, col] = idx
    return index_matrix

# Load the GloVe embeddings
word_to_index, index_to_word, word_to_vec_map = load_glove('./glove.6B.300d.txt')

# Load the dataset
# Two CSV files are read from the mounted Drive folder; both are later split
# into train/test parts.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/news_200.csv')
df1 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/generated_combined_data200text-curie-001.csv")


# Preprocess the dataset
# Drop rows where 'text' or 'title' is missing.
df = df.dropna(subset=['text', 'title'])
df1 = df1.dropna(subset=['text', 'title'])

# Tokenize the text
# Each text becomes a row of GloVe word indices, zero-padded / truncated to max_len.
max_len = 256  # choose a max length
tokens = sentences_to_indices(df['text'].values, word_to_index, max_len)
tokens1 = sentences_to_indices(df1['text'].values, word_to_index, max_len)

# Define the model
class CNN(nn.Module):
    """1-D convolutional text classifier on top of pretrained word embeddings.

    The network embeds word indices, applies two unpadded ``kernel_size=3``
    convolutions with ReLU activations, flattens the result and maps it to
    ``num_classes`` logits with a single linear layer.
    """

    def __init__(self, vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index, seq_len=None):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Width of each embedding vector.
            num_classes: Number of output logits.
            word_to_vec_map: Mapping from word to its pretrained vector.
            word_to_index: Mapping from word to its embedding row index.
            seq_len: Length of the input sequences. When ``None`` the
                module-level ``max_len`` is used, which keeps existing
                five-argument calls working unchanged.
        """
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook-level constant.
            seq_len = max_len
        weights = self.pretrained_embedding(word_to_vec_map, word_to_index, vocab_size, embedding_size)
        # from_pretrained keeps the embedding weights frozen by default.
        self.embedding = nn.Embedding.from_pretrained(weights)
        self.conv1 = nn.Conv1d(embedding_size, 128, kernel_size=3)
        self.conv2 = nn.Conv1d(128, 256, kernel_size=3)
        # Each unpadded kernel_size=3 convolution shortens the sequence by 2,
        # so the flattened feature size is 256 * (seq_len - 4).
        self.fc = nn.Linear(256 * (seq_len - 4), num_classes)

    def pretrained_embedding(self, word_to_vec_map, word_to_index, vocab_size, embedding_size):
        """Assemble the embedding weight matrix as a float tensor.

        Rows whose index appears in ``word_to_index`` receive the matching
        vector from ``word_to_vec_map``; words without a vector get a random
        normal vector (scale 0.6). All other rows stay zero.

        Returns:
            A ``torch.float32`` tensor of shape ``(vocab_size, embedding_size)``.
        """
        weights_matrix = np.zeros((vocab_size, embedding_size))
        for word, i in word_to_index.items():
            try:
                weights_matrix[i] = word_to_vec_map[word]
            except KeyError:
                weights_matrix[i] = np.random.normal(scale=0.6, size=(embedding_size, ))
        return torch.from_numpy(weights_matrix).float()

    def forward(self, x):
        """Return class logits for a batch of word-index sequences.

        Args:
            x: Tensor of word indices with shape ``(batch, seq_len)``; it is
                cast to ``long`` before the embedding lookup.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        x = self.embedding(x.long())
        # Conv1d expects (batch, channels, length), so move the embedding axis forward.
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Create the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Word indices start at 1, so one extra row is needed for index 0 (the zero padding value).
vocab_size = len(word_to_index) + 1
embedding_size = 300  # adjust the embedding size as needed
num_classes = 2
model = CNN(vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index)
model = model.to(device)

# Split both index matrices 80/20 with the same seed (`tokens` comes from df, `tokens1` from df1)
train_inputs, test_inputs, train_labels, test_labels = train_test_split(
    tokens, df['label'].values, random_state=100, test_size=0.2)

train_inputs1, test_inputs1, train_labels1, test_labels1 = train_test_split(
    tokens1, df1['label'].values, random_state=100, test_size=0.2)

# Train on the df1 split; the evaluation below keeps using the df test split.
# The df training split and test_inputs1/test_labels1 are not used after this line.
train_inputs,train_labels=train_inputs1,train_labels1

# Convert into torch tensors
train_inputs = torch.tensor(train_inputs)
test_inputs = torch.tensor(test_inputs)
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Create DataLoader for the training set
# No shuffle argument is passed, so batches follow the order of the split.
train_data = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(train_data, batch_size=32)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# A single epoch over the training batches.
for epoch in range(1):
    model.train()
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_labels = batch[1].to(device)

        optimizer.zero_grad()
        outputs = model(b_input_ids)
        loss = criterion(outputs, b_labels)
        loss.backward()
        optimizer.step()

# Evaluation
# Create DataLoader for the test set
test_data = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(test_data, batch_size=32)

model.eval()
predictions = []
true_labels = []

for batch in test_dataloader:
    batch = [b.to(device) for b in batch]
    b_input_ids, b_labels = batch

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        outputs = model(b_input_ids)

    # Move logits and labels to CPU
    logits = outputs.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

# Concatenate the per-batch arrays for aggregate evaluation on the whole test set
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

# For each example, pick the label (0 or 1) with the higher score
pred_flat = np.argmax(predictions, axis=1).flatten()

# Print the classification report
# NOTE(review): target_names assumes label 0 is fake and label 1 is real - confirm
# against the label encoding of the dataset.
print(classification_report(true_labels, pred_flat, target_names=['Fake News', 'Real News']))


In [None]:
from google.colab import drive
drive.mount('/content/drive')


import importlib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
import pandas as pd
from nltk.tokenize import word_tokenize
import os
from collections import Counter
from tqdm import tqdm

# Environment setup for this notebook cell. The lines starting with `!` are
# IPython shell escapes, so this part only runs inside a notebook kernel.

# Check if NLTK is installed, if not install it
# NOTE(review): `from nltk.tokenize import word_tokenize` in the import list above
# runs before this check, so a missing nltk would already have failed there.
package = "nltk"
try:
    importlib.import_module(package)
except ImportError:
    !pip install {package}
    importlib.import_module(package)

import nltk
# Fetch the 'punkt' tokenizer data through NLTK's downloader.
nltk.download('punkt')

# Check if GloVe embeddings are present, if not download them
# Only the 300d file is tested for; the archive is downloaded and unpacked
# into the current working directory when that file is missing.
if not os.path.isfile('./glove.6B.300d.txt'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove*.zip

def load_glove(path):
    """Load GloVe word vectors from a whitespace-separated text file.

    Every non-blank line is read as a token followed by its vector
    components. Blank or whitespace-only lines are skipped.

    Args:
        path: Path to the GloVe text file; it is read as UTF-8.

    Returns:
        A tuple ``(words_to_index, index_to_words, word_to_vec_map)``:
        ``words_to_index`` maps each token to an integer index assigned in
        sorted token order starting at 1 (index 0 is never assigned),
        ``index_to_words`` is the inverse mapping, and ``word_to_vec_map``
        maps each token to a ``float64`` NumPy vector.
    """
    word_to_vec_map = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError on fields[0].
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # The dict keys are exactly the distinct tokens, so no separate set is needed.
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w
    return words_to_index, index_to_words, word_to_vec_map

def sentences_to_indices(X, word_to_index, max_len):
    """Convert an array of sentences into a zero-padded matrix of word indices.

    Each sentence is lower-cased and split on whitespace. Tokens that are not
    keys of ``word_to_index`` are dropped without taking up a position; the
    remaining indices fill the row from the left, truncated at ``max_len``.

    Args:
        X: Array of strings exposing ``.shape[0]`` and integer indexing.
        word_to_index: Mapping from token to integer index.
        max_len: Number of columns in the result.

    Returns:
        A float NumPy array of shape ``(X.shape[0], max_len)``; positions that
        are not filled stay 0.
    """
    n_rows = X.shape[0]
    index_matrix = np.zeros((n_rows, max_len))

    for row in range(n_rows):
        tokens = X[row].lower().split()
        known = (word_to_index[tok] for tok in tokens if tok in word_to_index)
        # zip stops once max_len columns are filled or the known tokens run out.
        for col, idx in zip(range(max_len), known):
            index_matrix[row, col] = idx
    return index_matrix

# Load the GloVe embeddings
word_to_index, index_to_word, word_to_vec_map = load_glove('./glove.6B.300d.txt')

# Load the dataset
# Two CSV files are read from the mounted Drive folder; both are later split
# into train/test parts.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/news_500.csv')
df1 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/generated_combined_data500text-curie-001-3.csv")


# Preprocess the dataset
# Drop rows where 'text' or 'title' is missing.
df = df.dropna(subset=['text', 'title'])
df1 = df1.dropna(subset=['text', 'title'])

# Tokenize the text
# Each text becomes a row of GloVe word indices, zero-padded / truncated to max_len.
max_len = 256  # choose a max length
tokens = sentences_to_indices(df['text'].values, word_to_index, max_len)
tokens1 = sentences_to_indices(df1['text'].values, word_to_index, max_len)

# Define the model
class CNN(nn.Module):
    """1-D convolutional text classifier on top of pretrained word embeddings.

    The network embeds word indices, applies two unpadded ``kernel_size=3``
    convolutions with ReLU activations, flattens the result and maps it to
    ``num_classes`` logits with a single linear layer.
    """

    def __init__(self, vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index, seq_len=None):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Width of each embedding vector.
            num_classes: Number of output logits.
            word_to_vec_map: Mapping from word to its pretrained vector.
            word_to_index: Mapping from word to its embedding row index.
            seq_len: Length of the input sequences. When ``None`` the
                module-level ``max_len`` is used, which keeps existing
                five-argument calls working unchanged.
        """
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook-level constant.
            seq_len = max_len
        weights = self.pretrained_embedding(word_to_vec_map, word_to_index, vocab_size, embedding_size)
        # from_pretrained keeps the embedding weights frozen by default.
        self.embedding = nn.Embedding.from_pretrained(weights)
        self.conv1 = nn.Conv1d(embedding_size, 128, kernel_size=3)
        self.conv2 = nn.Conv1d(128, 256, kernel_size=3)
        # Each unpadded kernel_size=3 convolution shortens the sequence by 2,
        # so the flattened feature size is 256 * (seq_len - 4).
        self.fc = nn.Linear(256 * (seq_len - 4), num_classes)

    def pretrained_embedding(self, word_to_vec_map, word_to_index, vocab_size, embedding_size):
        """Assemble the embedding weight matrix as a float tensor.

        Rows whose index appears in ``word_to_index`` receive the matching
        vector from ``word_to_vec_map``; words without a vector get a random
        normal vector (scale 0.6). All other rows stay zero.

        Returns:
            A ``torch.float32`` tensor of shape ``(vocab_size, embedding_size)``.
        """
        weights_matrix = np.zeros((vocab_size, embedding_size))
        for word, i in word_to_index.items():
            try:
                weights_matrix[i] = word_to_vec_map[word]
            except KeyError:
                weights_matrix[i] = np.random.normal(scale=0.6, size=(embedding_size, ))
        return torch.from_numpy(weights_matrix).float()

    def forward(self, x):
        """Return class logits for a batch of word-index sequences.

        Args:
            x: Tensor of word indices with shape ``(batch, seq_len)``; it is
                cast to ``long`` before the embedding lookup.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        x = self.embedding(x.long())
        # Conv1d expects (batch, channels, length), so move the embedding axis forward.
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Create the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Word indices start at 1, so one extra row is needed for index 0 (the zero padding value).
vocab_size = len(word_to_index) + 1
embedding_size = 300  # adjust the embedding size as needed
num_classes = 2
model = CNN(vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index)
model = model.to(device)

# Split both index matrices 80/20 with the same seed (`tokens` comes from df, `tokens1` from df1)
train_inputs, test_inputs, train_labels, test_labels = train_test_split(
    tokens, df['label'].values, random_state=100, test_size=0.2)

train_inputs1, test_inputs1, train_labels1, test_labels1 = train_test_split(
    tokens1, df1['label'].values, random_state=100, test_size=0.2)

# Train on the df1 split; the evaluation below keeps using the df test split.
# The df training split and test_inputs1/test_labels1 are not used after this line.
train_inputs,train_labels=train_inputs1,train_labels1

# Convert into torch tensors
train_inputs = torch.tensor(train_inputs)
test_inputs = torch.tensor(test_inputs)
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Create DataLoader for the training set
# No shuffle argument is passed, so batches follow the order of the split.
train_data = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(train_data, batch_size=32)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# A single epoch over the training batches.
for epoch in range(1):
    model.train()
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_labels = batch[1].to(device)

        optimizer.zero_grad()
        outputs = model(b_input_ids)
        loss = criterion(outputs, b_labels)
        loss.backward()
        optimizer.step()

# Evaluation
# Create DataLoader for the test set
test_data = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(test_data, batch_size=32)

model.eval()
predictions = []
true_labels = []

for batch in test_dataloader:
    batch = [b.to(device) for b in batch]
    b_input_ids, b_labels = batch

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        outputs = model(b_input_ids)

    # Move logits and labels to CPU
    logits = outputs.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

# Concatenate the per-batch arrays for aggregate evaluation on the whole test set
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

# For each example, pick the label (0 or 1) with the higher score
pred_flat = np.argmax(predictions, axis=1).flatten()

# Print the classification report
# NOTE(review): target_names assumes label 0 is fake and label 1 is real - confirm
# against the label encoding of the dataset.
print(classification_report(true_labels, pred_flat, target_names=['Fake News', 'Real News']))


In [None]:
from google.colab import drive
drive.mount('/content/drive')


import importlib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
import pandas as pd
from nltk.tokenize import word_tokenize
import os
from collections import Counter
from tqdm import tqdm

# Environment setup for this notebook cell. The lines starting with `!` are
# IPython shell escapes, so this part only runs inside a notebook kernel.

# Check if NLTK is installed, if not install it
# NOTE(review): `from nltk.tokenize import word_tokenize` in the import list above
# runs before this check, so a missing nltk would already have failed there.
package = "nltk"
try:
    importlib.import_module(package)
except ImportError:
    !pip install {package}
    importlib.import_module(package)

import nltk
# Fetch the 'punkt' tokenizer data through NLTK's downloader.
nltk.download('punkt')

# Check if GloVe embeddings are present, if not download them
# Only the 300d file is tested for; the archive is downloaded and unpacked
# into the current working directory when that file is missing.
if not os.path.isfile('./glove.6B.300d.txt'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove*.zip

def load_glove(path):
    """Load GloVe word vectors from a whitespace-separated text file.

    Every non-blank line is read as a token followed by its vector
    components. Blank or whitespace-only lines are skipped.

    Args:
        path: Path to the GloVe text file; it is read as UTF-8.

    Returns:
        A tuple ``(words_to_index, index_to_words, word_to_vec_map)``:
        ``words_to_index`` maps each token to an integer index assigned in
        sorted token order starting at 1 (index 0 is never assigned),
        ``index_to_words`` is the inverse mapping, and ``word_to_vec_map``
        maps each token to a ``float64`` NumPy vector.
    """
    word_to_vec_map = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError on fields[0].
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # The dict keys are exactly the distinct tokens, so no separate set is needed.
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w
    return words_to_index, index_to_words, word_to_vec_map

def sentences_to_indices(X, word_to_index, max_len):
    """Convert an array of sentences into a zero-padded matrix of word indices.

    Each sentence is lower-cased and split on whitespace. Tokens that are not
    keys of ``word_to_index`` are dropped without taking up a position; the
    remaining indices fill the row from the left, truncated at ``max_len``.

    Args:
        X: Array of strings exposing ``.shape[0]`` and integer indexing.
        word_to_index: Mapping from token to integer index.
        max_len: Number of columns in the result.

    Returns:
        A float NumPy array of shape ``(X.shape[0], max_len)``; positions that
        are not filled stay 0.
    """
    n_rows = X.shape[0]
    index_matrix = np.zeros((n_rows, max_len))

    for row in range(n_rows):
        tokens = X[row].lower().split()
        known = (word_to_index[tok] for tok in tokens if tok in word_to_index)
        # zip stops once max_len columns are filled or the known tokens run out.
        for col, idx in zip(range(max_len), known):
            index_matrix[row, col] = idx
    return index_matrix

# Load the GloVe embeddings
word_to_index, index_to_word, word_to_vec_map = load_glove('./glove.6B.300d.txt')

# Load the dataset
# Two CSV files are read from the mounted Drive folder; both are later split
# into train/test parts.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/news_10000.csv')
df1 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/generated_combined_data10000text-curie-001.csv")



# Preprocess the dataset
# Drop rows where 'text' or 'title' is missing.
df = df.dropna(subset=['text', 'title'])
df1 = df1.dropna(subset=['text', 'title'])

# Tokenize the text
# Each text becomes a row of GloVe word indices, zero-padded / truncated to max_len.
max_len = 256  # choose a max length
tokens = sentences_to_indices(df['text'].values, word_to_index, max_len)
tokens1 = sentences_to_indices(df1['text'].values, word_to_index, max_len)

# Define the model
class CNN(nn.Module):
    """1-D convolutional text classifier on top of pretrained word embeddings.

    The network embeds word indices, applies two unpadded ``kernel_size=3``
    convolutions with ReLU activations, flattens the result and maps it to
    ``num_classes`` logits with a single linear layer.
    """

    def __init__(self, vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index, seq_len=None):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Width of each embedding vector.
            num_classes: Number of output logits.
            word_to_vec_map: Mapping from word to its pretrained vector.
            word_to_index: Mapping from word to its embedding row index.
            seq_len: Length of the input sequences. When ``None`` the
                module-level ``max_len`` is used, which keeps existing
                five-argument calls working unchanged.
        """
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook-level constant.
            seq_len = max_len
        weights = self.pretrained_embedding(word_to_vec_map, word_to_index, vocab_size, embedding_size)
        # from_pretrained keeps the embedding weights frozen by default.
        self.embedding = nn.Embedding.from_pretrained(weights)
        self.conv1 = nn.Conv1d(embedding_size, 128, kernel_size=3)
        self.conv2 = nn.Conv1d(128, 256, kernel_size=3)
        # Each unpadded kernel_size=3 convolution shortens the sequence by 2,
        # so the flattened feature size is 256 * (seq_len - 4).
        self.fc = nn.Linear(256 * (seq_len - 4), num_classes)

    def pretrained_embedding(self, word_to_vec_map, word_to_index, vocab_size, embedding_size):
        """Assemble the embedding weight matrix as a float tensor.

        Rows whose index appears in ``word_to_index`` receive the matching
        vector from ``word_to_vec_map``; words without a vector get a random
        normal vector (scale 0.6). All other rows stay zero.

        Returns:
            A ``torch.float32`` tensor of shape ``(vocab_size, embedding_size)``.
        """
        weights_matrix = np.zeros((vocab_size, embedding_size))
        for word, i in word_to_index.items():
            try:
                weights_matrix[i] = word_to_vec_map[word]
            except KeyError:
                weights_matrix[i] = np.random.normal(scale=0.6, size=(embedding_size, ))
        return torch.from_numpy(weights_matrix).float()

    def forward(self, x):
        """Return class logits for a batch of word-index sequences.

        Args:
            x: Tensor of word indices with shape ``(batch, seq_len)``; it is
                cast to ``long`` before the embedding lookup.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        x = self.embedding(x.long())
        # Conv1d expects (batch, channels, length), so move the embedding axis forward.
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Create the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Word indices start at 1, so one extra row is needed for index 0 (the zero padding value).
vocab_size = len(word_to_index) + 1
embedding_size = 300  # adjust the embedding size as needed
num_classes = 2
model = CNN(vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index)
model = model.to(device)

# Split both index matrices 80/20 with the same seed (`tokens` comes from df, `tokens1` from df1)
train_inputs, test_inputs, train_labels, test_labels = train_test_split(
    tokens, df['label'].values, random_state=100, test_size=0.2)

train_inputs1, test_inputs1, train_labels1, test_labels1 = train_test_split(
    tokens1, df1['label'].values, random_state=100, test_size=0.2)

# Train on the df1 split; the evaluation below keeps using the df test split.
# The df training split and test_inputs1/test_labels1 are not used after this line.
train_inputs,train_labels=train_inputs1,train_labels1

# Convert into torch tensors
train_inputs = torch.tensor(train_inputs)
test_inputs = torch.tensor(test_inputs)
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Create DataLoader for the training set
# No shuffle argument is passed, so batches follow the order of the split.
train_data = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(train_data, batch_size=32)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# A single epoch over the training batches.
for epoch in range(1):
    model.train()
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_labels = batch[1].to(device)

        optimizer.zero_grad()
        outputs = model(b_input_ids)
        loss = criterion(outputs, b_labels)
        loss.backward()
        optimizer.step()

# Evaluation
# Create DataLoader for the test set
test_data = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(test_data, batch_size=32)

model.eval()
predictions = []
true_labels = []

for batch in test_dataloader:
    batch = [b.to(device) for b in batch]
    b_input_ids, b_labels = batch

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        outputs = model(b_input_ids)

    # Move logits and labels to CPU
    logits = outputs.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

# Concatenate the per-batch arrays for aggregate evaluation on the whole test set
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

# For each example, pick the label (0 or 1) with the higher score
pred_flat = np.argmax(predictions, axis=1).flatten()

# Print the classification report
# NOTE(review): target_names assumes label 0 is fake and label 1 is real - confirm
# against the label encoding of the dataset.
print(classification_report(true_labels, pred_flat, target_names=['Fake News', 'Real News']))


In [None]:
from google.colab import drive
drive.mount('/content/drive')


import importlib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
import pandas as pd
from nltk.tokenize import word_tokenize
import os
from collections import Counter
from tqdm import tqdm

# Environment setup for this notebook cell. The lines starting with `!` are
# IPython shell escapes, so this part only runs inside a notebook kernel.

# Check if NLTK is installed, if not install it
# NOTE(review): `from nltk.tokenize import word_tokenize` in the import list above
# runs before this check, so a missing nltk would already have failed there.
package = "nltk"
try:
    importlib.import_module(package)
except ImportError:
    !pip install {package}
    importlib.import_module(package)

import nltk
# Fetch the 'punkt' tokenizer data through NLTK's downloader.
nltk.download('punkt')

# Check if GloVe embeddings are present, if not download them
# Only the 300d file is tested for; the archive is downloaded and unpacked
# into the current working directory when that file is missing.
if not os.path.isfile('./glove.6B.300d.txt'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove*.zip

def load_glove(path):
    """Load GloVe word vectors from a whitespace-separated text file.

    Every non-blank line is read as a token followed by its vector
    components. Blank or whitespace-only lines are skipped.

    Args:
        path: Path to the GloVe text file; it is read as UTF-8.

    Returns:
        A tuple ``(words_to_index, index_to_words, word_to_vec_map)``:
        ``words_to_index`` maps each token to an integer index assigned in
        sorted token order starting at 1 (index 0 is never assigned),
        ``index_to_words`` is the inverse mapping, and ``word_to_vec_map``
        maps each token to a ``float64`` NumPy vector.
    """
    word_to_vec_map = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing with IndexError on fields[0].
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    words_to_index = {}
    index_to_words = {}
    # The dict keys are exactly the distinct tokens, so no separate set is needed.
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w
    return words_to_index, index_to_words, word_to_vec_map

def sentences_to_indices(X, word_to_index, max_len):
    """Convert an array of sentences into a zero-padded matrix of word indices.

    Each sentence is lower-cased and split on whitespace. Tokens that are not
    keys of ``word_to_index`` are dropped without taking up a position; the
    remaining indices fill the row from the left, truncated at ``max_len``.

    Args:
        X: Array of strings exposing ``.shape[0]`` and integer indexing.
        word_to_index: Mapping from token to integer index.
        max_len: Number of columns in the result.

    Returns:
        A float NumPy array of shape ``(X.shape[0], max_len)``; positions that
        are not filled stay 0.
    """
    n_rows = X.shape[0]
    index_matrix = np.zeros((n_rows, max_len))

    for row in range(n_rows):
        tokens = X[row].lower().split()
        known = (word_to_index[tok] for tok in tokens if tok in word_to_index)
        # zip stops once max_len columns are filled or the known tokens run out.
        for col, idx in zip(range(max_len), known):
            index_matrix[row, col] = idx
    return index_matrix

# Load the GloVe embeddings
word_to_index, index_to_word, word_to_vec_map = load_glove('./glove.6B.300d.txt')

# Load the dataset
# Two CSV files are read from the mounted Drive folder; both are later split
# into train/test parts.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/train.csv')
df1 = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Zero_Shot_Data_Generation_and_BERT_Roberta_CNN_Curie/train_curie_20400.csv")


# Preprocess the dataset
# Drop rows where 'text' or 'title' is missing.
df = df.dropna(subset=['text', 'title'])
df1 = df1.dropna(subset=['text', 'title'])

# Tokenize the text
# Each text becomes a row of GloVe word indices, zero-padded / truncated to max_len.
max_len = 256  # choose a max length
tokens = sentences_to_indices(df['text'].values, word_to_index, max_len)
tokens1 = sentences_to_indices(df1['text'].values, word_to_index, max_len)

# Define the model
class CNN(nn.Module):
    """1-D convolutional text classifier on top of pretrained word embeddings.

    The network embeds word indices, applies two unpadded ``kernel_size=3``
    convolutions with ReLU activations, flattens the result and maps it to
    ``num_classes`` logits with a single linear layer.
    """

    def __init__(self, vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index, seq_len=None):
        """Build the layers.

        Args:
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Width of each embedding vector.
            num_classes: Number of output logits.
            word_to_vec_map: Mapping from word to its pretrained vector.
            word_to_index: Mapping from word to its embedding row index.
            seq_len: Length of the input sequences. When ``None`` the
                module-level ``max_len`` is used, which keeps existing
                five-argument calls working unchanged.
        """
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the notebook-level constant.
            seq_len = max_len
        weights = self.pretrained_embedding(word_to_vec_map, word_to_index, vocab_size, embedding_size)
        # from_pretrained keeps the embedding weights frozen by default.
        self.embedding = nn.Embedding.from_pretrained(weights)
        self.conv1 = nn.Conv1d(embedding_size, 128, kernel_size=3)
        self.conv2 = nn.Conv1d(128, 256, kernel_size=3)
        # Each unpadded kernel_size=3 convolution shortens the sequence by 2,
        # so the flattened feature size is 256 * (seq_len - 4).
        self.fc = nn.Linear(256 * (seq_len - 4), num_classes)

    def pretrained_embedding(self, word_to_vec_map, word_to_index, vocab_size, embedding_size):
        """Assemble the embedding weight matrix as a float tensor.

        Rows whose index appears in ``word_to_index`` receive the matching
        vector from ``word_to_vec_map``; words without a vector get a random
        normal vector (scale 0.6). All other rows stay zero.

        Returns:
            A ``torch.float32`` tensor of shape ``(vocab_size, embedding_size)``.
        """
        weights_matrix = np.zeros((vocab_size, embedding_size))
        for word, i in word_to_index.items():
            try:
                weights_matrix[i] = word_to_vec_map[word]
            except KeyError:
                weights_matrix[i] = np.random.normal(scale=0.6, size=(embedding_size, ))
        return torch.from_numpy(weights_matrix).float()

    def forward(self, x):
        """Return class logits for a batch of word-index sequences.

        Args:
            x: Tensor of word indices with shape ``(batch, seq_len)``; it is
                cast to ``long`` before the embedding lookup.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        x = self.embedding(x.long())
        # Conv1d expects (batch, channels, length), so move the embedding axis forward.
        x = x.permute(0, 2, 1)
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Create the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Word indices start at 1, so one extra row is needed for index 0 (the zero padding value).
vocab_size = len(word_to_index) + 1
embedding_size = 300  # adjust the embedding size as needed
num_classes = 2
model = CNN(vocab_size, embedding_size, num_classes, word_to_vec_map, word_to_index)
model = model.to(device)

# Split both index matrices 80/20 with the same seed (`tokens` comes from df, `tokens1` from df1)
train_inputs, test_inputs, train_labels, test_labels = train_test_split(
    tokens, df['label'].values, random_state=100, test_size=0.2)

train_inputs1, test_inputs1, train_labels1, test_labels1 = train_test_split(
    tokens1, df1['label'].values, random_state=100, test_size=0.2)

# Train on the df1 split; the evaluation below keeps using the df test split.
# The df training split and test_inputs1/test_labels1 are not used after this line.
train_inputs,train_labels=train_inputs1,train_labels1

# Convert into torch tensors
train_inputs = torch.tensor(train_inputs)
test_inputs = torch.tensor(test_inputs)
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

# Create DataLoader for the training set
# No shuffle argument is passed, so batches follow the order of the split.
train_data = TensorDataset(train_inputs, train_labels)
train_dataloader = DataLoader(train_data, batch_size=32)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# A single epoch over the training batches.
for epoch in range(1):
    model.train()
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_labels = batch[1].to(device)

        optimizer.zero_grad()
        outputs = model(b_input_ids)
        loss = criterion(outputs, b_labels)
        loss.backward()
        optimizer.step()

# Evaluation
# Create DataLoader for the test set
test_data = TensorDataset(test_inputs, test_labels)
test_dataloader = DataLoader(test_data, batch_size=32)

model.eval()
predictions = []
true_labels = []

for batch in test_dataloader:
    batch = [b.to(device) for b in batch]
    b_input_ids, b_labels = batch

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        outputs = model(b_input_ids)

    # Move logits and labels to CPU
    logits = outputs.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Store predictions and true labels
    predictions.append(logits)
    true_labels.append(label_ids)

# Concatenate the per-batch arrays for aggregate evaluation on the whole test set
predictions = np.concatenate(predictions, axis=0)
true_labels = np.concatenate(true_labels, axis=0)

# For each example, pick the label (0 or 1) with the higher score
pred_flat = np.argmax(predictions, axis=1).flatten()

# Print the classification report
# NOTE(review): target_names assumes label 0 is fake and label 1 is real - confirm
# against the label encoding of the dataset.
print(classification_report(true_labels, pred_flat, target_names=['Fake News', 'Real News']))


# BERT Section Below