In [None]:
# Install necessary libraries
!pip install transformers
!pip install datasets

from google.colab import drive
drive.mount('/content/drive')

import torch
import time
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import pandas as pd
from tqdm import tqdm

# Load datasets
DATA_DIR = '/content/drive/My Drive/Colab Notebooks/datasets/FakeNewsNet'
gossipcop_fake = pd.read_csv(f'{DATA_DIR}/gossipcop_fake.csv')
gossipcop_real = pd.read_csv(f'{DATA_DIR}/gossipcop_real.csv')
politifact_fake = pd.read_csv(f'{DATA_DIR}/politifact_fake.csv')
politifact_real = pd.read_csv(f'{DATA_DIR}/politifact_real.csv')

# Add labels: 1 = fake, 0 = real
gossipcop_fake['label'] = 1
gossipcop_real['label'] = 0
politifact_fake['label'] = 1
politifact_real['label'] = 0

# Combine datasets
gossipcop = pd.concat([gossipcop_fake, gossipcop_real], ignore_index=True)
politifact = pd.concat([politifact_fake, politifact_real], ignore_index=True)
df = pd.concat([gossipcop, politifact], ignore_index=True)

# Check if the 'title' column exists
if 'title' not in df.columns:
    raise ValueError("The column 'title' is not found in the dataset. Ensure the dataset contains a 'title' or adjust to use the 'text' column.")

# Rows without a title cannot be tokenized, so drop them up front instead of
# failing in the middle of a training epoch.
df = df.dropna(subset=['title']).reset_index(drop=True)

# Choose the model to use: 'bert-base-uncased', 'roberta-base', or 'gpt2'
SUPPORTED_MODELS = ('bert-base-uncased', 'roberta-base', 'gpt2')
model_name = 'bert-base-uncased'  # Change to 'roberta-base' or 'gpt2' as needed

# Fail here, with a clear message, rather than later with a NameError on an
# undefined tokenizer/model.
if model_name not in SUPPORTED_MODELS:
    raise ValueError(f"Unsupported model_name {model_name!r}; expected one of {SUPPORTED_MODELS}.")

# Load tokenizer (the checkpoint name is the same string as model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
if model_name == 'gpt2':
    tokenizer.pad_token = tokenizer.eos_token  # Set padding token for GPT-2

# Define a custom dataset class
class FakeNewsDataset(Dataset):
    """Map-style dataset that tokenizes one text per item on demand.

    Args:
        texts: pandas Series of input texts; read positionally via ``.iloc``.
        labels: pandas Series of integer class labels, aligned with ``texts``.
        tokenizer: callable tokenizer (e.g. a Hugging Face tokenizer) that
            returns a mapping with ``input_ids`` and ``attention_mask``.
        max_len: every item is padded/truncated to this many tokens.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        """Return ``input_ids``, ``attention_mask`` and ``label`` tensors for one row."""
        text = self.texts.iloc[index]
        # A missing value comes back from pandas as NaN (a float) and tokenizers
        # only accept strings, so map missing -> "" and coerce everything else.
        text = "" if pd.isna(text) else str(text)
        label = int(self.labels.iloc[index])

        # Calling the tokenizer directly replaces the deprecated encode_plus().
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            # return_tensors='pt' adds a leading batch axis; flatten removes it
            # so the DataLoader can stack items itself.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long),
        }

# Helper function to calculate evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Compute classification metrics for one evaluation pass.

    Args:
        y_true: sequence of ground-truth class labels.
        y_pred: sequence of predicted class labels, aligned with ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, roc_auc)``. Precision, recall
        and F1 are support-weighted averages. ROC-AUC is computed directly from
        ``y_pred`` and is ``nan`` when ``y_true`` contains a single class.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # zero_division=0 scores a class that was never predicted as 0 instead of
    # emitting an UndefinedMetricWarning.
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    # roc_auc_score raises ValueError when only one class is present in y_true.
    if len(set(y_true)) > 1:
        roc_auc = roc_auc_score(y_true, y_pred)
    else:
        roc_auc = float('nan')

    return accuracy, precision, recall, f1, roc_auc

# Training function
def train(model, data_loader, optimizer, device):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: model called with ``input_ids``, ``attention_mask`` and
            ``labels`` keyword arguments; its output must expose ``.loss``.
        data_loader: iterable of batches, each a dict with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'label'``.
        optimizer: optimizer stepped once per batch.
        device: device every batch tensor is moved to before the forward pass.

    Returns:
        Sum of the per-batch losses divided by ``len(data_loader)``.
    """
    model.train()
    batch_losses = []

    for batch in tqdm(data_loader, desc="Training"):
        model_inputs = {
            'input_ids': batch['input_ids'].to(device),
            'attention_mask': batch['attention_mask'].to(device),
            'labels': batch['label'].to(device),
        }

        optimizer.zero_grad()
        loss = model(**model_inputs).loss
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(data_loader)

# Evaluation function with timing
def evaluate(model, data_loader, device):
    """Predict on every batch of ``data_loader`` and time the whole pass.

    Args:
        model: model called with ``input_ids`` and ``attention_mask`` keyword
            arguments; its output must expose ``.logits``.
        data_loader: iterable of batches (dicts with ``'input_ids'``,
            ``'attention_mask'`` and ``'label'``) that also exposes ``.dataset``.
        device: device every batch tensor is moved to.

    Returns:
        Tuple ``(predictions, true_labels, inference_time)``. The first two are
        lists with one entry per sample. ``inference_time`` is the wall-clock
        time of the full loop (batch loading included) divided by
        ``len(data_loader.dataset)``, in milliseconds.
    """
    model.eval()
    predictions = []
    true_labels = []
    started_at = time.time()  # Start timing

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            predicted_classes = torch.argmax(logits, dim=1)

            predictions.extend(predicted_classes.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    # Convert total elapsed seconds to milliseconds per sample
    elapsed_seconds = time.time() - started_at
    inference_time = elapsed_seconds / len(data_loader.dataset) * 1000
    return predictions, true_labels, inference_time

# Cross-validation setup with 5 folds
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
results = []

# Set device for training and evaluation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cross-validation loop
epochs = 3
for fold, (train_idx, test_idx) in enumerate(kf.split(df['title'], df['label'])):
    print(f"\nFold {fold + 1}")

    # Drop the references to the previous fold's model and optimizer before
    # building new ones, so their memory can be reclaimed first.
    model = optimizer = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Split data for this fold
    X_train, X_test = df['title'].iloc[train_idx], df['title'].iloc[test_idx]
    y_train, y_test = df['label'].iloc[train_idx], df['label'].iloc[test_idx]

    # Create DataLoader for train and test splits
    train_dataset = FakeNewsDataset(X_train, y_train, tokenizer, max_len=128)
    test_dataset = FakeNewsDataset(X_test, y_test, tokenizer, max_len=128)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

    # Initialize a fresh model for this fold (checkpoint name == model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to(device)
    if model_name == 'gpt2':
        model.config.pad_token_id = model.config.eos_token_id  # Set padding token for GPT-2

    optimizer = AdamW(model.parameters(), lr=2e-5)

    # Training and evaluation for each epoch
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")

        # Train
        train_loss = train(model, train_loader, optimizer, device)
        print(f"Training loss: {train_loss}")

        # Evaluate with timing
        test_predictions, test_true_labels, inference_time = evaluate(model, test_loader, device)
        test_acc, test_prec, test_rec, test_f1, test_roc_auc = calculate_metrics(test_true_labels, test_predictions)

        # Log results for this fold and epoch; 'Epoch' keeps the three rows of
        # a fold distinguishable in the saved table.
        results.append({
            'Fold': fold,
            'Epoch': epoch + 1,
            'Accuracy': test_acc,
            'Precision': test_prec,
            'Recall': test_rec,
            'F1-Score': test_f1,
            'ROC-AUC': test_roc_auc,
            'Inference Time (ms)': inference_time
        })
        print(f"Test Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}, Recall: {test_rec:.4f}, F1: {test_f1:.4f}, ROC-AUC: {test_roc_auc:.4f}")

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Calculate mean and std for each metric across folds. Only the final epoch of
# each fold is used: pooling all epochs would mix epoch-to-epoch variation into
# a statistic that is reported as fold-to-fold variation.
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Inference Time (ms)']
final_epoch_df = results_df[results_df['Epoch'] == epochs]

# Build the summary rows as plain dicts so the 'Mean'/'Std' strings are never
# written into a float Series (pandas warns about that dtype mismatch).
summary_df = pd.DataFrame([
    {'Fold': 'Mean', 'Epoch': epochs, **final_epoch_df[metric_columns].mean().to_dict()},
    {'Fold': 'Std', 'Epoch': epochs, **final_epoch_df[metric_columns].std().to_dict()},
])

# Append mean and std rows to the DataFrame
results_df = pd.concat([results_df, summary_df], ignore_index=True)

# Save to CSV
results_df.to_csv(f'/content/drive/My Drive/Colab Notebooks/sundayrun/Fakenewsnet{model_name}_crossval_results_with_mean_std.csv', index=False)

# Print the results to verify
print("\nCross-Validation Results (Mean ± Std):")
print(results_df)


Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]




Fold 1


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training: 100%|██████████| 1160/1160 [01:42<00:00, 11.26it/s]


Training loss: 0.39858938676851063


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.87it/s]


Test Accuracy: 0.8517, Precision: 0.8488, Recall: 0.8517, F1: 0.8386, ROC-AUC: 0.7372
Epoch 2/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.43it/s]


Training loss: 0.2775951667399756


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 32.05it/s]


Test Accuracy: 0.8578, Precision: 0.8526, Recall: 0.8578, F1: 0.8536, ROC-AUC: 0.7834
Epoch 3/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.42it/s]


Training loss: 0.18085438052216415


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 32.00it/s]


Test Accuracy: 0.8578, Precision: 0.8526, Recall: 0.8578, F1: 0.8535, ROC-AUC: 0.7832

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.38it/s]


Training loss: 0.40586295177325094


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.76it/s]


Test Accuracy: 0.8653, Precision: 0.8604, Recall: 0.8653, F1: 0.8595, ROC-AUC: 0.7835
Epoch 2/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.39it/s]


Training loss: 0.27985466830914135


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.80it/s]


Test Accuracy: 0.8605, Precision: 0.8567, Recall: 0.8605, F1: 0.8509, ROC-AUC: 0.7600
Epoch 3/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.39it/s]


Training loss: 0.18368997850921004


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.72it/s]


Test Accuracy: 0.8592, Precision: 0.8578, Recall: 0.8592, F1: 0.8585, ROC-AUC: 0.8060

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.40it/s]


Training loss: 0.4062260480469157


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.77it/s]


Test Accuracy: 0.8633, Precision: 0.8582, Recall: 0.8633, F1: 0.8579, ROC-AUC: 0.7834
Epoch 2/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.41it/s]


Training loss: 0.2772015652378443


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 32.05it/s]


Test Accuracy: 0.8508, Precision: 0.8570, Recall: 0.8508, F1: 0.8533, ROC-AUC: 0.8179
Epoch 3/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.44it/s]


Training loss: 0.18031609602079823


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 32.08it/s]


Test Accuracy: 0.8450, Precision: 0.8544, Recall: 0.8450, F1: 0.8484, ROC-AUC: 0.8172

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.41it/s]


Training loss: 0.3991418702206735


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.91it/s]


Test Accuracy: 0.8478, Precision: 0.8465, Recall: 0.8478, F1: 0.8471, ROC-AUC: 0.7914
Epoch 2/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.42it/s]


Training loss: 0.27872124291346245


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.94it/s]


Test Accuracy: 0.8547, Precision: 0.8518, Recall: 0.8547, F1: 0.8530, ROC-AUC: 0.7939
Epoch 3/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.42it/s]


Training loss: 0.1767397844585879


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.85it/s]


Test Accuracy: 0.8476, Precision: 0.8443, Recall: 0.8476, F1: 0.8456, ROC-AUC: 0.7834

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.41it/s]


Training loss: 0.4067000776529312


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 32.00it/s]


Test Accuracy: 0.8183, Precision: 0.8364, Recall: 0.8183, F1: 0.8243, ROC-AUC: 0.7968
Epoch 2/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.42it/s]


Training loss: 0.28030408433927545


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.96it/s]


Test Accuracy: 0.8610, Precision: 0.8556, Recall: 0.8610, F1: 0.8555, ROC-AUC: 0.7806
Epoch 3/3


Training: 100%|██████████| 1160/1160 [01:41<00:00, 11.42it/s]


Training loss: 0.1879952196344926


Evaluating: 100%|██████████| 290/290 [00:09<00:00, 31.87it/s]
  mean_results['Fold'] = 'Mean'
  std_results['Fold'] = 'Std'


Test Accuracy: 0.8515, Precision: 0.8513, Recall: 0.8515, F1: 0.8514, ROC-AUC: 0.8002

Cross-Validation Results (Mean ± Std):
    Fold  Accuracy  Precision    Recall  F1-Score   ROC-AUC  \
0      0  0.851724   0.848798  0.851724  0.838602  0.737226   
1      0  0.857759   0.852640  0.857759  0.853567  0.783447   
2      0  0.857759   0.852611  0.857759  0.853514  0.783156   
3      1  0.865273   0.860356  0.865273  0.859480  0.783507   
4      1  0.860530   0.856749  0.860530  0.850857  0.759979   
5      1  0.859237   0.857812  0.859237  0.858459  0.805979   
6      2  0.863333   0.858219  0.863333  0.857928  0.783381   
7      2  0.850830   0.857011  0.850830  0.853256  0.817852   
8      2  0.845010   0.854373  0.845010  0.848421  0.817183   
9      3  0.847812   0.846458  0.847812  0.847087  0.791396   
10     3  0.854710   0.851813  0.854710  0.852966  0.793946   
11     3  0.847596   0.844282  0.847596  0.845592  0.783394   
12     4  0.818280   0.836433  0.818280  0.824282  0.79

In [None]:
# Install necessary libraries for PyTorch Geometric
#!pip install torch-geometric torch-sparse torch-scatter torch-cluster -f https://data.pyg.org/whl/torch-1.9.0+cu102.html

import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, SAGEConv, GATConv, GINConv
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm
import time

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load the WELFake dataset
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/datasets/welfake/WELFake_Dataset.csv')

# Drop NaN values from the 'text' column if any
df = df.dropna(subset=['text'])

# TF-IDF vectorization. The features are later turned into float32 tensors, so
# build them as float32 from the start: the dense matrix is half the size of
# the default float64 one.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# cross-validation split, so vocabulary/IDF statistics include test documents.
vectorizer = TfidfVectorizer(max_features=5000, dtype=np.float32)
node_features = vectorizer.fit_transform(df['text']).toarray()  # Convert sparse matrix to dense
labels = df['label'].values  # Assuming label is binary with 0 for fake and 1 for real

# Define the GNN model
class GNN(torch.nn.Module):
    """Two-layer graph network with a classification head and a pre-training head.

    Args:
        model_type: convolution family, one of ``'gcn'``, ``'graphsage'``,
            ``'gat'`` or ``'gin'``.
        input_dim: size of each input node feature vector.
        hidden_dim: output size of both convolution layers.
        output_dim: number of outputs of the classification head.
        pretrain_output_dim: number of outputs of the pre-training head.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """

    def __init__(self, model_type='gcn', input_dim=5000, hidden_dim=64, output_dim=2, pretrain_output_dim=1):
        super().__init__()
        if model_type == 'gcn':
            self.conv1 = GCNConv(input_dim, hidden_dim)
            self.conv2 = GCNConv(hidden_dim, hidden_dim)
        elif model_type == 'graphsage':
            self.conv1 = SAGEConv(input_dim, hidden_dim)
            self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        elif model_type == 'gat':
            # The first layer uses 8 heads, so the second layer's input is
            # 8 * hidden_dim wide.
            self.conv1 = GATConv(input_dim, hidden_dim, heads=8)
            self.conv2 = GATConv(hidden_dim * 8, hidden_dim)
        elif model_type == 'gin':
            self.conv1 = GINConv(torch.nn.Sequential(torch.nn.Linear(input_dim, hidden_dim),
                                                     torch.nn.ReLU(),
                                                     torch.nn.Linear(hidden_dim, hidden_dim)))
            self.conv2 = GINConv(torch.nn.Sequential(torch.nn.Linear(hidden_dim, hidden_dim),
                                                     torch.nn.ReLU(),
                                                     torch.nn.Linear(hidden_dim, hidden_dim)))
        else:
            # Without this, an unknown name builds a model with no conv layers
            # that only fails with an AttributeError inside forward().
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected 'gcn', 'graphsage', 'gat' or 'gin'."
            )

        self.fc = torch.nn.Linear(hidden_dim, output_dim)  # Classification head
        self.pretrain_fc = torch.nn.Linear(hidden_dim, pretrain_output_dim)  # Pre-training head

    def forward(self, data, pretrain=False):
        """Run both convolutions, then one of the two heads.

        Args:
            data: object exposing ``x`` (node features) and ``edge_index``.
            pretrain: when True, average the node embeddings over dim 0 and
                apply the pre-training head; otherwise apply the
                classification head to every node embedding.

        Returns:
            Output of the selected head.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))

        if pretrain:
            # Collapse all node embeddings into a single row before the head.
            x = torch.mean(x, dim=0, keepdim=True)
            return self.pretrain_fc(x)
        else:
            return self.fc(x)

# Function for calculating metrics
def calculate_metrics(y_true, y_pred):
    """Score predictions against ground truth.

    Args:
        y_true: sequence of ground-truth class labels.
        y_pred: sequence of predicted class labels, aligned with ``y_true``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, roc_auc)``. Precision, recall
        and F1 use support-weighted averaging; ROC-AUC is ``nan`` when
        ``y_true`` holds fewer than two distinct classes.
    """
    weighted = {'average': 'weighted'}

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0, **weighted)
    recall = recall_score(y_true, y_pred, zero_division=0, **weighted)
    f1 = f1_score(y_true, y_pred, **weighted)

    # ROC-AUC is undefined for a single class, so report nan in that case.
    has_both_classes = len(np.unique(y_true)) > 1
    roc_auc = roc_auc_score(y_true, y_pred) if has_both_classes else float('nan')

    return accuracy, precision, recall, f1, roc_auc

# Pre-training function for link prediction
def pretrain(model, data_loader, optimizer, device, epochs=5):
    """Run the pre-training loop and report the mean loss of every epoch.

    Args:
        model: model called as ``model(data, pretrain=True)``.
        data_loader: iterable of graph batches that support ``.to(device)``.
        optimizer: optimizer stepped once per batch.
        device: device each batch is moved to.
        epochs: number of passes over ``data_loader``.

    Returns:
        List with the mean batch loss of each epoch (previously the loss was
        accumulated but never reported or returned).
    """
    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for data in tqdm(data_loader, desc="Pre-training"):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data, pretrain=True)
            output = output.view(-1)
            # NOTE(review): the targets are drawn at random on every step, so
            # this objective carries no signal from the graph; replace with
            # real link-prediction targets if pre-training is meant to help.
            edge_labels = torch.randint(0, 2, (output.size(0),)).float().to(device)
            loss = F.mse_loss(output, edge_labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Guard against an empty loader so reporting never divides by zero.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        print(f"Pre-training epoch {epoch + 1}/{epochs} - loss: {mean_loss:.4f}")

    return epoch_losses

# Cross-validation setup (5-fold)
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
metrics_results = []
epochs = 3  # Fine-tuning epochs per fold

for fold, (train_idx, test_idx) in enumerate(kf.split(node_features, labels)):
    print(f"\nFold {fold + 1}")

    # Generate edge indices (dummy edges for demonstration purposes).
    # NOTE(review): these edges are random and unseeded, so the graphs differ
    # between runs even though the fold split itself is reproducible.
    num_train_nodes = len(train_idx)
    num_test_nodes = len(test_idx)
    train_edge_index = np.random.randint(0, num_train_nodes, (2, num_train_nodes * 5))
    test_edge_index = np.random.randint(0, num_test_nodes, (2, num_test_nodes * 5))

    # Prepare data for GNN
    x_train = torch.tensor(node_features[train_idx], dtype=torch.float)
    edge_index_train = torch.tensor(train_edge_index, dtype=torch.long)
    y_train = torch.tensor(labels[train_idx], dtype=torch.long)
    x_test = torch.tensor(node_features[test_idx], dtype=torch.float)
    edge_index_test = torch.tensor(test_edge_index, dtype=torch.long)
    y_test = torch.tensor(labels[test_idx], dtype=torch.long)

    train_data = Data(x=x_train, edge_index=edge_index_train, y=y_train)
    test_data = Data(x=x_test, edge_index=edge_index_test, y=y_test)

    # Each loader holds a single graph containing every node of its split
    train_loader = DataLoader([train_data], batch_size=1, shuffle=True)
    test_loader = DataLoader([test_data], batch_size=1, shuffle=False)

    # Initialize model and optimizer
    model = GNN(model_type='gcn', input_dim=5000, hidden_dim=64, output_dim=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Pre-training phase
    print("Starting Pre-training")
    pretrain(model, train_loader, optimizer, device, epochs=5)

    # Fine-tuning phase
    for epoch in range(epochs):
        model.train()
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            out = model(data, pretrain=False)
            loss = F.cross_entropy(out, data.y)
            loss.backward()
            optimizer.step()

    # Evaluation phase
    model.eval()
    y_true, y_pred = [], []
    start_time = time.time()
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            out = model(data, pretrain=False)
            pred = out.argmax(dim=1).cpu().numpy()
            y_pred.extend(pred)
            y_true.extend(data.y.cpu().numpy())
    elapsed_seconds = time.time() - start_time

    # The loader yields one batch holding the whole test graph, so dividing by
    # len(test_loader) would report the time for all test nodes at once.
    # Normalise by the number of predicted nodes to get milliseconds per sample.
    inference_time_ms = elapsed_seconds / len(y_true) * 1000

    # Calculate metrics
    accuracy, precision, recall, f1, roc_auc = calculate_metrics(y_true, y_pred)
    metrics_results.append([fold, accuracy, precision, recall, f1, roc_auc, inference_time_ms])

# Convert results to DataFrame and include fold number
results_df = pd.DataFrame(metrics_results, columns=['Fold', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'Inference Time (ms)'])

# Calculate mean and standard deviation over the metric columns only. The
# summary rows are built as dicts so the 'Mean'/'Std' strings are never written
# into a float Series (pandas warns about that dtype mismatch).
metric_columns = [column for column in results_df.columns if column != 'Fold']
summary_df = pd.DataFrame([
    {'Fold': 'Mean', **results_df[metric_columns].mean().to_dict()},
    {'Fold': 'Std', **results_df[metric_columns].std().to_dict()},
])

# Append mean and std to the DataFrame
results_df = pd.concat([results_df, summary_df], ignore_index=True)

# Save to CSV with fold numbers included
output_filename = '/content/drive/My Drive/Colab Notebooks/sundayrun/welfake_gan_crossval_results_with_folds1.csv'
results_df.to_csv(output_filename, index=False)

# Print the results to verify
print("\nCross-Validation Results (Mean ± Std):")
print(results_df)
