In [None]:
!git clone https://github.com/pooja-premnath/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil

Cloning into 'SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 7 (delta 2), reused 7 (delta 2), pack-reused 0[K
Receiving objects: 100% (7/7), 192.82 KiB | 21.42 MiB/s, done.
Resolving deltas: 100% (2/2), done.


## XLM-RoBERTa

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, XLMRobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from tqdm import tqdm

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Series.map turns every category that is missing from label_dict into NaN,
# which would silently become a float label further down; fail loudly instead.
unmapped = df['Category'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} rows have a Category that is not in label_dict: {sorted(label_dict)}"
    )

# Split the data into train and test sets (80/20, fixed seed for repeatability)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

# Both splits are padded, truncated to 128 tokens and returned as PyTorch tensors
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (e.g. ``input_ids``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors."""
        item = {}
        for key, val in self.encodings.items():
            row = val[idx]
            # Rows that are already tensors must be copied with clone().detach();
            # torch.tensor(tensor) emits a copy-construct UserWarning per access.
            item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its labels as a NumPy array, in a NewsDataset
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Fetch the xlm-roberta-base checkpoint configured for 4 output labels
model = XLMRobertaForSequenceClassification.from_pretrained(
    "xlm-roberta-base",
    num_labels=4,
)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and support-weighted F1/precision/recall for the Trainer.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # A class that is never predicted has undefined precision/F1. scikit-learn
    # scores it as 0 but warns on every evaluation; zero_division=0 keeps the
    # same value without the UndefinedMetricWarning.
    prf_kwargs = {'average': 'weighted', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **prf_kwargs),
        'precision': precision_score(labels, preds, **prf_kwargs),
        'recall': recall_score(labels, preds, **prf_kwargs)
    }

# Define training arguments
# Newer transformers releases call the evaluation-schedule argument
# `eval_strategy` and reject the older `evaluation_strategy`; use whichever
# field the installed TrainingArguments dataclass declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    num_train_epochs=5,                # Number of training epochs
    per_device_train_batch_size=16,    # Batch size for training
    per_device_eval_batch_size=16,     # Batch size for evaluation
    warmup_steps=500,                  # NOTE(review): may be a large share of this short run - confirm
    weight_decay=0.01,                 # Strength of weight decay
    logging_dir='./logs',              # Directory for storing logs
    logging_steps=10,                  # Log every 10 steps
    save_strategy="epoch",             # Save the model every epoch
    load_best_model_at_end=True,       # Load the best model when finished training
    metric_for_best_model="accuracy",  # Use accuracy to select the best model
    **{_eval_strategy_key: "epoch"},   # Evaluate every epoch
)

# Initialize the Trainer
# NOTE(review): eval_dataset is the same split that is reported as "Test"
# below, so best-checkpoint selection sees the reported data.
trainer = Trainer(
    model=model,                         # Model to fine-tune
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # Callback that computes the reported metrics
)

# Train the model
trainer.train()

# Evaluate the model on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8479,0.844971,0.508091,0.342361,0.258156,0.508091
2,0.7794,0.851744,0.508091,0.342361,0.258156,0.508091
3,0.8669,0.869883,0.461165,0.291101,0.212673,0.461165
4,0.8825,0.843889,0.461165,0.291101,0.212673,0.461165
5,0.8852,0.828382,0.508091,0.342361,0.258156,0.508091


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Test Accuracy: 0.5081
Test F1 Score: 0.3424
Test Precision: 0.2582
Test Recall: 0.5081


  _warn_prf(average, modifier, msg_start, len(result))


## mBERT

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from tqdm import tqdm

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Series.map turns every category that is missing from label_dict into NaN,
# which would silently become a float label further down; fail loudly instead.
unmapped = df['Category'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} rows have a Category that is not in label_dict: {sorted(label_dict)}"
    )

# Split the data into train and test sets (80/20, fixed seed for repeatability)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data using mBERT
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")

# Both splits are padded, truncated to 128 tokens and returned as PyTorch tensors
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (e.g. ``input_ids``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors."""
        item = {}
        for key, val in self.encodings.items():
            row = val[idx]
            # Rows that are already tensors must be copied with clone().detach();
            # torch.tensor(tensor) emits a copy-construct UserWarning per access.
            item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its labels as a NumPy array, in a NewsDataset
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Fetch the bert-base-multilingual-cased checkpoint configured for 4 output labels
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased",
    num_labels=4,
)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and support-weighted F1/precision/recall for the Trainer.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # A class that is never predicted has undefined precision/F1. scikit-learn
    # scores it as 0 but warns on every evaluation; zero_division=0 keeps the
    # same value without the UndefinedMetricWarning.
    prf_kwargs = {'average': 'weighted', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **prf_kwargs),
        'precision': precision_score(labels, preds, **prf_kwargs),
        'recall': recall_score(labels, preds, **prf_kwargs)
    }

# Define training arguments
# Newer transformers releases call the evaluation-schedule argument
# `eval_strategy` and reject the older `evaluation_strategy`; use whichever
# field the installed TrainingArguments dataclass declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    num_train_epochs=5,                # Number of training epochs
    per_device_train_batch_size=16,    # Batch size for training
    per_device_eval_batch_size=16,     # Batch size for evaluation
    warmup_steps=500,                  # NOTE(review): may be a large share of this short run - confirm
    weight_decay=0.01,                 # Strength of weight decay
    logging_dir='./logs',              # Directory for storing logs
    logging_steps=10,                  # Log every 10 steps
    save_strategy="epoch",             # Save the model every epoch
    load_best_model_at_end=True,       # Load the best model when finished training
    metric_for_best_model="accuracy",  # Use accuracy to select the best model
    **{_eval_strategy_key: "epoch"},   # Evaluate every epoch
)

# Initialize the Trainer
# NOTE(review): eval_dataset is the same split that is reported as "Test"
# below, so best-checkpoint selection sees the reported data.
trainer = Trainer(
    model=model,                         # Model to fine-tune
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # Callback that computes the reported metrics
)

# Train the model
trainer.train()

# Evaluate the model on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8495,0.83336,0.508091,0.342361,0.258156,0.508091
2,0.7784,0.837285,0.508091,0.342361,0.258156,0.508091
3,0.8613,0.849542,0.461165,0.291101,0.212673,0.461165
4,0.885,0.847265,0.461165,0.291101,0.212673,0.461165
5,0.8551,0.833782,0.516181,0.491792,0.498355,0.516181


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Test Accuracy: 0.5162
Test F1 Score: 0.4918
Test Precision: 0.4984
Test Recall: 0.5162


  _warn_prf(average, modifier, msg_start, len(result))


## mdeBERTa

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Series.map turns every category that is missing from label_dict into NaN,
# which would silently become a float label further down; fail loudly instead.
unmapped = df['Category'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} rows have a Category that is not in label_dict: {sorted(label_dict)}"
    )

# Split the data into train and test sets (80/20, fixed seed for repeatability)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data using mDeBERTa-v3-base-mnli-xnli
tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")

# Both splits are padded, truncated to 128 tokens and returned as PyTorch tensors
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (e.g. ``input_ids``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors."""
        item = {}
        for key, val in self.encodings.items():
            row = val[idx]
            # Rows that are already tensors must be copied with clone().detach();
            # torch.tensor(tensor) emits a copy-construct UserWarning per access.
            item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its labels as a NumPy array, in a NewsDataset
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Fetch the mDeBERTa-v3 NLI checkpoint configured for 4 output labels;
# ignore_mismatched_sizes lets the differently-shaped classifier weights be re-initialised
model_load_options = {"num_labels": 4, "ignore_mismatched_sizes": True}
model = AutoModelForSequenceClassification.from_pretrained(
    "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli", **model_load_options
)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and support-weighted F1/precision/recall for the Trainer.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # A class that is never predicted has undefined precision/F1. scikit-learn
    # scores it as 0 but warns on every evaluation; zero_division=0 keeps the
    # same value without the UndefinedMetricWarning.
    prf_kwargs = {'average': 'weighted', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **prf_kwargs),
        'precision': precision_score(labels, preds, **prf_kwargs),
        'recall': recall_score(labels, preds, **prf_kwargs)
    }

# Define training arguments
# Newer transformers releases call the evaluation-schedule argument
# `eval_strategy` and reject the older `evaluation_strategy`; use whichever
# field the installed TrainingArguments dataclass declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    num_train_epochs=5,                # Number of training epochs
    per_device_train_batch_size=16,    # Batch size for training
    per_device_eval_batch_size=16,     # Batch size for evaluation
    warmup_steps=500,                  # NOTE(review): may be a large share of this short run - confirm
    weight_decay=0.01,                 # Strength of weight decay
    logging_dir='./logs',              # Directory for storing logs
    logging_steps=10,                  # Log every 10 steps
    save_strategy="epoch",             # Save the model every epoch
    load_best_model_at_end=True,       # Load the best model when finished training
    metric_for_best_model="accuracy",  # Use accuracy to select the best model
    **{_eval_strategy_key: "epoch"},   # Evaluate every epoch
)

# Initialize the Trainer
# NOTE(review): eval_dataset is the same split that is reported as "Test"
# below, so best-checkpoint selection sees the reported data.
trainer = Trainer(
    model=model,                         # Model to fine-tune
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # Callback that computes the reported metrics
)

# Train the model
trainer.train()

# Evaluate the model on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/1.26k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.07k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at MoritzLaurer/mDeBERTa-v3-base-mnli-xnli and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([4, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8657,0.836212,0.509709,0.345954,0.71974,0.509709
2,0.7907,0.863399,0.508091,0.342361,0.258156,0.508091
3,0.8549,0.840187,0.478964,0.357057,0.549837,0.478964
4,0.8771,0.869532,0.467638,0.354137,0.48576,0.467638
5,0.7324,0.903694,0.519417,0.510003,0.503364,0.519417


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Test Accuracy: 0.5194
Test F1 Score: 0.5100
Test Precision: 0.5034
Test Recall: 0.5194


  _warn_prf(average, modifier, msg_start, len(result))


## Tamil BERT

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForMaskedLM, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Series.map turns every category that is missing from label_dict into NaN,
# which would silently become a float label further down; fail loudly instead.
unmapped = df['Category'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} rows have a Category that is not in label_dict: {sorted(label_dict)}"
    )

# Split the data into train and test sets (80/20, fixed seed for repeatability)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data using the Tamil BERT model
tokenizer = AutoTokenizer.from_pretrained("l3cube-pune/tamil-bert")

# Both splits are padded, truncated to 128 tokens and returned as PyTorch tensors
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (e.g. ``input_ids``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors."""
        item = {}
        for key, val in self.encodings.items():
            row = val[idx]
            # Rows that are already tensors must be copied with clone().detach();
            # torch.tensor(tensor) emits a copy-construct UserWarning per access.
            item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its labels as a NumPy array, in a NewsDataset
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Fetch the l3cube-pune/tamil-bert checkpoint through the masked-LM auto class
model = AutoModelForMaskedLM.from_pretrained(
    "l3cube-pune/tamil-bert",
)

# Modify the model to perform sequence classification
class CustomBERTForSequenceClassification(nn.Module):
    """Sequence classifier: the encoder of a pretrained model plus a linear head.

    Args:
        pretrained_model: Object exposing the encoder as ``.bert`` and the
            hidden width as ``.config.hidden_size``.
        num_labels: Number of output classes of the linear head.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        hidden_size = pretrained_model.config.hidden_size
        # Only the encoder sub-module is kept; nothing else of pretrained_model is referenced
        self.bert = pretrained_model.bert
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when no labels are given."""
        encoder_out = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Classify from the hidden state at sequence position 0
        first_token_state = encoder_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_token_state)
        if labels is None:
            return (None, logits)
        criterion = nn.CrossEntropyLoss()
        return (criterion(logits, labels), logits)

# Rebind `model` to the 4-class classifier built around the loaded checkpoint
model = CustomBERTForSequenceClassification(pretrained_model=model, num_labels=4)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and support-weighted F1/precision/recall for the Trainer.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # A class that is never predicted has undefined precision/F1. scikit-learn
    # scores it as 0 but warns on every evaluation; zero_division=0 keeps the
    # same value without the UndefinedMetricWarning.
    prf_kwargs = {'average': 'weighted', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **prf_kwargs),
        'precision': precision_score(labels, preds, **prf_kwargs),
        'recall': recall_score(labels, preds, **prf_kwargs)
    }

# Define training arguments
# Newer transformers releases call the evaluation-schedule argument
# `eval_strategy` and reject the older `evaluation_strategy`; use whichever
# field the installed TrainingArguments dataclass declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    num_train_epochs=10,               # Number of training epochs
    per_device_train_batch_size=16,    # Batch size for training
    per_device_eval_batch_size=16,     # Batch size for evaluation
    warmup_steps=500,                  # NOTE(review): may be a large share of this run - confirm
    weight_decay=0.01,                 # Strength of weight decay
    logging_dir='./logs',              # Directory for storing logs
    logging_steps=10,                  # Log every 10 steps
    save_strategy="epoch",             # Save the model every epoch
    load_best_model_at_end=True,       # Load the best model when finished training
    metric_for_best_model="accuracy",  # Use accuracy to select the best model
    **{_eval_strategy_key: "epoch"},   # Evaluate every epoch
)

# Initialize the Trainer
# NOTE(review): eval_dataset is the same split that is reported as "Test"
# below, so best-checkpoint selection sees the reported data.
trainer = Trainer(
    model=model,                         # Model to fine-tune
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # Callback that computes the reported metrics
)

# Train the model
trainer.train()

# Evaluate the model on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/6.41M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/951M [00:00<?, ?B/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2502,1.238228,0.508091,0.342361,0.258156,0.508091
2,1.1363,1.139824,0.508091,0.342361,0.258156,0.508091
3,1.0154,0.982702,0.508091,0.342361,0.258156,0.508091
4,0.9031,0.877787,0.508091,0.342361,0.258156,0.508091
5,0.8894,0.848834,0.508091,0.342361,0.258156,0.508091
6,0.8012,0.839008,0.508091,0.342361,0.258156,0.508091
7,0.7432,0.837216,0.508091,0.342361,0.258156,0.508091
8,0.8938,0.856946,0.506472,0.47182,0.48562,0.506472
9,0.7874,0.856873,0.511327,0.501408,0.493861,0.511327
10,0.6482,0.872405,0.530744,0.519473,0.512489,0.530744


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key:

Test Accuracy: 0.5307
Test F1 Score: 0.5195
Test Precision: 0.5125
Test Recall: 0.5307


  _warn_prf(average, modifier, msg_start, len(result))


## Indic BERT

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Series.map turns every category that is missing from label_dict into NaN,
# which would silently become a float label further down; fail loudly instead.
unmapped = df['Category'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} rows have a Category that is not in label_dict: {sorted(label_dict)}"
    )

# Split the data into train and test sets (80/20, fixed seed for repeatability)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data using IndicBERT
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")

# Both splits are padded, truncated to 128 tokens and returned as PyTorch tensors
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    Args:
        encodings: Mapping from feature name (e.g. ``input_ids``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the features and label of example ``idx`` as a dict of tensors."""
        item = {}
        for key, val in self.encodings.items():
            row = val[idx]
            # Rows that are already tensors must be copied with clone().detach();
            # torch.tensor(tensor) emits a copy-construct UserWarning per access.
            item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its labels as a NumPy array, in a NewsDataset
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Fetch the ai4bharat/indic-bert checkpoint through the generic AutoModel class
model = AutoModel.from_pretrained(
    "ai4bharat/indic-bert",
)

# Modify the model to perform sequence classification
class CustomIndicBERTForSequenceClassification(nn.Module):
    """Sequence classifier: a pretrained encoder followed by a linear head.

    Args:
        pretrained_model: Encoder module, stored whole as ``self.bert``; its
            ``.config.hidden_size`` sets the input width of the head.
        num_labels: Number of output classes of the linear head.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        hidden_size = pretrained_model.config.hidden_size
        # The attribute keeps the name `bert` although the whole passed-in model is stored
        self.bert = pretrained_model
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        """Return ``(loss, logits)``; ``loss`` is ``None`` when no labels are given."""
        encoder_out = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Classify from the hidden state at sequence position 0
        first_token_state = encoder_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_token_state)
        if labels is None:
            return (None, logits)
        criterion = nn.CrossEntropyLoss()
        return (criterion(logits, labels), logits)

# Rebind `model` to the 4-class classifier built around the loaded checkpoint
model = CustomIndicBERTForSequenceClassification(pretrained_model=model, num_labels=4)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and support-weighted F1/precision/recall for the Trainer.

    Args:
        pred: Prediction object exposing ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores; the argmax over the last axis
            is taken as the predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # A class that is never predicted has undefined precision/F1. scikit-learn
    # scores it as 0 but warns on every evaluation; zero_division=0 keeps the
    # same value without the UndefinedMetricWarning.
    prf_kwargs = {'average': 'weighted', 'zero_division': 0}
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds, **prf_kwargs),
        'precision': precision_score(labels, preds, **prf_kwargs),
        'recall': recall_score(labels, preds, **prf_kwargs)
    }

# Define training arguments
# Newer transformers releases call the evaluation-schedule argument
# `eval_strategy` and reject the older `evaluation_strategy`; use whichever
# field the installed TrainingArguments dataclass declares.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)
training_args = TrainingArguments(
    output_dir='./results',            # Output directory
    num_train_epochs=10,               # Number of training epochs
    per_device_train_batch_size=16,    # Batch size for training
    per_device_eval_batch_size=16,     # Batch size for evaluation
    warmup_steps=500,                  # NOTE(review): may be a large share of this run - confirm
    weight_decay=0.01,                 # Strength of weight decay
    logging_dir='./logs',              # Directory for storing logs
    logging_steps=10,                  # Log every 10 steps
    save_strategy="epoch",             # Save the model every epoch
    load_best_model_at_end=True,       # Load the best model when finished training
    metric_for_best_model="accuracy",  # Use accuracy to select the best model
    **{_eval_strategy_key: "epoch"},   # Evaluate every epoch
)

# Initialize the Trainer
# NOTE(review): eval_dataset is the same split that is reported as "Test"
# below, so best-checkpoint selection sees the reported data.
trainer = Trainer(
    model=model,                         # Model to fine-tune
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # Callback that computes the reported metrics
)

# Train the model
trainer.train()

# Evaluate the model on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/5.65M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/135M [00:00<?, ?B/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8461,0.833746,0.508091,0.342361,0.258156,0.508091
2,0.7791,0.838227,0.508091,0.342361,0.258156,0.508091
3,0.8533,0.874417,0.461165,0.291101,0.212673,0.461165
4,0.8715,0.847328,0.461165,0.291101,0.212673,0.461165
5,0.8582,0.836701,0.451456,0.322144,0.418475,0.451456
6,0.7635,0.84298,0.514563,0.356603,0.721003,0.514563
7,0.7016,0.847918,0.491909,0.482389,0.475013,0.491909
8,0.8239,0.96666,0.491909,0.481147,0.474563,0.491909
9,0.5728,1.08419,0.514563,0.501595,0.496474,0.514563
10,0.3945,1.183752,0.508091,0.495744,0.490588,0.508091


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key:

  _warn_prf(average, modifier, msg_start, len(result))


Test Accuracy: 0.5146
Test F1 Score: 0.3566
Test Precision: 0.7210
Test Recall: 0.5146


## Indic BART

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Read the expert-annotated Tamil fake-news CSV from the cloned repository.
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Encode the four category names as integer class ids.
# NOTE(review): Series.map leaves NaN for any category missing from label_dict.
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

# Fetch the IndicBART tokenizer and encode both splits with one shared option set:
# truncate to at most 128 tokens, pad, and return PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART")
encode_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing pre-tokenized encodings with class labels.

    Args:
        encodings: Mapping from field name to a tensor indexed by example.
        labels: Indexable sequence of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of field tensors plus a ``labels`` tensor."""
        example = {}
        for field, tensor in self.encodings.items():
            # Copy the row so callers cannot mutate the stored encodings.
            example[field] = tensor[idx].clone().detach()
        example['labels'] = torch.tensor(self.labels[idx])
        return example

# Wrap each tokenized split, together with its label array, in a NewsDataset.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Download the pre-trained IndicBART seq2seq checkpoint.
indicbart_checkpoint = "ai4bharat/IndicBART"
pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(indicbart_checkpoint)

# Modify the model to perform sequence classification
class CustomIndicBARTForSequenceClassification(nn.Module):
    """IndicBART encoder followed by a linear classification head.

    Args:
        pretrained_model: Loaded seq2seq model; its ``config`` and
            ``model.encoder`` are reused.
        num_labels: Number of output classes.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        bart_config = pretrained_model.config
        self.config = bart_config
        self.encoder = pretrained_model.model.encoder
        self.classifier = nn.Linear(bart_config.d_model, num_labels)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Classify each sequence from the encoder state at position 0.

        Returns:
            ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given,
            otherwise the cross-entropy between ``logits`` and ``labels``.
        """
        hidden_states = self.encoder(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        # NOTE(review): position 0 is treated as a CLS-style summary token —
        # confirm the tokenizer actually prepends one.
        first_position = hidden_states[:, 0, :]
        logits = self.classifier(first_position)
        if labels is None:
            return (None, logits)
        criterion = nn.CrossEntropyLoss()
        return (criterion(logits, labels), logits)

# Build the classifier on top of the IndicBART encoder, one output class per label_dict entry.
model = CustomIndicBARTForSequenceClassification(pretrained_model, num_labels=len(label_dict))

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1/precision/recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` (per-class scores);
            the predicted class is the argmax over the last axis.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    weighted = {'average': 'weighted'}
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, **weighted),
        # zero_division=0 scores a never-predicted class as 0 instead of warning.
        'precision': precision_score(y_true, y_pred, zero_division=0, **weighted),
        'recall': recall_score(y_true, y_pred, **weighted),
    }

# Training configuration: 10 epochs, batch size 16, evaluation after every epoch,
# and no checkpoints written to disk.
training_options = dict(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="no",
    load_best_model_at_end=False,
    greater_is_better=True,
)
training_args = TrainingArguments(**training_options)

# The held-out test split doubles as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# Fine-tune, then score the final weights on the test split.
trainer.train()
results = trainer.evaluate()

# Report the headline metrics to four decimal places.
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/498 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/832 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.90M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/221 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/398 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


pytorch_model.bin:   0%|          | 0.00/976M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2616,1.201548,0.470874,0.335097,0.508132,0.470874
2,0.813,0.843165,0.508091,0.342361,0.258156,0.508091
3,0.8931,0.830919,0.508091,0.34803,0.488765,0.508091
4,0.8693,0.830708,0.506472,0.341638,0.257751,0.506472
5,0.8773,0.830604,0.506472,0.341638,0.257751,0.506472
6,0.7791,0.832273,0.508091,0.342361,0.258156,0.508091
7,0.7204,0.839306,0.508091,0.342361,0.258156,0.508091
8,0.9292,0.832753,0.508091,0.342361,0.258156,0.508091
9,0.8394,0.834642,0.508091,0.342361,0.258156,0.508091
10,0.7456,0.832844,0.508091,0.342361,0.258156,0.508091


Test Accuracy: 0.5081
Test F1 Score: 0.3424
Test Precision: 0.2582
Test Recall: 0.5081


## MuRIL

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForMaskedLM, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Read the expert-annotated Tamil fake-news CSV from the cloned repository.
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Encode the four category names as integer class ids.
# NOTE(review): Series.map leaves NaN for any category missing from label_dict.
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

# Fetch the MuRIL tokenizer and encode both splits with one shared option set:
# truncate to at most 128 tokens, pad, and return PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
encode_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing pre-tokenized encodings with class labels.

    Args:
        encodings: Mapping from field name to a tensor indexed by example.
        labels: Indexable sequence of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of field tensors plus a ``labels`` tensor."""
        example = {}
        for field, tensor in self.encodings.items():
            # Copy the row so callers cannot mutate the stored encodings.
            example[field] = tensor[idx].clone().detach()
        example['labels'] = torch.tensor(self.labels[idx])
        return example

# Wrap each tokenized split, together with its label array, in a NewsDataset.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Download the pre-trained MuRIL checkpoint (loaded with its masked-LM head).
muril_checkpoint = "google/muril-base-cased"
model = AutoModelForMaskedLM.from_pretrained(muril_checkpoint)

# Modify the model to perform sequence classification
class CustomMuRILForSequenceClassification(nn.Module):
    """MuRIL BERT backbone followed by a linear classification head.

    Args:
        pretrained_model: Loaded model exposing a ``bert`` sub-module and a
            ``config`` with ``hidden_size``.
        num_labels: Number of output classes.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        hidden_size = pretrained_model.config.hidden_size
        self.muril = pretrained_model.bert  # backbone only; the wrapper's other heads are not used
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        """Classify each sequence from the backbone state at position 0.

        Returns:
            ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given,
            otherwise the cross-entropy between ``logits`` and ``labels``.
        """
        backbone_out = self.muril(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        first_position = backbone_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_position)
        if labels is None:
            return (None, logits)
        criterion = nn.CrossEntropyLoss()
        return (criterion(logits, labels), logits)

# Replace the loaded checkpoint with the classifier built on its backbone,
# one output class per label_dict entry.
model = CustomMuRILForSequenceClassification(model, num_labels=len(label_dict))

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1/precision/recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` (per-class scores);
            the predicted class is the argmax over the last axis.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    weighted = {'average': 'weighted'}
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, **weighted),
        # zero_division=0 scores a never-predicted class as 0 instead of warning.
        'precision': precision_score(y_true, y_pred, zero_division=0, **weighted),
        'recall': recall_score(y_true, y_pred, **weighted),
    }

# Training configuration: 10 epochs, batch size 16, evaluate and checkpoint after
# every epoch, then reload the checkpoint with the highest accuracy.
training_options = dict(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=1,
)
training_args = TrainingArguments(**training_options)

# NOTE(review): the test split is also the evaluation set used to pick the best
# checkpoint, so the reported test scores are selected on the test data itself.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# Fine-tune, then score the reloaded best checkpoint on the test split.
trainer.train()
results = trainer.evaluate()

# Report the headline metrics to four decimal places.
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/953M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2758,1.264631,0.508091,0.342361,0.258156,0.508091
2,1.1627,1.163866,0.508091,0.342361,0.258156,0.508091
3,1.0275,0.995841,0.508091,0.342361,0.258156,0.508091
4,0.9065,0.881777,0.508091,0.342361,0.258156,0.508091
5,0.8897,0.850318,0.508091,0.342361,0.258156,0.508091
6,0.8031,0.839848,0.508091,0.342361,0.258156,0.508091
7,0.7436,0.838174,0.508091,0.342361,0.258156,0.508091
8,0.9314,0.833545,0.508091,0.342361,0.258156,0.508091
9,0.8428,0.83313,0.508091,0.342361,0.258156,0.508091
10,0.7509,0.839988,0.516181,0.48279,0.496681,0.516181


Test Accuracy: 0.5162
Test F1 Score: 0.4828
Test Precision: 0.4967
Test Recall: 0.5162


## LaBSE

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Read the expert-annotated Tamil fake-news CSV from the cloned repository.
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Encode the four category names as integer class ids.
# NOTE(review): Series.map leaves NaN for any category missing from label_dict.
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

# Fetch the LaBSE tokenizer and encode both splits with one shared option set:
# truncate to at most 128 tokens, pad, and return PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("setu4993/LaBSE")
encode_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing pre-tokenized encodings with class labels.

    Args:
        encodings: Mapping from field name to a tensor indexed by example.
        labels: Indexable sequence of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of field tensors plus a ``labels`` tensor."""
        # The encodings are already tensors; re-wrapping a tensor in torch.tensor()
        # emits a UserWarning on every item, so copy with clone().detach() instead.
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Wrap each tokenized split, together with its label array, in a NewsDataset.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Download the pre-trained LaBSE checkpoint.
labse_checkpoint = "setu4993/LaBSE"
model = AutoModel.from_pretrained(labse_checkpoint)

# Modify the model to perform sequence classification
class CustomLaBSEForSequenceClassification(nn.Module):
    """LaBSE backbone followed by a linear classification head.

    Args:
        pretrained_model: Loaded backbone exposing ``config.hidden_size`` and
            returning an object with ``last_hidden_state``.
        num_labels: Number of output classes.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        self.labse = pretrained_model
        self.classifier = nn.Linear(pretrained_model.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, labels=None, token_type_ids=None):
        """Classify each sequence from the backbone state at position 0.

        Args:
            input_ids: Token ids passed to the backbone.
            attention_mask: Optional padding mask passed to the backbone.
            labels: Optional class ids; when given, a cross-entropy loss is computed.
            token_type_ids: Optional segment ids. Accepted (last, so existing
                positional calls keep working) so that ``model(**batch)`` works
                with tokenizer output that includes this field; forwarded to the
                backbone only when provided.

        Returns:
            ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given.
        """
        backbone_kwargs = {'attention_mask': attention_mask}
        if token_type_ids is not None:
            backbone_kwargs['token_type_ids'] = token_type_ids
        outputs = self.labse(input_ids, **backbone_kwargs)
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])
        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return (loss, logits)

# Replace the loaded checkpoint with the classifier built on top of it,
# one output class per label_dict entry.
model = CustomLaBSEForSequenceClassification(model, num_labels=len(label_dict))

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1/precision/recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` (per-class scores);
            the predicted class is the argmax over the last axis.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')
    # zero_division=0 scores a never-predicted class as 0 without emitting an
    # UndefinedMetricWarning on every evaluation pass.
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted')
    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    num_train_epochs=10,             # Number of training epochs
    per_device_train_batch_size=16,  # Batch size for training
    per_device_eval_batch_size=16,   # Batch size for evaluation
    warmup_steps=500,                # Number of warmup steps
    weight_decay=0.01,               # Strength of weight decay
    logging_dir='./logs',            # Directory for storing logs
    logging_steps=10,                # Log every 10 steps
    eval_strategy="epoch",           # Evaluate every epoch (replaces the deprecated `evaluation_strategy`)
    save_strategy="epoch",           # Save the model every epoch
    load_best_model_at_end=True,     # Load the best model when finished training
    metric_for_best_model="accuracy",# Use accuracy to select the best model
    save_total_limit=1               # Cap retained checkpoints; the LaBSE weights are ~1.9 GB each
)

# Initialize the Trainer
# NOTE(review): the test split is also the evaluation set used to pick the best
# checkpoint, so the reported test scores are selected on the test data itself.
trainer = Trainer(
    model=model,                         # The instantiated model to be trained
    args=training_args,                  # Training arguments, defined above
    train_dataset=train_dataset,         # Training dataset
    eval_dataset=test_dataset,           # Evaluation dataset
    compute_metrics=compute_metrics      # The callback that computes metrics of interest
)

# Train the model
trainer.train()

# Evaluate the reloaded best checkpoint on the test set
results = trainer.evaluate()

# Print the results
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/367 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/5.22M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/611 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.876,0.837748,0.488673,0.399221,0.441004,0.488673
2,0.8086,0.848612,0.504854,0.37233,0.462876,0.504854
3,0.8324,0.856044,0.508091,0.486941,0.486347,0.508091
4,0.8444,1.027149,0.478964,0.475259,0.490726,0.478964
5,0.3945,1.261763,0.490291,0.484412,0.478987,0.490291
6,0.3582,1.577425,0.495146,0.485539,0.488051,0.495146
7,0.2543,1.835485,0.485437,0.484928,0.49157,0.485437
8,0.1723,2.252044,0.483819,0.482403,0.482052,0.483819
9,0.1376,2.554358,0.488673,0.485557,0.483519,0.488673
10,0.0675,2.814353,0.478964,0.47795,0.477355,0.478964


  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  _warn_prf(average, modifier, msg_start, len(result))
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Test Accuracy: 0.5081
Test F1 Score: 0.4869
Test Precision: 0.4863
Test Recall: 0.5081


  _warn_prf(average, modifier, msg_start, len(result))


## mT5

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Read the expert-annotated Tamil fake-news CSV from the cloned repository.
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Encode the four category names as integer class ids.
# NOTE(review): Series.map leaves NaN for any category missing from label_dict.
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

# Fetch the mT5 tokenizer and encode both splits with one shared option set:
# truncate to at most 128 tokens, pad, and return PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("google/mt5-base")
encode_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing pre-tokenized encodings with class labels.

    Args:
        encodings: Mapping from field name to a tensor indexed by example.
        labels: Indexable sequence of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of field tensors plus a ``labels`` tensor."""
        example = {}
        for field, tensor in self.encodings.items():
            # Copy the row so callers cannot mutate the stored encodings.
            example[field] = tensor[idx].clone().detach()
        example['labels'] = torch.tensor(self.labels[idx])
        return example

# Wrap each tokenized split, together with its label array, in a NewsDataset.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Download the pre-trained mT5 seq2seq checkpoint.
mt5_checkpoint = "google/mt5-base"
pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(mt5_checkpoint)

# Modify the model to perform sequence classification
class CustomMT5ForSequenceClassification(nn.Module):
    """mT5 encoder followed by a linear classification head.

    Args:
        pretrained_model: Loaded seq2seq model; its ``config``, ``shared``
            embedding and ``encoder`` are reused.
        num_labels: Number of output classes.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        self.config = pretrained_model.config  # Inherit the configuration from the pre-trained model
        self.shared = pretrained_model.shared  # The shared embedding layer
        self.encoder = pretrained_model.encoder  # Encoder stack from the mT5 model
        self.classifier = nn.Linear(pretrained_model.config.d_model, num_labels)  # Classification head

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Classify each sequence from the mean of its encoder states.

        The T5 tokenizer does not prepend a CLS-style token, so position 0 holds
        only the first sub-word of the text. The sequence is therefore summarised
        by averaging the encoder states over the non-padded positions.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional mask, non-zero for real tokens; when
                omitted, every position is averaged.
            labels: Optional class ids; when given, a cross-entropy loss is computed.

        Returns:
            ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given.
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        if attention_mask is None:
            pooled = hidden_states.mean(dim=1)
        else:
            mask = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
            # clamp guards against division by zero for an all-padding row.
            pooled = (hidden_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
        logits = self.classifier(pooled)
        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return (loss, logits)

# Build the classifier on top of the mT5 encoder, one output class per label_dict entry.
model = CustomMT5ForSequenceClassification(pretrained_model, num_labels=len(label_dict))

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1/precision/recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` (per-class scores);
            the predicted class is the argmax over the last axis.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    weighted = {'average': 'weighted'}
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, **weighted),
        # zero_division=0 scores a never-predicted class as 0 instead of warning.
        'precision': precision_score(y_true, y_pred, zero_division=0, **weighted),
        'recall': recall_score(y_true, y_pred, **weighted),
    }

# Training configuration: 10 epochs, batch size 16, evaluation after every epoch,
# and no checkpoints written to disk.
training_options = dict(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="no",
    load_best_model_at_end=False,
    greater_is_better=True,
)
training_args = TrainingArguments(**training_options)

# The held-out test split doubles as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# Fine-tune, then score the final weights on the test split.
trainer.train()
results = trainer.evaluate()

# Report the headline metrics to four decimal places.
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


pytorch_model.bin:   0%|          | 0.00/2.33G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.297,1.299263,0.29288,0.364303,0.531584,0.29288
2,1.0251,1.009576,0.378641,0.313667,0.375505,0.378641
3,0.9864,0.977437,0.438511,0.324522,0.471163,0.438511
4,0.9814,0.941118,0.478964,0.469302,0.467842,0.478964
5,0.9288,0.920187,0.477346,0.455957,0.469585,0.477346
6,0.8268,0.926309,0.491909,0.450795,0.466135,0.491909
7,0.8239,0.92399,0.495146,0.433226,0.465666,0.495146
8,0.9373,0.91617,0.495146,0.474234,0.474311,0.495146
9,0.8653,0.918935,0.493528,0.451978,0.468991,0.493528
10,0.8189,0.916728,0.488673,0.459252,0.464465,0.488673


Test Accuracy: 0.4887
Test F1 Score: 0.4593
Test Precision: 0.4645
Test Recall: 0.4887


## Tamillion

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Read the expert-annotated Tamil fake-news CSV from the cloned repository.
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Encode the four category names as integer class ids.
# NOTE(review): Series.map leaves NaN for any category missing from label_dict.
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
df['Category'] = df['Category'].map(label_dict)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

# Fetch the Tamillion tokenizer and encode both splits with one shared option set:
# truncate to at most 128 tokens, pad, and return PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained("monsoon-nlp/tamillion")
encode_options = dict(truncation=True, padding=True, max_length=128, return_tensors="pt")

train_encodings = tokenizer(list(train_texts), **encode_options)
test_encodings = tokenizer(list(test_texts), **encode_options)

# Custom Dataset class
class NewsDataset(Dataset):
    """Map-style dataset pairing pre-tokenized encodings with class labels.

    Args:
        encodings: Mapping from field name to a tensor indexed by example.
        labels: Indexable sequence of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of field tensors plus a ``labels`` tensor."""
        example = {}
        for field, tensor in self.encodings.items():
            # Copy the row so callers cannot mutate the stored encodings.
            example[field] = tensor[idx].clone().detach()
        example['labels'] = torch.tensor(self.labels[idx])
        return example

# Wrap each tokenized split, together with its label array, in a NewsDataset.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Download the pre-trained Tamillion checkpoint.
tamillion_checkpoint = "monsoon-nlp/tamillion"
model = AutoModel.from_pretrained(tamillion_checkpoint)

# Modify the model to perform sequence classification
class CustomTamillionForSequenceClassification(nn.Module):
    """Tamillion backbone followed by a linear classification head.

    Args:
        pretrained_model: Loaded backbone exposing ``config.hidden_size`` and
            returning an object with ``last_hidden_state``.
        num_labels: Number of output classes.
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        hidden_size = pretrained_model.config.hidden_size
        self.tamillion = pretrained_model
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        """Classify each sequence from the backbone state at position 0.

        Returns:
            ``(loss, logits)``; ``loss`` is ``None`` when ``labels`` is not given,
            otherwise the cross-entropy between ``logits`` and ``labels``.
        """
        backbone_out = self.tamillion(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        first_position = backbone_out.last_hidden_state[:, 0, :]
        logits = self.classifier(first_position)
        if labels is None:
            return (None, logits)
        criterion = nn.CrossEntropyLoss()
        return (criterion(logits, labels), logits)

# Replace the loaded checkpoint with the classifier built on top of it,
# one output class per label_dict entry.
model = CustomTamillionForSequenceClassification(model, num_labels=len(label_dict))

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1/precision/recall for a Trainer evaluation.

    Args:
        pred: Object exposing ``label_ids`` and ``predictions`` (per-class scores);
            the predicted class is the argmax over the last axis.

    Returns:
        Dict with keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    weighted = {'average': 'weighted'}
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, **weighted),
        # zero_division=0 scores a never-predicted class as 0 instead of warning.
        'precision': precision_score(y_true, y_pred, zero_division=0, **weighted),
        'recall': recall_score(y_true, y_pred, **weighted),
    }

# Training configuration: 10 epochs, batch size 16, evaluate and checkpoint after
# every epoch, then reload the checkpoint with the highest accuracy.
training_options = dict(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=1,
)
training_args = TrainingArguments(**training_options)

# NOTE(review): the test split is also the evaluation set used to pick the best
# checkpoint, so the reported test scores are selected on the test data itself.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# Fine-tune, then score the reloaded best checkpoint on the test split.
trainer.train()
results = trainer.evaluate()

# Report the headline metrics to four decimal places.
print(f"Test Accuracy: {results['eval_accuracy']:.4f}")
print(f"Test F1 Score: {results['eval_f1']:.4f}")
print(f"Test Precision: {results['eval_precision']:.4f}")
print(f"Test Recall: {results['eval_recall']:.4f}")


tokenizer_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/736 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/837k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.9818,1.437601,0.021036,0.000867,0.000442,0.021036
2,0.7959,1.410575,0.021036,0.000867,0.000442,0.021036
3,0.8932,1.360666,0.508091,0.342361,0.258156,0.508091
4,0.865,1.267278,0.508091,0.342361,0.258156,0.508091
5,0.8849,1.255939,0.508091,0.342361,0.258156,0.508091
6,0.775,1.243546,0.508091,0.342361,0.258156,0.508091
7,0.7224,1.235908,0.508091,0.342361,0.258156,0.508091
8,0.9379,1.260063,0.508091,0.342361,0.258156,0.508091
9,0.843,1.247866,0.508091,0.342361,0.258156,0.508091
10,0.7533,1.251249,0.508091,0.342361,0.258156,0.508091


Test Accuracy: 0.5081
Test F1 Score: 0.3424
Test Precision: 0.2582
Test Recall: 0.5081


## LEALLA

In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

# Load the dataset
df = pd.read_csv("/content/SPELLL-2024-Fine-Grained-Dataset-with-Expert-Annotations-and-LLMs-for-Fake-News-in-Tamil/Data/Expert-Annotated Dataset.csv")

# Map the categories to numerical labels
label_dict = {'Clickbait': 0, 'Misleading': 1, 'Biased': 2, 'Humor': 3}
category_ids = df['Category'].map(label_dict)

# Series.map() yields NaN for every value that is not a key of label_dict
# (typos, stray whitespace, missing cells). That would silently turn the label
# column into floats and only fail much later inside the loss, so stop here
# with a message that names the offending values.
unmapped_mask = category_ids.isna()
if unmapped_mask.any():
    unknown_categories = sorted(df.loc[unmapped_mask, 'Category'].astype(str).unique())
    raise ValueError(
        f"{int(unmapped_mask.sum())} row(s) have a Category outside label_dict: {unknown_categories}"
    )
df['Category'] = category_ids.astype('int64')

# Split the data into train and test sets (80/20, fixed seed for a repeatable split)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['Text'], df['Category'], test_size=0.2, random_state=42)

# Load the tokenizer and tokenize the data using LEALLA-base
tokenizer = AutoTokenizer.from_pretrained("setu4993/LEALLA-base")

# Texts are truncated to at most 128 tokens and padded so each split comes
# back as rectangular PyTorch tensors.
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(list(test_texts), truncation=True, padding=True, max_length=128, return_tensors="pt")

# Custom Dataset class
class NewsDataset(Dataset):
    """Dataset that pairs pre-computed tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping from field name to an indexable tensor whose first
            axis enumerates the examples.
        labels: Indexable sequence holding one label per example.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors with an added 'labels' entry."""
        sample = {}
        for field, values in self.encodings.items():
            # Copy the row so the caller cannot modify the stored encodings.
            sample[field] = values[idx].clone().detach()
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Load the pre-trained LEALLA-base model
pretrained_model = AutoModel.from_pretrained("setu4993/LEALLA-base")

# Create datasets: wrap each split's encodings together with its labels.
# `.values` hands NewsDataset a plain array, so the dataset indexes labels by
# position rather than by the index carried over from the original frame.
train_dataset = NewsDataset(encodings=train_encodings, labels=train_labels.values)
test_dataset = NewsDataset(encodings=test_encodings, labels=test_labels.values)

# Modify the model to perform sequence classification
class CustomLEALLAForSequenceClassification(nn.Module):
    """Pre-trained encoder with a single linear classification head on top.

    Args:
        pretrained_model: Encoder module. It must expose ``config.hidden_size``
            and return an object with a ``last_hidden_state`` attribute.
        num_labels: Number of target classes (output size of the head).
    """

    def __init__(self, pretrained_model, num_labels):
        super().__init__()
        self.config = pretrained_model.config  # Inherit the configuration from the pre-trained model
        self.bert = pretrained_model  # The LEALLA model
        self.classifier = nn.Linear(pretrained_model.config.hidden_size, num_labels)  # Add a classification head

    def forward(self, input_ids, attention_mask=None, labels=None, token_type_ids=None):
        """Run the encoder and classify from the first token's hidden state.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            labels: Optional targets; when given, a cross-entropy loss is computed.
            token_type_ids: Optional segment ids. Accepted so the full tokenizer
                output can be passed as ``model(**encodings)``; forwarded to the
                encoder only when provided.

        Returns:
            A ``(loss, logits)`` tuple; ``loss`` is ``None`` when ``labels`` is ``None``.
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            # Only forwarded on request, so encoders without this argument keep working.
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.bert(**encoder_inputs)
        # Position 0 of the sequence: the CLS token's representation.
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])
        loss = None
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
        return (loss, logits)

# Initialize the custom model for sequence classification.
# The linear head is randomly initialised at construction time, which happens
# before the Trainer is created, so seed torch first to make the starting
# weights repeatable (same value as the random_state used for the split).
torch.manual_seed(42)
model = CustomLEALLAForSequenceClassification(pretrained_model, num_labels=4)

# Define metrics for evaluation
def compute_metrics(pred):
    """Compute accuracy and weighted F1 / precision / recall for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores, reduced with an arg-max over the last axis).

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    accuracy = accuracy_score(labels, preds)
    # All three weighted scores use the same explicit zero_division=0 policy.
    # Previously only precision did, so an undefined per-class score would be
    # handled silently for one metric and raise a warning for the other two.
    f1 = f1_score(labels, preds, average='weighted', zero_division=0)
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)
    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Define training arguments
training_args = TrainingArguments(
    # --- where artefacts go ---
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,                # emit a log entry every 10 steps

    # --- optimisation schedule ---
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,

    # --- evaluation and checkpointing ---
    eval_strategy="epoch",           # run evaluation once per epoch
    save_strategy="no",              # never write checkpoints
    load_best_model_at_end=False,    # keep the final weights; nothing to reload without checkpoints
    # NOTE(review): no metric_for_best_model is set in this call and best-model
    # loading is off, so this flag presumably has no effect here — confirm.
    greater_is_better=True,
)

# Build the Trainer (checkpoint saving is disabled through training_args).
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,       # also evaluated after every epoch during training
)

# Fine-tune, then score the final weights on the test set.
trainer.train()
results = trainer.evaluate()

# Report the four metrics, one per line, to four decimal places.
print(
    f"Test Accuracy: {results['eval_accuracy']:.4f}",
    f"Test F1 Score: {results['eval_f1']:.4f}",
    f"Test Precision: {results['eval_precision']:.4f}",
    f"Test Recall: {results['eval_recall']:.4f}",
    sep="\n",
)


tokenizer_config.json:   0%|          | 0.00/367 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/5.22M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/13.6M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/610 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/428M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.3202,1.326091,0.454693,0.288292,0.211054,0.454693
2,0.844,0.846695,0.506472,0.341638,0.257751,0.506472
3,0.9007,0.839529,0.472492,0.401954,0.475888,0.472492
4,0.8701,0.834353,0.508091,0.342361,0.258156,0.508091
5,0.8971,0.836052,0.506472,0.398865,0.48394,0.506472
6,0.7667,0.834992,0.5,0.344267,0.358612,0.5
7,0.7115,0.837792,0.506472,0.341638,0.257751,0.506472
8,0.9288,0.831345,0.514563,0.439877,0.499755,0.514563
9,0.8164,0.836164,0.509709,0.412179,0.493535,0.509709
10,0.7411,0.835751,0.516181,0.443866,0.502574,0.516181


Test Accuracy: 0.5162
Test F1 Score: 0.4439
Test Precision: 0.5026
Test Recall: 0.5162
