In [None]:
!pip install transformers
!pip install imblearn
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold
from transformers import TFBertForSequenceClassification, BertTokenizer, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EvalPrediction

from torch.utils.data import Dataset
import os

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks
from imblearn.under_sampling import NeighbourhoodCleaningRule
import tensorflow as tf



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m111.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m62.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1
Looking in indexes: https://pypi.org/simple, https://u

In [None]:

# Mount Google Drive at /content/drive so files under /content/drive/MyDrive
# can be read by later cells. The google.colab module is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Sanity check of the raw CSV: print its index (which shows the row count)
# and the number of rows per value of the `label` column.
df = pd.read_csv('/content/drive/MyDrive/Twitch_Colab/twitch_messages.csv')
print(df.index)
label_counts = df['label'].value_counts()
print(label_counts)

RangeIndex(start=0, stop=4000, step=1)
Irrelevant to Streamer    2555
Relevant to Streamer      1445
Name: label, dtype: int64


In [None]:
import string
import re

#model_checkpoint = "albert-base-v2"

# Which models can we use?
# 1. Bert, 2. Roberta, 3. Bart,
# Create a directory to save the models; exist_ok=True makes re-running this
# cell harmless when the directory is already there.
os.makedirs("saved_models", exist_ok=True)

def apply_sampling(X_train, y_train, which_sample):
    """Resample a training split with one of three imbalanced-learn samplers.

    Args:
        X_train: Features passed to the sampler's ``fit_resample``.
        y_train: Labels aligned with ``X_train``.
        which_sample: Strategy selector:
            ``0`` -> ``SMOTE(random_state=42)``,
            ``1`` -> ``TomekLinks()``,
            ``2`` -> ``NeighbourhoodCleaningRule()``.

    Returns:
        The ``(X_resampled, y_resampled)`` pair returned by ``fit_resample``.

    Raises:
        ValueError: If ``which_sample`` is not 0, 1 or 2. Previously this case
            only printed a message and returned ``None``, which made callers
            that unpack the result fail later with an unrelated ``TypeError``.
    """
    # Factories (not instances) so that only the selected sampler is built.
    sampler_factories = {
        0: lambda: SMOTE(random_state=42),
        1: lambda: TomekLinks(),
        2: lambda: NeighbourhoodCleaningRule(),
    }

    make_sampler = sampler_factories.get(which_sample)
    if make_sampler is None:
        raise ValueError(
            "specify the sampling technique: expected 0 (SMOTE), 1 (TomekLinks) "
            "or 2 (NeighbourhoodCleaningRule), got {!r}".format(which_sample)
        )

    return make_sampler().fit_resample(X_train, y_train)





# Load the dataset and preprocess it
def load_and_preprocess_data(csv_path='/content/drive/MyDrive/Twitch_Colab/twitch_messages.csv'):
    """Load the Twitch chat CSV and build the text/label lists used for training.

    The CSV must provide the columns ``VidID``, ``StreamerName``, ``message``
    and ``Mohammad_Code`` (used as the label).

    Steps:
        1. ``replaced_author``: the message with every streamer name (exact
           spelling, then lower-cased spelling) replaced by ``author``, so
           ``@<streamer>`` becomes ``@author``.
        2. ``no_spaces``: the original message with each whitespace run
           collapsed to a single space (streamer names are NOT masked here).
        3. ``no_punctuation``: ``no_spaces`` with ``string.punctuation`` removed.
        4. Rows with a duplicate ``no_spaces`` value are dropped (first kept),
           then rows containing any missing value are dropped.
        5. In ``replaced_author`` every ``@mention`` other than ``@author`` is
           rewritten to ``@user``.

    Label counts and replacement counts are printed for the cleaned frame.
    No resampling happens in this function; the "before sampling" and
    "After Sampling" lines therefore report the same numbers.

    Args:
        csv_path: Location of the messages CSV. Defaults to the Colab Drive
            path used throughout this notebook.

    Returns:
        dict with ``'text'`` (the ``no_spaces`` strings) and ``'labels'``
        (the ``Mohammad_Code`` values) as parallel lists.
    """
    df = pd.read_csv(csv_path)

    # 1. Replace streamer names with "author". regex=False keeps names that
    #    contain regex metacharacters from being interpreted as patterns.
    streamer_names = df['StreamerName'].dropna().unique().tolist()
    replaced_author = df['message']
    for name in streamer_names:
        replaced_author = replaced_author.str.replace(name, 'author', regex=False)
    for name in streamer_names:
        replaced_author = replaced_author.str.replace(name.lower(), 'author', regex=False)

    # 2. Collapse whitespace on the message itself, without replacing the
    #    author name. regex=True is required: with pandas' literal default the
    #    pattern r'\s+' would never match and nothing would be collapsed.
    no_spaces = df['message'].str.replace(r'\s+', ' ', regex=True)

    # 3. Remove punctuation. re.escape keeps '\', ']', '^' and '-' literal
    #    inside the character class.
    punctuation_class = '[{}]'.format(re.escape(string.punctuation))
    no_punctuation = no_spaces.str.replace(punctuation_class, '', regex=True)

    new_df = pd.DataFrame({
        'video_id': df['VidID'],
        'streamer': df['StreamerName'],
        'message': df['message'],
        'replaced_author': replaced_author,
        'no_spaces': no_spaces,
        'no_punctuation': no_punctuation,
        'label': df['Mohammad_Code']
    })

    # 4. Remove duplicate messages, then rows with missing values.
    new_df = new_df.drop_duplicates(subset=['no_spaces'], keep='first').dropna()

    # 5. Anonymise every mention except "@author". This is applied to new_df
    #    itself; the previous row loop wrote its result into the raw frame, so
    #    the cleaned frame never received the normalised text.
    new_df = new_df.assign(
        replaced_author=new_df['replaced_author'].str.replace(
            r'@(?!author\b)\w*', '@user', regex=True
        )
    )

    print("before sampling")
    count_rel = (new_df['label'] == 0).sum()
    count_irr = (new_df['label'] == 1).sum()
    print("we have {0} relevant to streamer messages, and {1} irrelevant to streamer messages".format(count_rel, count_irr))

    print("author name was replaced with @author {0} times".format(new_df['replaced_author'].str.count('@author').sum()))
    print("user name was replaced with @user {0} times".format(new_df['replaced_author'].str.count('@user').sum()))

    # The text column chosen here decides which pre-processing variant the
    # models are trained on.
    dataset = {
        'text': new_df['no_spaces'].tolist(),
        'labels': new_df['label'].tolist()
    }

    print("After Sampling")
    count_rel = (new_df['label'] == 0).sum()
    count_irr = (new_df['label'] == 1).sum()
    print("we have {0} relevant to streamer messages, and {1} irrelevant to streamer messages".format(count_rel, count_irr))

    return dataset

# Custom dataset class
class BinaryClassificationDataset(Dataset):
    """Torch ``Dataset`` that pairs tokenizer encodings with per-example labels.

    ``encodings`` is a mapping whose values can be indexed by example position
    and return tensors; ``labels`` is a sequence of the same length.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The label sequence defines how many examples there are.
        return len(self.labels)

    def __getitem__(self, idx):
        # Copy each encoded field for this example so the returned tensors do
        # not share storage or autograd history with the stored encodings.
        example = {}
        for field, values in self.encodings.items():
            example[field] = values[idx].clone().detach()
        example['labels'] = torch.tensor(self.labels[idx])
        return example

# Function to encode the dataset
def encode_dataset(tokenizer, texts, labels, max_length=512):
    """Tokenize ``texts`` and wrap the result with ``labels`` in a dataset.

    Every text is padded to exactly ``max_length`` tokens and truncated if
    longer; the tokenizer is asked for PyTorch tensors.

    Returns:
        A ``BinaryClassificationDataset`` over the encodings and ``labels``.
    """
    tokenizer_options = {
        'padding': 'max_length',
        'truncation': True,
        'max_length': max_length,
        'return_tensors': 'pt',
    }
    encodings = tokenizer(texts, **tokenizer_options)
    return BinaryClassificationDataset(encodings, labels)


def predict(text, model, tokenizer):
    """Classify ``text`` with a sequence-classification model.

    Args:
        text: A single string, or a list of strings to classify as one batch.
        model: Model whose output exposes ``.logits`` with the class scores on
            the last dimension.
        tokenizer: Callable that turns ``text`` into a dict of tensors.

    Returns:
        The predicted class index as an ``int`` when there is one prediction,
        otherwise a list of ints (one per input text).

    Side effects:
        Moves ``model`` to CUDA when available (CPU otherwise). The model is
        switched to eval mode for the forward pass and its previous
        train/eval mode is restored afterwards.
    """
    # Preprocess the input text
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')

    # Move the model to the same device as the input tensors
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference must run in eval mode: a model that has just been trained is
    # still in train mode, where dropout makes predictions non-deterministic.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(**inputs)
    finally:
        model.train(was_training)

    predicted = torch.argmax(outputs.logits, dim=-1)

    # .item() only works for a single prediction; batches come back as a list.
    if predicted.numel() == 1:
        return predicted.item()
    return predicted.tolist()

# load_and_preprocess_data()


  df['no_spaces'] = df['replaced_author'].str.replace(r'\s+', ' ')
  df['no_punctuation'] =  df['message'].str.replace('[{}]'.format(string.punctuation), '')


0                                                  ohhhhh
1             If i set my legs on fire will i run faster?
2                                              aaayyyeeee
3       entobiBugLove entobiBugLove entobiBugLove ento...
4          @user thank you for the gift sub!! gopiraHeart
                              ...                        
3993                                      just subscribed
3994       you can also do go doc strings | grep contains
3995                                                   Yo
3997                               PORQUEEEEEEE MARIAAAA?
3999            I get so stoked when I see you streaming.
Name: replaced_author, Length: 3404, dtype: object
before sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
author name was replaced with @author 36 times
user name was replaced with @user 99 times
After Sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages


{'text': ['ohhhhh',
  'If i set my legs on fire will i run faster',
  'aaayyyeeee',
  'entobiBugLove entobiBugLove entobiBugLove entobiBugLove entobiBugLove entobiNasiPog entobiBugLove entobiBugLove entobiBugLove entobiBugLove entobiBugLove entobiBugLove entobiBugLove',
  'leor0111 thank you for the gift sub gopiraHeart',
  'gopiraSip',
  'Hello how are you today',
  'Enderscram game dev is a small world thats why',
  'anyone know of rose of starcross  am i interrupting something ',
  'do you support lgbtq',
  'gopiraDoggers',
  'I love that it can be both a chemistry and guacamole pun',
  'so Thor what about that secret beatem up you were working on last year is it finished is it published I lost a couple streams so I didnt got any news about it',
  'NAME',
  'choose  the bottom yes',
  'I support everyone but Thor',
  'nice',
  'LUL',
  'Thank you blumberquack',
  'updates',
  'I forgot how incredible that animation is LUL',
  'hey GoPirateSoftware whats the update on the game dev ca

In [None]:
# Checkpoints to fine-tune and compare; each one gets its own stratified
# cross-validation run. "setfit-distilbert-user-intent" is intentionally not in
# the list (the original note marks the first one as already done).
models = ["albert-base-v2", "bert-base-uncased", "roberta-base", "distilbert-base-uncased"]
# models = ["bert-base-uncased"]

# Only echoed next to the results: the apply_sampling() call in the fold loop
# is commented out, so no resampling is actually performed.
sampling_technique = 0
print("Unprocessed")


def compute_fold_metrics(eval_pred):
    """Turn one evaluation pass into accuracy / precision / recall / F1.

    ``eval_pred.predictions`` holds the per-class scores (argmax over the last
    axis gives the predicted label) and ``eval_pred.label_ids`` the true labels.
    """
    predicted_labels = eval_pred.predictions.argmax(-1)
    true_labels = eval_pred.label_ids
    return {
        'accuracy': accuracy_score(true_labels, predicted_labels),
        'precision': precision_score(true_labels, predicted_labels),
        'recall': recall_score(true_labels, predicted_labels),
        'f1': f1_score(true_labels, predicted_labels),
    }


# The dataset and the splitter do not depend on the checkpoint, so build them
# once instead of once per model. With a fixed random_state the folds are the
# same for every model, which keeps the comparison fair.
dataset = load_and_preprocess_data()

# Initialize StratifiedKFold with 5 folds
n_splits = 5
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Not used by the current loop; kept for the commented-out resampling
# experiments below.
smote = SMOTE(random_state=42)

for m in models:
    tokenizer = AutoTokenizer.from_pretrained(m)

    # Collect the evaluation metrics across all folds
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    for train_index, val_index in kfold.split(dataset['text'], dataset['labels']):
        train_texts = [dataset['text'][i] for i in train_index]
        train_labels = [dataset['labels'][i] for i in train_index]
        # train_texts, train_labels = apply_sampling(train_texts, train_labels, sampling_technique)
        print(len(val_index))
        print(len(train_index))

        val_texts = [dataset['text'][i] for i in val_index]
        val_labels = [dataset['labels'][i] for i in val_index]

        # Encode the datasets
        train_encodings = encode_dataset(tokenizer, train_texts, train_labels)
        val_encodings = encode_dataset(tokenizer, val_texts, val_labels)

        #encodings = encode_dataset(tokenizer, dataset['text'], dataset['labels'])
        #train_encodings, train_labels = smote.fit_resample(train_encodings, train_labels)
        #X_resampled, y_resampled = smote.fit_resample(encodings['text'], encodings['labels'])

        # Prepare the training arguments
        training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            evaluation_strategy="epoch",
        )

        # Start every fold from the pretrained checkpoint so that folds do not
        # leak fine-tuned weights into each other.
        model = AutoModelForSequenceClassification.from_pretrained(m, num_labels=2)

        # Create the Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_encodings,
            eval_dataset=val_encodings,
            compute_metrics=compute_fold_metrics,
        )

        # Train and evaluate the model
        trainer.train()
        eval_metrics = trainer.evaluate()

        # Store the evaluation metrics
        accuracy_list.append(eval_metrics['eval_accuracy'])
        precision_list.append(eval_metrics['eval_precision'])
        recall_list.append(eval_metrics['eval_recall'])
        f1_list.append(eval_metrics['eval_f1'])

        # Save the trained model
        # model_save_path = f"saved_models/model_fold_{fold + 1}"
        # model.save_pretrained(model_save_path)

    # Print the mean and standard deviation of each metric over the folds
    print("sampling technique: ", sampling_technique)
    print("Model's Results of ", m)
    print(f"Accuracy: {np.mean(accuracy_list)} (±{np.std(accuracy_list)})")
    print(f"Precision: {np.mean(precision_list)} (±{np.std(precision_list)})")
    print(f"Recall: {np.mean(recall_list)} (±{np.std(recall_list)})")
    print(f"F1-score: {np.mean(f1_list)} (±{np.std(f1_list)})")


unprocessed


Downloading (…)lve/main/config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

  df['no_spaces'] = df['replaced_author'].str.replace(r'\s+', ' ')
  df['no_punctuation'] =  df['no_spaces'].str.replace('[{}]'.format(string.punctuation), '')


before sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
author name was replaced with @author 36.0 times
user name was replaced with @user 99.0 times
After Sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fcb37580>


Downloading pytorch_model.bin:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.516,0.524714,0.743025,0.823699,0.714286,0.765101
2,0.5047,0.577182,0.709251,0.873606,0.588972,0.703593
3,0.5018,0.470186,0.791483,0.800937,0.857143,0.828087


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc08e460>


Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6242,0.642852,0.688693,0.66548,0.939698,0.779167
2,0.4897,0.54589,0.770925,0.821809,0.776382,0.79845
3,0.4028,0.478984,0.785609,0.811881,0.824121,0.817955


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc893b80>


Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6454,0.546823,0.757709,0.831909,0.733668,0.779706
2,0.5208,0.648408,0.728341,0.766917,0.768844,0.76788
3,0.5421,0.495241,0.762115,0.80102,0.788945,0.794937


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e7f4a7c0>


Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4756,0.567657,0.737151,0.724846,0.886935,0.79774
2,0.533,0.597688,0.627019,0.818584,0.464824,0.592949
3,0.518,0.588612,0.71953,0.718816,0.854271,0.780712


680
2724
<__main__.BinaryClassificationDataset object at 0x7fe9fc08e520>


Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.477,0.55509,0.702941,0.673145,0.957286,0.790456
2,0.6025,0.577799,0.717647,0.701961,0.899497,0.788546
3,0.4725,0.501539,0.775,0.830189,0.773869,0.80104


sampling technique:  0
Model's Results of  albert-base-v2
Accuracy: 0.766747430249633 (±0.025638535237042438)
Precision: 0.792568622266104 (±0.03839047212325879)
Recall: 0.8196697774587222 (±0.03365418021302768)
F1-score: 0.8045462251464002 (±0.016773115607155175)


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  df['no_spaces'] = df['replaced_author'].str.replace(r'\s+', ' ')
  df['no_punctuation'] =  df['no_spaces'].str.replace('[{}]'.format(string.punctuation), '')


before sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
author name was replaced with @author 36.0 times
user name was replaced with @user 99.0 times
After Sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9dd950160>


Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5057,0.508064,0.760646,0.744813,0.899749,0.814983
2,0.349,0.48288,0.785609,0.862464,0.754386,0.804813
3,0.1954,0.834796,0.801762,0.856757,0.794486,0.824447


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc08e520>


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4449,0.501726,0.790015,0.786517,0.879397,0.830368
2,0.4477,0.496214,0.788546,0.901899,0.71608,0.798319
3,0.2714,0.71561,0.801762,0.831234,0.829146,0.830189


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e7dba9d0>


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.514,0.431351,0.809104,0.86612,0.796482,0.829843
2,0.312,0.467622,0.810573,0.889855,0.771357,0.82638
3,0.2591,0.822159,0.803231,0.84375,0.81407,0.828645


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e63d27c0>


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3896,0.506255,0.757709,0.845697,0.71608,0.77551
2,0.411,0.562228,0.751836,0.856698,0.690955,0.764951
3,0.3264,0.851908,0.797357,0.829949,0.821608,0.825758


680
2724
<__main__.BinaryClassificationDataset object at 0x7fe9e7e72460>


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4456,0.476601,0.766176,0.747412,0.907035,0.819523
2,0.3051,0.47738,0.802941,0.805556,0.874372,0.838554
3,0.2018,0.732894,0.802941,0.856757,0.796482,0.825521


sampling technique:  0
Model's Results of  bert-base-uncased
Accuracy: 0.8014105554115918 (±0.00211353016548245)
Precision: 0.8436894018038291 (±0.011708440736756656)
Recall: 0.8111585496404328 (±0.013671611402435255)
F1-score: 0.8269117847630449 (±0.00214852530150938)


Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

  df['no_spaces'] = df['replaced_author'].str.replace(r'\s+', ' ')
  df['no_punctuation'] =  df['no_spaces'].str.replace('[{}]'.format(string.punctuation), '')


before sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
author name was replaced with @author 36.0 times
user name was replaced with @user 99.0 times
After Sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc76a910>


Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4375,0.569993,0.779736,0.826772,0.789474,0.807692
2,0.5512,0.530104,0.781204,0.867647,0.739348,0.798376
3,0.3031,0.781603,0.787078,0.850829,0.77193,0.809461


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9dda3ae20>


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4857,0.485661,0.779736,0.784404,0.859296,0.820144
2,0.5016,0.589884,0.773862,0.912162,0.678392,0.778098
3,0.5263,0.755878,0.784141,0.832891,0.788945,0.810323


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc91a3d0>


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.424,0.446311,0.797357,0.833333,0.816583,0.824873
2,0.2269,0.63807,0.795888,0.903427,0.728643,0.806676
3,0.3844,0.66644,0.807636,0.878187,0.778894,0.825566


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e6254be0>


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4482,0.595304,0.765051,0.838068,0.741206,0.786667
2,0.3803,0.64157,0.74743,0.889655,0.648241,0.75
3,0.2338,0.781458,0.792952,0.848238,0.786432,0.816167


680
2724
<__main__.BinaryClassificationDataset object at 0x7fe9e63325b0>


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.455,0.52056,0.723529,0.691606,0.952261,0.801268
2,0.5437,0.576783,0.773529,0.797561,0.821608,0.809406
3,0.2715,0.527532,0.819118,0.849873,0.839196,0.844501


sampling technique:  0
Model's Results of  roberta-base
Accuracy: 0.798184762891941 (±0.01323243800984067)
Precision: 0.8520036401452729 (±0.014635215791132358)
Recall: 0.793079432248964 (±0.023818215687438984)
F1-score: 0.8212034488107808 (±0.012987915101509623)


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

  df['no_spaces'] = df['replaced_author'].str.replace(r'\s+', ' ')
  df['no_punctuation'] =  df['no_spaces'].str.replace('[{}]'.format(string.punctuation), '')


before sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
author name was replaced with @author 36.0 times
user name was replaced with @user 99.0 times
After Sampling
we have 1413 relevant to streamer messages, and 1991 irrelevant to streamer messages
681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e7dd4940>


Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5022,0.472093,0.784141,0.7625,0.917293,0.832765
2,0.3849,0.44299,0.797357,0.857534,0.784461,0.819372
3,0.1398,0.725687,0.810573,0.860963,0.807018,0.833118


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e63d2970>


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4856,0.450071,0.772394,0.754717,0.904523,0.822857
2,0.3598,0.519205,0.779736,0.889937,0.711055,0.790503
3,0.2907,0.818716,0.787078,0.828571,0.801508,0.814815


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9fc07c250>


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4502,0.442777,0.804699,0.838875,0.824121,0.831432
2,0.2273,0.484026,0.814978,0.867568,0.806533,0.835938
3,0.2083,0.815695,0.809104,0.848958,0.819095,0.83376


681
2723
<__main__.BinaryClassificationDataset object at 0x7fe9e7db0430>


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.397,0.538385,0.745962,0.818697,0.726131,0.76964
2,0.4558,0.541642,0.765051,0.841954,0.736181,0.785523
3,0.1328,0.907143,0.779736,0.829787,0.78392,0.806202


680
2724
<__main__.BinaryClassificationDataset object at 0x7fe9fc83ff10>


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4108,0.46606,0.755882,0.740664,0.896985,0.811364
2,0.3294,0.453499,0.802941,0.833333,0.829146,0.831234
3,0.0929,0.708152,0.816176,0.864,0.81407,0.838292


sampling technique:  0
Model's Results of  distilbert-base-uncased
Accuracy: 0.8005333851602314 (±0.014370456164907897)
Precision: 0.846455912558447 (±0.01498131999142143)
Recall: 0.8051221017367538 (±0.012178097920051747)
F1-score: 0.825237209310305 (±0.012459159321526487)


In [None]:
# Poche replication: bag-of-words baselines (Naive Bayes and linear SVM)
# evaluated with 10-fold cross-validation under three preprocessing variants.
import nltk
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, cross_validate, KFold
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

# Load data via the notebook's shared loader (defined in an earlier cell)
data = load_and_preprocess_data()
X = data['message']
y = data['label']

# Define the stemmer and stopwords for preprocessing
stemmer = SnowballStemmer('english')
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # The stopwords corpus is not bundled with nltk; fetch it on first use.
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))

# CountVectorizer's default analyzer (lower-casing + word tokenisation). The
# custom analyzers below build on it so every variant tokenises identically.
tokenize = CountVectorizer().build_analyzer()


def stem_tokens(document):
    """Tokenise ``document`` and return the list of stemmed tokens."""
    return [stemmer.stem(token) for token in tokenize(document)]


def stem_tokens_without_stopwords(document):
    """Tokenise ``document``, drop English stopwords, and stem what remains."""
    return [stemmer.stem(token) for token in tokenize(document) if token not in stop_words]


# Define the vectorizers for the three preprocessing methods.
# A callable analyzer must return a sequence of tokens; passing `stemmer.stem`
# directly would return one string and produce single-character features, and
# `stop_words=` is ignored whenever `analyzer` is callable -- hence the helpers.
vectorizers = [
    ('No Preprocessing', CountVectorizer()),
    ('Stemming', CountVectorizer(analyzer=stem_tokens)),
    ('Stopwords Removal and Stemming', CountVectorizer(analyzer=stem_tokens_without_stopwords)),
]

# Define the classifiers
classifiers = [
    ('Naive Bayes', MultinomialNB()),
    ('SVM', LinearSVC())
]

# Define the evaluation metrics
metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']

# Same seeded splitter for every configuration so results are comparable.
cv = KFold(n_splits=10, shuffle=True, random_state=42)

# Perform 10-fold cross-validation for each preprocessing method and each classifier.
# The vectorizer lives inside the pipeline, so it is re-fit on each training
# fold only and never sees the held-out fold.
for vectorizer_name, vectorizer in vectorizers:
    for classifier_name, classifier in classifiers:
        pipeline = make_pipeline(vectorizer, classifier)
        # cross_val_score supports a single scorer only; cross_validate handles
        # a list of metrics and returns one 'test_<metric>' array per metric.
        scores = cross_validate(pipeline, X, y, cv=cv, scoring=metrics)
        print(f"{vectorizer_name} with {classifier_name}:\n"
              f"Accuracy: {scores['test_accuracy'].mean():.4f}\n"
              f"Precision: {scores['test_precision_macro'].mean():.4f}\n"
              f"Recall: {scores['test_recall_macro'].mean():.4f}\n"
              f"F1-score: {scores['test_f1_macro'].mean():.4f}\n")


In [None]:
# BERT fine-tuning under different class-rebalancing strategies, evaluated
# with stratified k-fold cross-validation.
from imblearn.over_sampling import RandomOverSampler
from sklearn.feature_extraction.text import TfidfVectorizer

SEED = 42

dataset = load_and_preprocess_data()

# NOTE(review): this cell reads 'text'/'labels' while the classical baseline
# cell reads 'message'/'label' from the same loader -- confirm which columns
# load_and_preprocess_data() actually returns.
# Convert to plain arrays so the positional indices produced by the splitter
# are valid even if the frame's index is not 0..n-1 (e.g. after row filtering).
texts = np.asarray(dataset['text'], dtype=object)
# Encode labels as integers 0..k-1 (sorted order); the class encoded as 1 is
# the positive class for the binary precision/recall/F1 below.
class_names, labels = np.unique(np.asarray(dataset['labels']), return_inverse=True)
if len(class_names) != 2:
    raise ValueError(f"expected exactly 2 classes, found {len(class_names)}: {class_names!r}")
print(f"label encoding: {dict(enumerate(class_names))}")

# initialize the BERT tokenizer (a fresh model is created for every run below)
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)

# define the number of cross-validation folds
num_folds = 5

# define the sampling techniques
# NOTE(review): SMOTE synthesises new points by interpolating numeric feature
# vectors, so its output cannot be mapped back to text for BERT. It is replaced
# here by RandomOverSampler, which duplicates existing minority samples and
# exposes the chosen rows through `sample_indices_`. Confirm this substitution
# is acceptable for the experiment.
sampling_techniques = [
    ('RandomOverSampler', RandomOverSampler(random_state=SEED)),
    ('TomekLinks', TomekLinks()),
    ('NeighbourhoodCleaningRule', NeighbourhoodCleaningRule())
]

# define the evaluation metrics
metrics = ['accuracy', 'precision', 'recall', 'f1-score']

# define the k-fold cross-validation strategy (seeded for reproducible folds)
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=SEED)

# initialize the arrays to store the evaluation results for each fold and each sampling technique
results = {technique[0]: {metric: [] for metric in metrics} for technique in sampling_techniques}


def resample_texts(sampler, train_texts, train_labels):
    """Rebalance a text training set with an imbalanced-learn sampler.

    The samplers need numeric features, so the texts are embedded with a
    TF-IDF vectorizer fitted on ``train_texts`` only. The sampler's
    ``sample_indices_`` is then used to pick the corresponding original texts.

    Args:
        sampler: imbalanced-learn sampler exposing ``sample_indices_`` after
            ``fit_resample``.
        train_texts: 1-D numpy array of strings.
        train_labels: 1-D numpy array of integer labels aligned with ``train_texts``.

    Returns:
        Tuple ``(texts, labels)`` of the resampled training set.
    """
    features = TfidfVectorizer().fit_transform(train_texts)
    sampler.fit_resample(features, train_labels)
    selected = sampler.sample_indices_
    return train_texts[selected], train_labels[selected]


def encode(batch_texts):
    """Tokenize a 1-D array of strings into a dict of TF tensors for Keras."""
    return dict(tokenizer(batch_texts.tolist(), padding=True, truncation=True,
                          max_length=512, return_tensors='tf'))


# perform k-fold cross-validation
for fold, (train_index, val_index) in enumerate(skf.split(texts, labels)):
    # extract the training and validation sets for the current fold
    X_train, y_train = texts[train_index], labels[train_index]
    X_val, y_val = texts[val_index], labels[val_index]

    # the validation set is never resampled, so tokenize it once per fold
    val_inputs = encode(X_val)

    # apply each sampling technique to the training set
    for technique_name, sampler in sampling_techniques:
        X_train_resampled, y_train_resampled = resample_texts(sampler, X_train, y_train)
        print(f"fold {fold + 1}/{num_folds} - {technique_name}: "
              f"{len(X_train_resampled)} training samples")

        # tokenize the resampled training set
        train_inputs = encode(X_train_resampled)

        # Start from fresh pretrained weights for every (fold, technique) run;
        # reusing one model would let it train on samples that are validation
        # data in a later fold.
        tf.keras.backend.clear_session()
        model = TFBertForSequenceClassification.from_pretrained(model_name, num_labels=2)

        # train the BERT model on the resampled training set and evaluate it on the validation set
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])
        model.fit(train_inputs, y_train_resampled, validation_data=(val_inputs, y_val), batch_size=16, epochs=3)

        # predict the labels for the validation set and compute the evaluation metrics
        # (predict returns a model-output object; the class scores are in `.logits`)
        logits = model.predict(val_inputs, batch_size=16).logits
        y_pred_classes = np.argmax(logits, axis=1)

        # store the evaluation results for the current fold and sampling technique
        results[technique_name]['accuracy'].append(accuracy_score(y_val, y_pred_classes))
        results[technique_name]['precision'].append(precision_score(y_val, y_pred_classes))
        results[technique_name]['recall'].append(recall_score(y_val, y_pred_classes))
        results[technique_name]['f1-score'].append(f1_score(y_val, y_pred_classes))

# compute the mean evaluation metrics for each sampling technique
for technique_name, _ in sampling_techniques:
    print(f"Mean evaluation metrics for {technique_name}:")
    for metric in metrics:
        mean_metric = np.mean(results[technique_name][metric])
        print(f"{metric}: {mean_metric:.4f}")

# New Section

# New Section