# Imports and Setup

In [7]:
import optuna

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score


from classifiers import *
from dataset import TextDataset, get_dataloader
from embedder import Embedder
from Config.dataset_config import *

import nltk
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')

from tqdm import tqdm

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\amita\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\amita\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\amita\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Define optimizers

In [2]:
# Custom tqdm callback
class TqdmCallback:
    def __init__(self, n_trials):
        self.pbar = tqdm(total=n_trials)

    def __call__(self, study, trial):
        self.pbar.update(1)

    def close(self):
        self.pbar.close()

In [13]:
model_hyperparameters = {   # TODO: Add neural network hyperparameters
    'logistic_regression': {
        'learning_rate': (1e-5, 1e-3, 'loguniform'),
        'weight_decay': (1e-5, 1e-3, 'loguniform')
    },
    'svm': {
        'C': (1e-4, 1e2, 'loguniform'),
        'kernel': (['linear', 'poly', 'rbf', 'sigmoid'], 'categorical'),
        'degree': (2, 5, 'int'),
        'gamma': (['scale', 'auto'], 'categorical')
    },
    'xgboost': {
        'n_estimators': ([5, 100], 'int'),
        'learning_rate': ([1e-3, 1.0], 'loguniform'),
        'booster': (['gbtree', 'gblinear', 'dart'], 'categorical')
    },
    'dnn': {
        "num_epochs": ([2, 15], 'int'),  # Adjust after trial and error
        "learning_rate": ([1e-5, 1e-3], 'loguniform'),
        "batch_norm": ([True, False], 'categorical'),
        "drop_out": ([0.0, 0.5], 'uniform'),
        "layers": ([[768, 64, 3],
                    [768, 128, 3],
                    [768, 64, 64, 3],
                    [768, 128, 64, 3],
                    [768, 512, 32, 3],
                    [768, 512, 128, 3],
                    [768, 512, 128, 64, 3]], 'custom')  # Layer dimensions, including an input and output layer.
    }
}

def suggest_hyperparameters(trial, hyperparams):
    params = {}
    for key, value in hyperparams.items():
        if len(value) == 2 and value[1] == 'categorical':
            params[key] = trial.suggest_categorical(key, value[0])
        elif len(value) == 3:
            if value[2] == 'loguniform':
                params[key] = trial.suggest_float(key, value[0], value[1], log=True)
            elif value[2] == 'uniform':
                params[key] = trial.suggest_float(key, value[0], value[1])
            elif value[2] == 'int':
                params[key] = trial.suggest_int(key, value[0], value[1])
            elif value[2] == 'categorical':
                params[key] = trial.suggest_categorical(key, value[0])
            elif value[1] == 'custom':
                hidden_dims = params['hidden_dims']
                layer_count = len(hidden_dims)
                params[key] = trial.suggest_categorical(key, value[0][layer_count])
            else:
                raise ValueError(f"Hyperparameter tuple for {key} is not in the expected format: {value}")
    return params

def cross_validation(estimator, data, n_splits=10):
    if estimator.model_type in ['svm', 'xgboost']:
        x, y = data[1][0], data[1][1]

        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores = []
        for i, (train_index, val_index) in enumerate(cv.split(x, y)):
            # Split to train and validation sets
            x_train, x_val = x[train_index], x[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Fit to the Classifier train and predict data type
            train = ('shadow', (x_train, y_train))
            val = ('shadow', (x_val, y_val))

            estimator.fit(train)
            pred = estimator.predict(val)
            score = f1_score(y_val, pred, average='micro')
            scores.append(score)
        return scores

# Define objective function for optuna. The function include all models, and should be called with the model name. The function optimize the Classifier class hyperparameters.
def objective(trial, model_name, data, folds_scores):
    params = suggest_hyperparameters(trial, model_hyperparameters[model_name])

    if model_name == 'logistic_regression':
        params['num_epochs'] = 1
        params['batch_norm'] = False
        params['drop_out'] = 0.0
        params['layers'] = [768, 3]

    # Add some more parameters for XGBoost
    if model_name == 'xgboost':
        if params["booster"] in ["gbtree", "dart"]:
            # maximum depth of the tree, signifies complexity of the tree.
            params["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
            # minimum child weight, larger the term more conservative the tree.
            params["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
            params["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
            # defines how selective algorithm is.
            params["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
            params["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

        if params["booster"] == "dart":
            params["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
            params["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
            params["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
            params["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)


    model = Classifier(params, model_type=model_name)
    # model.fit(data)

    # Create a pipeline with just the classifier since feature prep is external
    pipeline = Pipeline([
        ('classifier', model)
    ])

    # Perform cross validation
    if model_name in ['svm', 'xgboost']:
        scores = cross_validation(model, data, n_splits=10)
    else:
        scores = cross_val_score(pipeline, data, cv=cv, scoring='f1')

    folds_scores.append(scores)     # Save scores for statistic tests
    return np.mean(scores)

def optimize_model(model_name, data, n_trials=50, timout=1200):
    """
    The actual optimization.
    """
    folds_scores = []   # create a list to store the scores from each trial folds
    study = optuna.create_study(direction='maximize')
    progress_bar = TqdmCallback(n_trials)
    study.optimize(lambda trial: objective(trial, model_name, data, folds_scores), n_trials=n_trials, timeout=timout, callbacks=[progress_bar])
    # Close progress bar
    progress_bar.close()

    best_params = study.best_params
    best_value = study.best_value

    print(f"Best hyperparameters for {model_name}: {best_params}")
    print(f"Best F1 score for {model_name}: {best_value}")

    return best_params, best_value, folds_scores

# Optimize models

## Create Datasets

In [9]:
# Create 4 different datasets: embedding with and without augmentation, and tf-idf with and without augmentation.
embedder = Embedder()
data_without_augmentation = TextDataset(
    data_path=DATA_PATH,
    subset=SUBSET,
    id_column_idx=ID_COLUMN_IDX,
    comment_column_idx=COMMENT_COLUMN_IDX,
    label_column_idx=LABEL_COLUMN_IDX,
    subset_column_idx=SUBSET_COLUMN_IDX,
    augmented_classes=AUGMENTED_CLASSES,
    augmentation_ratio=AUGMENTATION_RATIO,
    augmentation_methods=AUGMENTATION_METHODS,
    adversation_ratio = ADVERSATION_RATIO
)

data_with_augmentation = TextDataset(
    data_path=DATA_PATH,
    subset=SUBSET,
    id_column_idx=ID_COLUMN_IDX,
    comment_column_idx=COMMENT_COLUMN_IDX,
    label_column_idx=LABEL_COLUMN_IDX,
    subset_column_idx=SUBSET_COLUMN_IDX,
    augmented_classes=AUGMENTED_CLASSES,
    augmentation_ratio=5,
    augmentation_methods=AUGMENTATION_METHODS,
    adversation_ratio = ADVERSATION_RATIO
)

bert_embedding_no_augmentation_loader = get_dataloader(
    dataset=data_without_augmentation,
    embedder=embedder,
    datashape='embedding',
    embedding_method=EMBEDDING_METHOD,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

bert_embedding_with_augmentation_loader = get_dataloader(
    dataset=data_with_augmentation,
    embedder=embedder,
    datashape='embedding',
    embedding_method=EMBEDDING_METHOD,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

tfidf_embedding_no_augmentation_loader = get_dataloader(
    dataset=data_without_augmentation,
    embedder=embedder,
    datashape='embedding',
    embedding_method='tf-idf',
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

tfidf_embedding_with_augmentation_loader = get_dataloader(
    dataset=data_with_augmentation,
    embedder=embedder,
    datashape='embedding',
    embedding_method='tf-idf',
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

[Dataset Status]: Loading the dataset...



Preprocessing comments:   0%|          | 0/6637 [00:00<?, ?it/s][A
Preprocessing comments:  42%|████▏     | 2788/6637 [00:00<00:00, 27647.01it/s][A
Preprocessing comments: 100%|██████████| 6637/6637 [00:00<00:00, 26681.72it/s][A


[Dataset Status]: No Augmentation was chosen (augmentation/ adversation ratio == 0 or no augmented_classes). Moving on...
[Dataset Status]: Loading the dataset...



Preprocessing comments:   0%|          | 0/6637 [00:00<?, ?it/s][A
Preprocessing comments:  42%|████▏     | 2790/6637 [00:00<00:00, 27720.92it/s][A
Preprocessing comments: 100%|██████████| 6637/6637 [00:00<00:00, 29216.04it/s][A

Augmenting data:   0%|          | 0/6537 [00:00<?, ?row/s][A
Augmenting data:   0%|          | 10/6537 [00:00<01:06, 98.12row/s][A
Augmenting data:   1%|          | 43/6537 [00:00<00:29, 220.52row/s][A
Augmenting data:   2%|▏         | 111/6537 [00:00<00:15, 420.15row/s][A
Augmenting data:   2%|▏         | 154/6537 [00:00<00:19, 320.37row/s][A
Augmenting data:   3%|▎         | 190/6537 [00:00<00:20, 317.01row/s][A
Augmenting data:   3%|▎         | 224/6537 [00:00<00:24, 261.43row/s][A
Augmenting data:   4%|▍         | 253/6537 [00:00<00:23, 263.05row/s][A
Augmenting data:   4%|▍         | 281/6537 [00:01<00:28, 217.32row/s][A
Augmenting data:   5%|▌         | 329/6537 [00:01<00:22, 275.00row/s][A
Augmenting data:   6%|▌         | 395/6537 [00:01<

[Dataloader Status]: Loading the dataset...
[EmbeddingDataset]: Loading precomputed embeddings from C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\Data\subset B_augmentation=0_embeddings_distilbert.pkl...
[Dataloader Status]: Done.
[Dataloader Status]: Loading the dataset...
[EmbeddingDataset]: Loading precomputed embeddings from C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\Data\subset B_augmentation=5_embeddings_distilbert.pkl...
[Dataloader Status]: Done.
[Dataloader Status]: Loading the dataset...
[EmbeddingDataset]: Loading precomputed embeddings from C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\Data\subset B_augmentation=0_embeddings_tf-idf.pkl...
[Dataloader Status]: Done.
[Dataloader Status]: Loading the dataset...
[EmbeddingDataset]: Loading precomputed embeddings from C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Clas

In [36]:
from keras.utils.np_utils import to_categorical
print(to_categorical(bert_embedding_no_augmentation_loader[1][1]))

ModuleNotFoundError: No module named 'tensorflow'

## Logistic Regression

In [16]:
lr_results = {}
lr_results['bert_without_augmentation'] = optimize_model('logistic_regression', bert_embedding_no_augmentation_loader)

[I 2025-01-19 21:41:08,942] A new study created in memory with name: no-name-defd2a96-81d0-4139-853a-37390cef2353

  0%|          | 0/50 [00:00<?, ?it/s][A

[Model Fit Status]: Fitting the model...


[W 2025-01-19 21:41:33,954] Trial 0 failed with parameters: {'learning_rate': 3.235498268960674e-05, 'weight_decay': 2.1010050203907292e-05} because of the following error: ValueError('n_splits=10 cannot be greater than the number of members in each class.').
Traceback (most recent call last):
  File "C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13260\634846785.py", line 109, in <lambda>
    study.optimize(lambda trial: objective(trial, model_name, data, folds_scores), n_trials=n_trials, timeout=timout, callbacks=[progress_bar])
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13260\634846785.py", line 95, in objective
    scores = cross_val_score(pipeline, data[1][0

Epoch 1: Loss = 0.9919712543487549


ValueError: n_splits=10 cannot be greater than the number of members in each class.

In [33]:

lr_results['bert_with_augmentation'] = optimize_model('logistic_regression', bert_embedding_with_augmentation_loader)
lr_results['tfidf_without_augmentation'] = optimize_model('logistic_regression', tfidf_embedding_no_augmentation_loader)
lr_results['tfidf_with_augmentation'] = optimize_model('logistic_regression', tfidf_embedding_with_augmentation_loader)

[I 2025-01-16 21:49:56,833] A new study created in memory with name: no-name-d8e117c0-2438-45a7-9c42-d6e67c163556

  0%|          | 0/50 [00:00<?, ?it/s][A[W 2025-01-16 21:50:18,034] Trial 0 failed with parameters: {'C': 0.0009880967647562218} because of the following error: ValueError('not enough values to unpack (expected 3, got 2)').
Traceback (most recent call last):
  File "C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13200\3548785863.py", line 107, in <lambda>
    study.optimize(lambda trial: objective(trial, model_name, data, folds_scores), n_trials=n_trials, timeout=timout, callbacks=[progress_bar])
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13200\3548

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
print("Logistic Regression results:\n\n")
print(f"Using BERT embeddings without augmentation scores: {lr_results['bert_without_augmentation'][2]}")
print(f"Using BERT embeddings without augmentation best score: {lr_results['bert_without_augmentation'][1]}")
print(f"Using BERT embeddings without augmentation best parameters: {lr_results['bert_without_augmentation'][0]}\n\n")
print(f"Using BERT embeddings with augmentation scores: {lr_results['bert_with_augmentation'][2]}")
print(f"Using BERT embeddings with augmentation best score: {lr_results['bert_with_augmentation'][1]}")
print(f"Using BERT embeddings with augmentation best parameters: {lr_results['bert_with_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings without augmentation scores: {lr_results['tfidf_without_augmentation'][2]}")
print(f"Using TF-IDF embeddings without augmentation best score: {lr_results['tfidf_without_augmentation'][1]}")
print(f"Using TF-IDF embeddings without augmentation best parameters: {lr_results['tfidf_without_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings with augmentation scores: {lr_results['tfidf_with_augmentation'][2]}")
print(f"Using TF-IDF embeddings with augmentation best score: {lr_results['tfidf_with_augmentation'][1]}")
print(f"Using TF-IDF embeddings with augmentation best parameters: {lr_results['tfidf_with_augmentation'][0]}\n\n")

## SVM

In [14]:
svm_results = {}
svm_results['bert_without_augmentation'] = optimize_model('svm', bert_embedding_no_augmentation_loader)

[I 2025-01-19 23:00:45,559] A new study created in memory with name: no-name-15cdaa55-a6b7-45e4-9156-b2a084cb82e6



  0%|          | 0/50 [00:00<?, ?it/s][A[A[A

[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...
[Model Fit Status]: Fitting the model...
[Model Pred Status]: Generating predictions...


[W 2025-01-19 23:58:40,266] Trial 0 failed with parameters: {'C': 44.564089632299975, 'kernel': 'linear', 'degree': 5, 'gamma': 'auto'} because of the following error: AttributeError("'list' object has no attribute 'mean'").
Traceback (most recent call last):
  File "C:\Users\amita\PycharmProjects\Israel-Palestine-Political-Affiliation-Text-Classification\venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13340\2560151948.py", line 128, in <lambda>
    study.optimize(lambda trial: objective(trial, model_name, data, folds_scores), n_trials=n_trials, timeout=timout, callbacks=[progress_bar])
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\amita\AppData\Local\Temp\ipykernel_13340\2560151948.py", line 119, in objective
    return scores.mean()
           ^^^^^^^^^^^
AttributeError: 'list' object ha

AttributeError: 'list' object has no attribute 'mean'

In [47]:
svm_results['bert_with_augmentation'] = optimize_model('svm', bert_embedding_with_augmentation_loader)
svm_results['tfidf_without_augmentation'] = optimize_model('svm', tfidf_embedding_no_augmentation_loader)
svm_results['tfidf_with_augmentation'] = optimize_model('svm', tfidf_embedding_with_augmentation_loader)

In [None]:
print("SVM results:\n\n")
print(f"Using BERT embeddings without augmentation scores: {svm_results['bert_without_augmentation'][2]}")
print(f"Using BERT embeddings without augmentation best score: {svm_results['bert_without_augmentation'][1]}")
print(f"Using BERT embeddings without augmentation best parameters: {svm_results['bert_without_augmentation'][0]}\n\n")
print(f"Using BERT embeddings with augmentation scores: {svm_results['bert_with_augmentation'][2]}")
print(f"Using BERT embeddings with augmentation best score: {svm_results['bert_with_augmentation'][1]}")
print(f"Using BERT embeddings with augmentation best parameters: {svm_results['bert_with_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings without augmentation scores: {svm_results['tfidf_without_augmentation'][2]}")
print(f"Using TF-IDF embeddings without augmentation best score: {svm_results['tfidf_without_augmentation'][1]}")
print(f"Using TF-IDF embeddings without augmentation best parameters: {svm_results['tfidf_without_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings with augmentation scores: {svm_results['tfidf_with_augmentation'][2]}")
print(f"Using TF-IDF embeddings with augmentation best score: {svm_results['tfidf_with_augmentation'][1]}")
print(f"Using TF-IDF embeddings with augmentation best parameters: {svm_results['tfidf_with_augmentation'][0]}\n\n")


## XGBoost

In [None]:
xgb_results = {}
xgb_results['bert_without_augmentation'] = optimize_model('xgboost', bert_embedding_no_augmentation_loader)

In [53]:
xgb_results['bert_with_augmentation'] = optimize_model('xgboost', bert_embedding_with_augmentation_loader)
xgb_results['tfidf_without_augmentation'] = optimize_model('xgboost', tfidf_embedding_no_augmentation_loader)
xgb_results['tfidf_with_augmentation'] = optimize_model('xgboost', tfidf_embedding_with_augmentation_loader)

In [None]:
print("XGBoost results:\n\n")
print(f"Using BERT embeddings without augmentation scores: {xgb_results['bert_without_augmentation'][2]}")
print(f"Using BERT embeddings without augmentation best score: {xgb_results['bert_without_augmentation'][1]}")
print(f"Using BERT embeddings without augmentation best parameters: {xgb_results['bert_without_augmentation'][0]}\n\n")
print(f"Using BERT embeddings with augmentation scores: {xgb_results['bert_with_augmentation'][2]}")
print(f"Using BERT embeddings with augmentation best score: {xgb_results['bert_with_augmentation'][1]}")
print(f"Using BERT embeddings with augmentation best parameters: {xgb_results['bert_with_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings without augmentation scores: {xgb_results['tfidf_without_augmentation'][2]}")
print(f"Using TF-IDF embeddings without augmentation best score: {xgb_results['tfidf_without_augmentation'][1]}")
print(f"Using TF-IDF embeddings without augmentation best parameters: {xgb_results['tfidf_without_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings with augmentation scores: {xgb_results['tfidf_with_augmentation'][2]}")
print(f"Using TF-IDF embeddings with augmentation best score: {xgb_results['tfidf_with_augmentation'][1]}")
print(f"Using TF-IDF embeddings with augmentation best parameters: {xgb_results['tfidf_with_augmentation'][0]}\n\n")

## DNN

In [None]:
dnn_results = {}
dnn_results['bert_without_augmentation'] = optimize_model('dnn', bert_embedding_no_augmentation_loader)
dnn_results['bert_with_augmentation'] = optimize_model('dnn', bert_embedding_with_augmentation_loader)
dnn_results['tfidf_without_augmentation'] = optimize_model('dnn', tfidf_embedding_no_augmentation_loader)
dnn_results['tfidf_with_augmentation'] = optimize_model('dnn', tfidf_embedding_with_augmentation_loader)

In [None]:
print("DNN results:\n\n")
print(f"Using BERT embeddings without augmentation scores: {dnn_results['bert_without_augmentation'][2]}")
print(f"Using BERT embeddings without augmentation best score: {dnn_results['bert_without_augmentation'][1]}")
print(f"Using BERT embeddings without augmentation best parameters: {dnn_results['bert_without_augmentation'][0]}\n\n")
print(f"Using BERT embeddings with augmentation scores: {dnn_results['bert_with_augmentation'][2]}")
print(f"Using BERT embeddings with augmentation best score: {dnn_results['bert_with_augmentation'][1]}")
print(f"Using BERT embeddings with augmentation best parameters: {dnn_results['bert_with_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings without augmentation scores: {dnn_results['tfidf_without_augmentation'][2]}")
print(f"Using TF-IDF embeddings without augmentation best score: {dnn_results['tfidf_without_augmentation'][1]}")
print(f"Using TF-IDF embeddings without augmentation best parameters: {dnn_results['tfidf_without_augmentation'][0]}\n\n")
print(f"Using TF-IDF embeddings with augmentation scores: {dnn_results['tfidf_with_augmentation'][2]}")
print(f"Using TF-IDF embeddings with augmentation best score: {dnn_results['tfidf_with_augmentation'][1]}")
print(f"Using TF-IDF embeddings with augmentation best parameters: {dnn_results['tfidf_with_augmentation'][0]}\n\n")