In [None]:
!pip install simpletransformers scikit-learn xgboost optuna

import pandas as pd
from simpletransformers.classification import ClassificationModel
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import logging
from sklearn.model_selection import KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
import joblib
import os
import optuna
import xgboost as xgb

In [None]:
# Mount Google Drive into the Colab runtime; the dataset CSVs and saved model
# checkpoints used by the following cells are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Logging: INFO for the notebook in general, but only warnings (and above)
# from the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Raw CSVs read from Google Drive.
olid_train = pd.read_csv('/content/drive/MyDrive/sem4/sem4/olid-train-small.csv')
hasoc_train = pd.read_csv('/content/drive/MyDrive/sem4/sem4/hasoc-train.csv')
olid_test = pd.read_csv('/content/drive/MyDrive/sem4/sem4/olid-test.csv')

# Keep only the two columns the models consume, as independent copies.
olid_train_data = olid_train[['text', 'labels']].copy()
hasoc_train_data = hasoc_train[['text', 'labels']].copy()
olid_test_data = olid_test[['text', 'labels']].copy()

# Checkpoints saved under the OLID_* prefix (in-domain).
roberta = "/content/drive/MyDrive/results/OLID_roberta-base_lr_2e-05_bs_16_epochs_5"
bert = "/content/drive/MyDrive/results/OLID_bert-base-cased_lr_1e-05_bs_16_epochs_5"
hatebert = "/content/drive/MyDrive/results/OLID_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5"
distrillbert = "/content/drive/MyDrive/results/OLID_distilbert-base-cased_lr_1e-05_bs_16_epochs_5"
# xlnet = ""

# Checkpoints saved under the HASOC_* prefix (cross-domain).
cross_roberta = "/content/drive/MyDrive/results/HASOC_roberta-base_lr_1e-05_bs_16_epochs_5"
cross_bert = "/content/drive/MyDrive/results/HASOC_bert-base-cased_lr_2e-05_bs_16_epochs_5"
cross_hatebert = "/content/drive/MyDrive/results/HASOC_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5"
cross_distrillbert = "/content/drive/MyDrive/results/HASOC_distilbert-base-cased_lr_1e-05_bs_16_epochs_5"
# xlnet_cross = ""

# Model combinations to stack; each cross-domain stack mirrors the in-domain
# stack with the same suffix.
in_domain_stack_one = [roberta, bert, hatebert]
cross_domain_stack_one = [cross_roberta, cross_bert, cross_hatebert]

in_domain_stack_two = [roberta, distrillbert, bert]
cross_domain_stack_two = [cross_roberta, cross_distrillbert, cross_bert]

In [None]:
# Define the Optuna objective function based on the meta-model type
def objective(trial, meta_model_type, meta_features, y_train):
    """Optuna objective: mean cross-validated F1 of one candidate meta-model.

    Samples hyperparameters for the requested meta-model family from ``trial``,
    builds the estimator and scores it with 5-fold ``cross_val_score`` using
    ``scoring='f1'``.

    NOTE(review): a function with the same name and signature is defined again
    in a later cell of this notebook; once that cell runs it replaces this one.

    Args:
        trial: Optuna trial used to sample hyperparameters.
        meta_model_type: One of ``'logistic'``, ``'xgboost'`` or
            ``'random_forest'``.
        meta_features: Feature matrix handed to ``cross_val_score``.
        y_train: Target labels handed to ``cross_val_score``.

    Returns:
        The mean of the per-fold F1 scores.

    Raises:
        ValueError: If ``meta_model_type`` is not one of the supported families
            (previously this surfaced as an ``UnboundLocalError``).
    """
    # The sampled parameter names are kept as-is: callers splat
    # ``study.best_params`` straight into the estimator constructors.
    if meta_model_type == 'logistic':
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        C = trial.suggest_float('C', 1e-3, 1e2, log=True)  # Regularization strength
        penalty = trial.suggest_categorical('penalty', ['l2', None])
        solver = trial.suggest_categorical('solver', ['lbfgs', 'saga'])
        meta_model = LogisticRegression(C=C, penalty=penalty, solver=solver)

    elif meta_model_type == 'xgboost':
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 3, 10)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True)
        # suggest_float without ``log`` replaces the deprecated suggest_uniform.
        subsample = trial.suggest_float('subsample', 0.5, 1.0)
        colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
        meta_model = xgb.XGBClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            use_label_encoder=False
        )

    elif meta_model_type == 'random_forest':
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 5, 20)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 4)
        meta_model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf
        )

    else:
        raise ValueError(
            f"Unsupported meta_model_type {meta_model_type!r}; "
            "expected 'logistic', 'xgboost' or 'random_forest'."
        )

    # Perform cross-validation on the meta-model
    f1_scores = cross_val_score(meta_model, meta_features, y_train, cv=5, scoring='f1')
    return f1_scores.mean()

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib
from simpletransformers.classification import ClassificationModel
import optuna
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

# Define the Optuna objective function for hyperparameter optimization
def objective(trial, meta_model_type, meta_features, y_train):
    """Score one Optuna trial: mean 5-fold F1 of the sampled meta-model.

    NOTE(review): this redefines ``objective`` from an earlier cell of the
    notebook; whichever definition ran last is the one the kernel uses.

    Args:
        trial: Optuna trial used to sample hyperparameters.
        meta_model_type: ``'logistic'``, ``'xgboost'`` or ``'random_forest'``.
        meta_features: Feature matrix handed to ``cross_val_score``.
        y_train: Target labels handed to ``cross_val_score``.

    Returns:
        The mean of the per-fold F1 scores (``cv=5``, ``scoring='f1'``).

    Raises:
        ValueError: If ``meta_model_type`` is not supported. Without this guard
            an unknown type reached ``cross_val_score`` with ``meta_model``
            unbound.
    """
    supported_types = ('logistic', 'xgboost', 'random_forest')
    if meta_model_type not in supported_types:
        raise ValueError(
            f"Unsupported meta_model_type {meta_model_type!r}; expected one of {supported_types}."
        )

    # Parameter names must stay stable: the caller builds the final estimator
    # with ``**study.best_params``.
    if meta_model_type == 'logistic':
        # suggest_float(log=True) is the non-deprecated form of suggest_loguniform.
        C = trial.suggest_float('C', 1e-3, 1e2, log=True)
        penalty = trial.suggest_categorical('penalty', ['l2', None])
        solver = trial.suggest_categorical('solver', ['lbfgs', 'saga'])
        meta_model = LogisticRegression(C=C, penalty=penalty, solver=solver)
    elif meta_model_type == 'xgboost':
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 3, 10)
        learning_rate = trial.suggest_float('learning_rate', 1e-3, 1e-1, log=True)
        # suggest_float is the non-deprecated form of suggest_uniform.
        subsample = trial.suggest_float('subsample', 0.5, 1.0)
        colsample_bytree = trial.suggest_float('colsample_bytree', 0.5, 1.0)
        meta_model = xgb.XGBClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            use_label_encoder=False
        )
    else:  # 'random_forest'
        n_estimators = trial.suggest_int('n_estimators', 50, 200)
        max_depth = trial.suggest_int('max_depth', 5, 20)
        min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
        min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 4)
        meta_model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf
        )

    # Perform cross-validation on the meta-model
    f1_scores = cross_val_score(meta_model, meta_features, y_train, cv=5, scoring='f1')
    return f1_scores.mean()

# Main function to perform stacking ensemble with Optuna optimization
def stacking_ensemble_with_optuna(models, train_data, eval_data, meta_model_type='logistic', k_folds=5, save_path=""):
    """Fit an Optuna-tuned meta-classifier on top of saved transformer classifiers.

    For every checkpoint in ``models`` the class-1 output column
    (``model_outputs[:, 1]``) is collected for ``train_data`` (fold by fold)
    and for ``eval_data``. These columns are the meta-features on which a
    meta-model is tuned (20 Optuna trials of ``objective``), refit with the
    best parameters and scored on ``eval_data``. Metrics, predictions, true
    labels and the fitted meta-model are written to the directory
    ``f"{save_path}_{meta_model_type}_optuna"``.

    NOTE(review): the checkpoints are only loaded and evaluated here, never
    re-trained per fold. If they were fine-tuned on ``train_data``, the
    training meta-features are not genuinely out-of-fold — confirm that this
    is intended.

    Args:
        models: Paths of saved checkpoints. The simpletransformers model type
            is inferred from the path: 'roberta', 'distilbert', else 'bert'.
        train_data: DataFrame with 'text' and 'labels' columns used to build
            the meta-features the meta-model is trained on.
        eval_data: DataFrame with 'text' and 'labels' columns used for the
            final evaluation.
        meta_model_type: 'logistic', 'xgboost' or 'random_forest'.
        k_folds: Number of shuffled KFold splits (random_state=42) over
            ``train_data``.
        save_path: Prefix of the output directory.

    Raises:
        ValueError: If ``meta_model_type`` is not supported. Raised up front,
            before any directory is created or any model is loaded.
    """
    meta_model_factories = {
        'logistic': lambda params: LogisticRegression(**params),
        'xgboost': lambda params: xgb.XGBClassifier(**params, use_label_encoder=False),
        'random_forest': lambda params: RandomForestClassifier(**params),
    }
    if meta_model_type not in meta_model_factories:
        raise ValueError(
            f"Unsupported meta_model_type {meta_model_type!r}; "
            f"expected one of {sorted(meta_model_factories)}."
        )

    save_path = f"{save_path}_{meta_model_type}_optuna"
    os.makedirs(save_path, exist_ok=True)

    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    X_train = np.array(train_data['text'])
    y_train = np.array(train_data['labels'])
    y_eval = np.array(eval_data['labels'])

    meta_features = np.zeros((len(train_data), len(models)))
    meta_eval_features = np.zeros((len(eval_data), len(models)))

    # The split does not depend on the model, so compute it once.
    folds = list(kfold.split(X_train))

    for i, model_path in enumerate(models):
        model_type = 'roberta' if 'roberta' in model_path else 'distilbert' if 'distilbert' in model_path else 'bert'
        print(f"Loading and evaluating model: {model_path} (model type: {model_type})")
        # Load each checkpoint once. The same checkpoint was previously
        # reloaded for every fold even though nothing is trained in between.
        model = ClassificationModel(model_type, model_path, use_cuda=True)

        for fold, (_, val_idx) in enumerate(folds):
            print(f"\nFold {fold + 1}/{k_folds}")
            fold_val_data = pd.DataFrame({'text': X_train[val_idx], 'labels': y_train[val_idx]})
            _, model_outputs, _ = model.eval_model(fold_val_data)
            meta_features[val_idx, i] = model_outputs[:, 1]

        # One pass over the evaluation set per model. Averaging k passes of the
        # same checkpoint over the same data only repeated this computation.
        _, model_eval_outputs, _ = model.eval_model(eval_data)
        meta_eval_features[:, i] = model_eval_outputs[:, 1]

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial, meta_model_type, meta_features, y_train), n_trials=20)

    best_params = study.best_params
    print(f"Best hyperparameters found by Optuna for {meta_model_type}: {best_params}")

    # Refit on all training meta-features with the best parameters found.
    meta_model = meta_model_factories[meta_model_type](best_params)
    meta_model.fit(meta_features, y_train)
    meta_predictions = meta_model.predict(meta_eval_features)

    accuracy = accuracy_score(y_eval, meta_predictions)
    precision = precision_score(y_eval, meta_predictions, average='binary')
    recall = recall_score(y_eval, meta_predictions, average='binary')
    f1 = f1_score(y_eval, meta_predictions, average='binary')

    print(f"Optimized Stacking Ensemble Accuracy: {accuracy}")
    print(f"Optimized Stacking Ensemble Precision: {precision}")
    print(f"Optimized Stacking Ensemble Recall: {recall}")
    print(f"Optimized Stacking Ensemble F1-Score: {f1}")

    # The line layout of this file (title, then one "Name: value" per line) is
    # what the results-aggregation cell parses by position.
    metrics_file_path = f"{save_path}/optuna_stacking_ensemble_metrics.txt"
    with open(metrics_file_path, 'w') as f:
        f.write(f"Optimized Stacking Ensemble Metrics ({meta_model_type}):\n")
        f.write(f"Accuracy: {accuracy}\n")
        f.write(f"Precision: {precision}\n")
        f.write(f"Recall: {recall}\n")
        f.write(f"F1-Score: {f1}\n")

    # Save predictions for final evaluation set
    np.save(f"{save_path}/meta_eval_predictions.npy", meta_predictions)
    np.save(f"{save_path}/meta_eval_true_labels.npy", y_eval)

    meta_model_path = f"{save_path}/optuna_stacking_ensemble_model_{meta_model_type}.pkl"
    joblib.dump(meta_model, meta_model_path)
    print(f"Optimized meta-classifier saved to: {meta_model_path}")
    print("Predictions and labels saved for final evaluation set.")


In [None]:
# XGBoost stacking runs, in execution order:
# (base-model stack, training frame, output directory prefix).
# Every run is evaluated on the OLID test set.
_xgboost_stacking_runs = [
    # in-domain stacking ensemble 1
    (in_domain_stack_one, olid_train_data,
     "/content/drive/MyDrive/results_retrain_opt/in_domain_stack_3"),
    # cross-domain stacking ensemble 1
    (cross_domain_stack_one, hasoc_train_data,
     "/content/drive/MyDrive/results_retrain_opt/cross_domain_stack_3"),
    # in-domain stacking ensemble 2
    (in_domain_stack_two, olid_train_data,
     "/content/drive/MyDrive/results_retrain_opt/in_domain_stack_4"),
    # cross-domain stacking ensemble 2
    (cross_domain_stack_two, hasoc_train_data,
     "/content/drive/MyDrive/results_retrain_opt/cross_domain_stack_4"),
]

for _run_stack, _run_train_data, _run_save_path in _xgboost_stacking_runs:
    stacking_ensemble_with_optuna(
        _run_stack,
        train_data=_run_train_data,
        eval_data=olid_test_data,
        meta_model_type='xgboost',
        save_path=_run_save_path
    )

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Directory tree that holds one sub-directory per stacking run.
root_dir = "/content/drive/MyDrive/results_retrain_opt"

# Walk the tree and draw a confusion matrix for each directory that contains
# both the saved predictions and the saved true labels.
for dirpath, dirnames, filenames in os.walk(root_dir):
    has_predictions = "meta_eval_predictions.npy" in filenames
    has_labels = "meta_eval_true_labels.npy" in filenames
    if not (has_predictions and has_labels):
        continue

    predictions_file = os.path.join(dirpath, "meta_eval_predictions.npy")
    labels_file = os.path.join(dirpath, "meta_eval_true_labels.npy")
    predictions = np.load(predictions_file)
    true_labels = np.load(labels_file)

    cm = confusion_matrix(true_labels, predictions)

    # Text version first, then the figure.
    print(f"\nConfusion Matrix for: {dirpath}")
    print(cm)

    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")
    plt.title(f"Confusion Matrix - {os.path.basename(dirpath)}")
    plt.show()


In [None]:
import os
import pandas as pd
from sklearn.metrics import confusion_matrix

# numpy is used below (np.load) but was missing from this cell's own imports.
import numpy as np

# Define the root directory where your results are stored
root_dir = "/content/drive/MyDrive/results_retrain_opt"

# Column order of the summary table; also guarantees that the columns exist
# when no metrics file is found.
result_columns = [
    "Domain", "Model Type", "Stack Type",
    "Accuracy", "Precision", "Recall", "F1-Score",
    "True Positives", "False Positives", "True Negatives", "False Negatives",
]


def _metric_value(line):
    """Return the float after ': ' in a 'Name: value' metrics line."""
    return float(line.strip().split(": ")[1])


# List to store all result data
results = []

# Loop through each directory to gather results
for dirpath, dirnames, filenames in os.walk(root_dir):
    for filename in filenames:
        if "ensemble_metrics.txt" not in filename:
            continue

        # Determine the model type based on folder name
        model_type = "logistic" if "logistic" in dirpath else "random_forest" if "random_forest" in dirpath else "xgboost"

        # Determine the domain type
        domain_type = "cross-domain" if "cross_domain" in dirpath else "in-domain"

        # Map the directory suffix to the stack label used in the table:
        # "stack_3" directories are reported as "stack_two" and "stack_4"
        # directories as "stack_three".
        if "stack_one" in dirpath:
            stack_type = "stack_one"
        elif "stack_3" in dirpath:
            stack_type = "stack_two"
        elif "stack_4" in dirpath:
            stack_type = "stack_three"
        else:
            stack_type = "Unknown"  # Default if stack type is unclear

        # Read metrics from the file: line 0 is the title, lines 1-4 are
        # accuracy, precision, recall and F1 in that order.
        with open(os.path.join(dirpath, filename), 'r') as file:
            content = file.readlines()
        accuracy = _metric_value(content[1])
        precision = _metric_value(content[2])
        recall = _metric_value(content[3])
        # Deliberately NOT named ``f1_score``: that would rebind the sklearn
        # function imported earlier in the notebook and break any later re-run
        # of the stacking cell.
        f1_value = _metric_value(content[4])

        # Load predictions and true labels
        predictions = np.load(os.path.join(dirpath, "meta_eval_predictions.npy"))
        true_labels = np.load(os.path.join(dirpath, "meta_eval_true_labels.npy"))

        # Fix the label set so the matrix is always 2x2 and unpacks to four
        # values even when a class is absent from both arrays.
        # Assumes labels are encoded as 0/1 — confirm against the datasets.
        tn, fp, fn, tp = confusion_matrix(true_labels, predictions, labels=[0, 1]).ravel()

        # Append all metrics, including confusion matrix components
        results.append({
            "Domain": domain_type,
            "Model Type": model_type,
            "Stack Type": stack_type,
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1-Score": f1_value,
            "True Positives": tp,
            "False Positives": fp,
            "True Negatives": tn,
            "False Negatives": fn
        })

# Convert to DataFrame and display
results_df = pd.DataFrame(results, columns=result_columns)

# Replace any remaining NaN values in Stack Type with 'Unknown' (just in case).
# Assigning the result back avoids the deprecated in-place fillna on a column
# selection.
results_df['Stack Type'] = results_df['Stack Type'].fillna('Unknown')

# Sort by Domain, Model Type and Stack Type for better structure
results_df = results_df.sort_values(by=["Domain", "Model Type", "Stack Type"], ascending=[True, True, True])

print("\n===== Stacking Ensemble Results =====\n")
print(results_df)


In [None]:
import matplotlib.pyplot as plt
from pandas.plotting import table

# Three decimals are enough for the rendered table.
results_df = results_df.round(3)

# Figure height grows with the number of rows; the axes themselves are hidden
# so that only the table is drawn.
results_table_height = len(results_df) * 0.5
fig, ax = plt.subplots(figsize=(12, results_table_height))
ax.axis('off')

# Draw the DataFrame as a centred table with equal-width columns.
results_col_widths = [0.1] * len(results_df.columns)
tbl = table(ax, results_df, loc='center', cellLoc='center', colWidths=results_col_widths)

# Fixed font size and a slight upscale for readability.
tbl.auto_set_font_size(False)
tbl.set_fontsize(10)
tbl.scale(1.2, 1.2)

# Write the PNG to Drive, then show the figure inline.
plt.savefig('/content/drive/MyDrive/Stacking_Ensemble_Results_Table_Rounded.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# import pandas as pd
# from transformers import AutoModelForSequenceClassification, AutoTokenizer
# from scipy.stats import mode
# import torch
# import numpy as np
# from sklearn.metrics import classification_report, confusion_matrix
# import logging
# from tqdm import tqdm
# import os

# # Set up logging to capture outputs
# logging.basicConfig(level=logging.INFO)
# transformers_logger = logging.getLogger("transformers")
# transformers_logger.setLevel(logging.WARNING)

# # Load datasets
# olid_test = pd.read_csv('/content/drive/MyDrive/sem4/sem4/olid-test.csv')

# # Prepare OLID test data
# olid_test_data = pd.DataFrame({
#     'text': olid_test['text'],
#     'labels': olid_test['labels']
# })

# # Define model paths for in-domain and cross-domain
# in_domain_paths = {
#     "bert": "/content/drive/MyDrive/results/OLID_bert-base-cased_lr_1e-05_bs_16_epochs_5",
#     "roberta": "/content/drive/MyDrive/results/OLID_roberta-base_lr_2e-05_bs_16_epochs_5",
#     "hatebert": "/content/drive/MyDrive/results/OLID_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5",
#     "distilbert": "/content/drive/MyDrive/results/OLID_distilbert-base-cased_lr_1e-05_bs_16_epochs_5"
# }

# cross_domain_paths = {
#     "bert": "/content/drive/MyDrive/results/HASOC_bert-base-cased_lr_2e-05_bs_16_epochs_5",
#     "roberta": "/content/drive/MyDrive/results/HASOC_roberta-base_lr_1e-05_bs_16_epochs_5",
#     "hatebert": "/content/drive/MyDrive/results/HASOC_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5",
#     "distilbert": "/content/drive/MyDrive/results/HASOC_distilbert-base-cased_lr_1e-05_bs_16_epochs_5"
# }

# # Define stack configurations
# stacks = {
#     "stack_one": ["roberta", "distilbert", "hatebert"],
#     "stack_two": ["roberta", "bert", "hatebert"],
#     "stack_three": ["roberta", "distilbert", "bert"]
# }

# # Load models based on stack configuration
# def load_stack_models(model_paths, stack):
#     models = []
#     for model_name in stack:
#         model = AutoModelForSequenceClassification.from_pretrained(model_paths[model_name]).to("cuda")
#         models.append(model)
#     return models

# # Unified voting function for Hard and Soft Majority Voting
# def majority_voting(texts, tokenizer, models, voting_type="hard", batch_size=16):
#     all_predictions = []

#     for i in tqdm(range(0, len(texts), batch_size), desc=f"{voting_type.capitalize()} Voting Prediction Batches"):
#         batch_texts = texts[i:i + batch_size]
#         encoding = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True).to("cuda")
#         input_ids, attention_mask = encoding['input_ids'], encoding['attention_mask']

#         batch_predictions = []
#         combined_probs = None

#         for model in models:
#             with torch.no_grad():
#                 logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
#                 if voting_type == "hard":
#                     preds = torch.argmax(logits, dim=-1).cpu().numpy()
#                     batch_predictions.append(preds)
#                 elif voting_type == "soft":
#                     probs = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()
#                     combined_probs = probs if combined_probs is None else combined_probs + probs

#         if voting_type == "hard":
#             # Perform hard majority voting
#             batch_predictions = np.array(batch_predictions)
#             final_predictions = mode(batch_predictions, axis=0)[0].flatten()
#         elif voting_type == "soft":
#             # Average probabilities for soft voting
#             combined_probs /= len(models)
#             final_predictions = np.argmax(combined_probs, axis=1)

#         all_predictions.extend(final_predictions)
#         torch.cuda.empty_cache()

#     return all_predictions

# # Function to evaluate and save results
# def evaluate_and_save_results(predictions, labels, voting_type, domain, stack_name):
#     report = classification_report(labels, predictions)
#     confusion = confusion_matrix(labels, predictions)

#     # Print results
#     print(f"{voting_type.capitalize()} Majority Voting Performance ({domain.capitalize()} - {stack_name}):")
#     print(report)
#     print(confusion)

#     # Save results
#     os.makedirs(f'/content/drive/MyDrive/final_results/{stack_name}', exist_ok=True)
#     filename = f'/content/drive/MyDrive/final_results/{stack_name}/{voting_type}_voting_results_{domain}.txt'
#     with open(filename, 'w') as f:
#         f.write(f"{voting_type.capitalize()} Majority Voting Performance ({domain.capitalize()} - {stack_name}):\n")
#         f.write(report)
#         f.write("\nConfusion Matrix:\n")
#         f.write(np.array2string(confusion))

# # Run Hard and Soft Majority Voting for each stack in In-Domain and Cross-Domain
# for stack_name, stack_models in stacks.items():
#     in_domain_models = load_stack_models(in_domain_paths, stack_models)
#     cross_domain_models = load_stack_models(cross_domain_paths, stack_models)

#     for domain, models, tokenizer in [
#         ("in_domain", in_domain_models, AutoTokenizer.from_pretrained(in_domain_paths["bert"])),
#         ("cross_domain", cross_domain_models, AutoTokenizer.from_pretrained(cross_domain_paths["bert"]))
#     ]:
#         for voting_type in ["hard", "soft"]:
#             predictions = majority_voting(olid_test_data['text'].tolist(), tokenizer, models, voting_type=voting_type)
#             evaluate_and_save_results(predictions, olid_test_data['labels'], voting_type, domain, stack_name)


In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm import tqdm

# Output directory for the voting experiments and the OLID test split.
root_dir = "/content/drive/MyDrive/final_results"
olid_test = pd.read_csv('/content/drive/MyDrive/sem4/sem4/olid-test.csv')
olid_test_data = olid_test[['text', 'labels']].copy()

# Checkpoint directory per model name, saved under the OLID_* prefix.
in_domain_paths = dict(
    bert="/content/drive/MyDrive/results/OLID_bert-base-cased_lr_1e-05_bs_16_epochs_5",
    roberta="/content/drive/MyDrive/results/OLID_roberta-base_lr_2e-05_bs_16_epochs_5",
    hatebert="/content/drive/MyDrive/results/OLID_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5",
    distilbert="/content/drive/MyDrive/results/OLID_distilbert-base-cased_lr_1e-05_bs_16_epochs_5",
)

# Checkpoint directory per model name, saved under the HASOC_* prefix.
cross_domain_paths = dict(
    bert="/content/drive/MyDrive/results/HASOC_bert-base-cased_lr_2e-05_bs_16_epochs_5",
    roberta="/content/drive/MyDrive/results/HASOC_roberta-base_lr_1e-05_bs_16_epochs_5",
    hatebert="/content/drive/MyDrive/results/HASOC_GroNLP/hateBERT_lr_1e-05_bs_16_epochs_5",
    distilbert="/content/drive/MyDrive/results/HASOC_distilbert-base-cased_lr_1e-05_bs_16_epochs_5",
)

# Model names that make up each voting stack; the names index the two path
# dictionaries above.
stacks = dict(
    stack_one=["roberta", "distilbert", "hatebert"],
    stack_two=["roberta", "bert", "hatebert"],
    stack_three=["roberta", "distilbert", "bert"],
)

def load_stack_models(model_paths, stack):
    """Load one sequence-classification model per name in ``stack``.

    Args:
        model_paths: Mapping from model name to checkpoint path.
        stack: Iterable of model names to load, in order.

    Returns:
        List of models, in the order of ``stack``, each moved to "cuda".
    """
    return [
        AutoModelForSequenceClassification.from_pretrained(model_paths[name]).to("cuda")
        for name in stack
    ]

def majority_voting(texts, tokenizer, models, voting_type="hard", batch_size=16):
    """Predict a label for every text by hard or soft voting over ``models``.

    Args:
        texts: List of input strings.
        tokenizer: Either a single tokenizer shared by all models, or a
            list/tuple with one tokenizer per entry of ``models`` (same order).
            Models with different vocabularies need their own tokenizer,
            otherwise they receive token ids that do not match their
            embeddings.
        models: Sequence-classification models. Each is called with
            ``input_ids`` and ``attention_mask`` and must return an object with
            a ``logits`` attribute.
        voting_type: "hard" takes the most frequent per-model argmax; "soft"
            averages the per-model softmax probabilities and takes the argmax.
        batch_size: Number of texts per forward pass.

    Returns:
        List with one predicted class index per text.

    Raises:
        ValueError: If ``voting_type`` is not "hard" or "soft", if ``models``
            is empty, or if a tokenizer list does not match ``models`` in
            length.
    """
    if voting_type not in ("hard", "soft"):
        raise ValueError(f"voting_type must be 'hard' or 'soft', got {voting_type!r}")
    if len(models) == 0:
        raise ValueError("models must contain at least one model")

    if isinstance(tokenizer, (list, tuple)):
        tokenizers = list(tokenizer)
        if len(tokenizers) != len(models):
            raise ValueError(
                f"Got {len(tokenizers)} tokenizers for {len(models)} models; "
                "pass one tokenizer per model or a single shared tokenizer."
            )
    else:
        tokenizers = [tokenizer] * len(models)

    # ``mode`` was used here without ever being imported in the live cells;
    # importing it locally keeps this function self-contained.
    from scipy.stats import mode

    all_predictions = []
    for i in tqdm(range(0, len(texts), batch_size), desc=f"{voting_type.capitalize()} Voting Prediction Batches"):
        batch_texts = texts[i:i + batch_size]

        # Tokenize once per distinct tokenizer object, so a shared tokenizer
        # is not re-run for every model.
        encodings = {}
        batch_predictions = []
        combined_probs = None
        for model, model_tokenizer in zip(models, tokenizers):
            key = id(model_tokenizer)
            if key not in encodings:
                encodings[key] = model_tokenizer(
                    batch_texts, return_tensors="pt", padding=True, truncation=True
                )
            # Follow the model's own device instead of hard-coding "cuda".
            encoding = encodings[key].to(model.device)

            with torch.no_grad():
                logits = model(
                    input_ids=encoding['input_ids'],
                    attention_mask=encoding['attention_mask'],
                ).logits

            if voting_type == "hard":
                batch_predictions.append(torch.argmax(logits, dim=-1).cpu().numpy())
            else:
                probs = torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()
                combined_probs = probs if combined_probs is None else combined_probs + probs

        if voting_type == "hard":
            # Rows are models, columns are texts: take the per-text mode.
            # flatten() copes with either output shape scipy may return.
            final_predictions = mode(np.array(batch_predictions), axis=0)[0].flatten()
        else:
            combined_probs /= len(models)
            final_predictions = np.argmax(combined_probs, axis=1)

        all_predictions.extend(final_predictions)
        torch.cuda.empty_cache()

    return all_predictions

def save_results_to_csv(predictions, labels, voting_type, domain, stack_name):
    """Append one row of evaluation metrics to the voting results CSV.

    Accuracy plus the *weighted-average* precision, recall and F1 come from
    ``classification_report``; the confusion-matrix counts are stored next to
    them. The CSV header is written only when the file does not exist yet, so
    running the experiments again appends further rows.

    Args:
        predictions: Predicted class indices.
        labels: True class labels, aligned with ``predictions``.
        voting_type: Stored in the "Voting Type" column.
        domain: Stored in the "Domain" column.
        stack_name: Stored in the "Stack Type" column.
    """
    # Calculate metrics
    report = classification_report(labels, predictions, output_dict=True)
    # Fix the label set so the matrix is always 2x2 and unpacks to four values
    # even if one class never occurs.
    # Assumes labels are encoded as 0/1 — confirm against the dataset.
    cm = confusion_matrix(labels, predictions, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Prepare result row
    result_row = {
        "Domain": domain,
        "Voting Type": voting_type,
        "Stack Type": stack_name,
        "Accuracy": report["accuracy"],
        "Precision": report["weighted avg"]["precision"],
        "Recall": report["weighted avg"]["recall"],
        "F1-Score": report["weighted avg"]["f1-score"],
        "True Positives": tp,
        "False Positives": fp,
        "True Negatives": tn,
        "False Negatives": fn
    }

    # Save result row to CSV, creating the target directory on first use.
    result_file = "/content/drive/MyDrive/final_results/voting_ensemble_results.csv"
    os.makedirs(os.path.dirname(result_file), exist_ok=True)
    write_header = not os.path.exists(result_file)
    pd.DataFrame([result_row]).to_csv(result_file, mode='a', header=write_header, index=False)

# Run Hard and Soft Majority Voting for each stack in In-Domain and Cross-Domain
for stack_name, stack_models in stacks.items():
    for domain, model_paths in [
        ("in_domain", in_domain_paths),
        ("cross_domain", cross_domain_paths),
    ]:
        models = load_stack_models(model_paths, stack_models)
        # One tokenizer per checkpoint, loaded from the same directory as the
        # model, so every model sees ids from its own vocabulary. The single
        # BERT tokenizer used before was also fed to the RoBERTa and hateBERT
        # models.
        # NOTE(review): assumes each checkpoint directory also contains its
        # tokenizer files — confirm for every path.
        tokenizers = [AutoTokenizer.from_pretrained(model_paths[model_name]) for model_name in stack_models]

        for voting_type in ["hard", "soft"]:
            predictions = majority_voting(olid_test_data['text'].tolist(), tokenizers, models, voting_type=voting_type)
            save_results_to_csv(predictions, olid_test_data['labels'], voting_type, domain, stack_name)

        # Drop this domain's models before loading the next set.
        del models
        torch.cuda.empty_cache()


In [None]:
import pandas as pd

# CSV produced by the voting experiments.
voting_results_csv_path = "/content/drive/MyDrive/final_results/voting_ensemble_results.csv"

# Read the file, discard rows containing any NaN, and order the remaining rows
# by Domain, Voting Type and Stack Type.
voting_results_df = (
    pd.read_csv(voting_results_csv_path)
    .dropna()
    .sort_values(by=["Domain", "Voting Type", "Stack Type"], ascending=[True, True, True])
)

# Show the cleaned, sorted table.
print("\n===== Voting Ensemble Results (Without NaNs) =====\n")
print(voting_results_df)

# Write the cleaned table back over the original CSV.
voting_results_df.to_csv(voting_results_csv_path, index=False)
print(f"\nSorted results saved to: {voting_results_csv_path}")


In [None]:
import matplotlib.pyplot as plt
from pandas.plotting import table

# Three decimals are enough for the rendered table.
voting_results_df = voting_results_df.round(3)

# Figure height grows with the number of rows; the axes themselves are hidden
# so that only the table is drawn.
voting_table_height = len(voting_results_df) * 0.5
fig, ax = plt.subplots(figsize=(12, voting_table_height))
ax.axis('off')

# Draw the DataFrame as a centred table with equal-width columns.
voting_col_widths = [0.1] * len(voting_results_df.columns)
tbl = table(ax, voting_results_df, loc='center', cellLoc='center', colWidths=voting_col_widths)

# Fixed font size and a slight upscale for readability.
tbl.auto_set_font_size(False)
tbl.set_fontsize(10)
tbl.scale(1.2, 1.2)

# Write the PNG to Drive, then show the figure inline.
plt.savefig('/content/drive/MyDrive/Votting_Results_Table_Rounded.png', dpi=300, bbox_inches='tight')
plt.show()
