In [1]:
import numpy as np
import pandas as pd
from prettytable import PrettyTable
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import DistilBertTokenizer, DistilBertModel
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.exceptions import ConvergenceWarning
from warnings import simplefilter

# Suppress warnings
simplefilter("ignore", category=UserWarning)
simplefilter("ignore", category=UserWarning, append=True)
simplefilter("ignore", category=ConvergenceWarning, append=True)

def load_and_preprocess_data(data_path):
    bug_reports = pd.read_csv(data_path)

    bug_reports['creation_date'] = pd.to_datetime(bug_reports['creation_date'], errors='coerce')
    bug_reports['resolution_date'] = pd.to_datetime(bug_reports['resolution_date'])
    bug_reports['bug_fix_time'] = (bug_reports['resolution_date'] - bug_reports['creation_date']).dt.days
    bug_reports = bug_reports.dropna(subset=['bug_fix_time', 'short_description'])

    threshold = 10
    bug_reports['bug_class'] = np.where(bug_reports['bug_fix_time'] <= threshold, 'short-lived', 'long-lived')

    return bug_reports

def extract_tfidf_features(bug_reports):
    tfidf_vectorizer = TfidfVectorizer(max_features=128, stop_words='english')
    tfidf_features = tfidf_vectorizer.fit_transform(bug_reports['short_description']).toarray()

    return tfidf_features

def extract_distilbert_features(bug_reports):
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')

    def extract_distilbert_feature(text):
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        outputs = model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).squeeze().detach().numpy()

    distilbert_features = np.array(bug_reports['short_description'].apply(extract_distilbert_feature).tolist())
    distilbert_features = torch.tensor(distilbert_features)

    return distilbert_features

def train_and_evaluate_classifier(X_train, X_test, y_train, y_test, classifier, feature_type):
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    balanced_accuracy = np.mean([cm[i, i] / np.sum(cm[i]) for i in range(len(np.unique(y_test)))])
    
    return balanced_accuracy

def print_results_table(datasets, classifiers, individual_results_tfidf, mean_accuracies_tfidf, individual_results_distilbert, mean_accuracies_distilbert):
    result_table = PrettyTable()

    # Add headers
    result_table.field_names = ["Dataset", "Classifier", "Technique", "Comparison", "Individual"]

    # Add data for TF-IDF
    for i in range(len(datasets)):
        for j in range(len(classifiers)):
            result_table.add_row([datasets[i], classifiers[j], 'TF-IDF', mean_accuracies_tfidf[i][j], individual_results_tfidf[i][j]])

    # Add data for DistilBERT
    for i in range(len(datasets)):
        for j in range(len(classifiers)):
            result_table.add_row([datasets[i], classifiers[j], 'DistilBERT', mean_accuracies_distilbert[i][j], individual_results_distilbert[i][j]])

    # Print the table
    print(result_table)

# Define datasets and classifiers
datasets = ['Mozilla', 'Eclipse', 'GCC', 'GNOME', 'WineHQ', 'FreeDesktop']
classifiers = ['KNN', 'NB', 'NN', 'RF', 'SVM']

# Initialize empty lists for individual and mean accuracies for both TF-IDF and DistilBERT
individual_results_tfidf = []
individual_results_distilbert = []

mean_accuracies_tfidf = []
mean_accuracies_distilbert = []

# Load and preprocess data for different projects
data_paths = [
  r"C:\Users\admin\Desktop\New folder\freedesktop_bug_report_data.csv",
  r"C:\Users\admin\Desktop\New folder\gcc_bug_report_data.csv",
  r"C:\Users\admin\Desktop\New folder\gnome_bug_report_data.csv",
  r"C:\Users\admin\Desktop\New folder\mozilla_bug_report_data.csv",
  r"C:\Users\admin\Desktop\New folder\winehq_bug_report_data.csv",
  r"C:\Users\admin\Desktop\New folder\eclipse_bug_report_data.csv"
]

for data_path in data_paths:
    bug_reports = load_and_preprocess_data(data_path)

    X_tfidf = extract_tfidf_features(bug_reports)
    X_distilbert = extract_distilbert_features(bug_reports)
    y = bug_reports['bug_class']

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    accuracies_tfidf = []
    accuracies_distilbert = []

    for train_index, test_index in skf.split(X_tfidf, y):
        X_train_tfidf, X_test_tfidf = X_tfidf[train_index], X_tfidf[test_index]
        y_train_tfidf, y_test_tfidf = y.iloc[train_index], y.iloc[test_index]

        X_train_distilbert, X_test_distilbert = X_distilbert[train_index], X_distilbert[test_index]
        y_train_distilbert, y_test_distilbert = y.iloc[train_index], y.iloc[test_index]

        scaler = MinMaxScaler()
        X_train_distilbert = scaler.fit_transform(X_train_distilbert)
        X_test_distilbert = scaler.transform(X_test_distilbert)

        accuracies_tfidf.append([
            train_and_evaluate_classifier(
                X_train_tfidf,
                X_test_tfidf,
                y_train_tfidf,
                y_test_tfidf,
                KNeighborsClassifier(),
                'TF-IDF'
            ),
            train_and_evaluate_classifier(
                X_train_tfidf,
                X_test_tfidf,
                y_train_tfidf,
                y_test_tfidf,
                MultinomialNB(),
                'TF-IDF'
            ),
            train_and_evaluate_classifier(
                X_train_tfidf,
                X_test_tfidf,
                y_train_tfidf,
                y_test_tfidf,
                MLPClassifier(),
                'TF-IDF'
            ),
            train_and_evaluate_classifier(
                X_train_tfidf,
                X_test_tfidf,
                y_train_tfidf,
                y_test_tfidf,
                RandomForestClassifier(),
                'TF-IDF'
            ),
            train_and_evaluate_classifier(
                X_train_tfidf,
                X_test_tfidf,
                y_train_tfidf,
                y_test_tfidf,
                SVC(),
                'TF-IDF'
            )
        ])

        accuracies_distilbert.append([
            train_and_evaluate_classifier(
                X_train_distilbert,
                X_test_distilbert,
                y_train_distilbert,
                y_test_distilbert,
                KNeighborsClassifier(),
                'DistilBERT'
            ),
            train_and_evaluate_classifier(
                X_train_distilbert,
                X_test_distilbert,
                y_train_distilbert,
                y_test_distilbert,
                MultinomialNB(),
                'DistilBERT'
            ),
            train_and_evaluate_classifier(
                X_train_distilbert,
                X_test_distilbert,
                y_train_distilbert,
                y_test_distilbert,
                MLPClassifier(),
                'DistilBERT'
            ),
            train_and_evaluate_classifier(
                X_train_distilbert,
                X_test_distilbert,
                y_train_distilbert,
                y_test_distilbert,
                RandomForestClassifier(),
                'DistilBERT'
            ),
            train_and_evaluate_classifier(
                X_train_distilbert,
                X_test_distilbert,
                y_train_distilbert,
                y_test_distilbert,
                SVC(),
                'DistilBERT'
            )
        ])

    individual_results_tfidf.append(np.mean(accuracies_tfidf, axis=0))
    individual_results_distilbert.append(np.mean(accuracies_distilbert, axis=0))

    mean_accuracies_tfidf.append(np.mean(accuracies_tfidf, axis=0))
    mean_accuracies_distilbert.append(np.mean(accuracies_distilbert, axis=0))

# Print the results table
print_results_table(datasets, classifiers, individual_results_tfidf, mean_accuracies_tfidf, individual_results_distilbert, mean_accuracies_distilbert)


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mo

+-------------+------------+------------+---------------------+---------------------+
|   Dataset   | Classifier | Technique  |      Comparison     |      Individual     |
+-------------+------------+------------+---------------------+---------------------+
|   Mozilla   |    KNN     |   TF-IDF   |  0.523282886293557  |  0.523282886293557  |
|   Mozilla   |     NB     |   TF-IDF   |  0.5201567476415037 |  0.5201567476415037 |
|   Mozilla   |     NN     |   TF-IDF   |  0.5300416871910775 |  0.5300416871910775 |
|   Mozilla   |     RF     |   TF-IDF   |  0.5290110322732275 |  0.5290110322732275 |
|   Mozilla   |    SVM     |   TF-IDF   |  0.5266005740395984 |  0.5266005740395984 |
|   Eclipse   |    KNN     |   TF-IDF   |  0.5282475245352677 |  0.5282475245352677 |
|   Eclipse   |     NB     |   TF-IDF   |  0.5350091199270406 |  0.5350091199270406 |
|   Eclipse   |     NN     |   TF-IDF   |  0.5291947942766974 |  0.5291947942766974 |
|   Eclipse   |     RF     |   TF-IDF   |  0.530563821

In [3]:
from tabulate import tabulate

# Define the results
results = {
    "Mozilla Bug Report Data": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.523282886293557,
            "MultinomialNB": 0.5201567476415037,
            "MLPClassifier": 0.5300416871910775,
            "RandomForestClassifier": 0.5290110322732275,
            "SVM": 0.5266005740395984,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.5372469189237482,
            "MultinomialNB": 0.5195384163143311,
            "MLPClassifier": 0.5313076615210762,
            "RandomForestClassifier": 0.5240170082471912,
            "SVM": 0.5213200302453351,
        },
    },
    "Eclipse Bug Report Data": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.5282475245352677,
            "MultinomialNB": 0.5350091199270406,
            "MLPClassifier": 0.5291947942766974,
            "RandomForestClassifier": 0.5305638214688098,
            "SVM": 0.5366991857879571,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.5426814935995976,
            "MultinomialNB": 0.5365774300609718,
            "MLPClassifier": 0.5430566132790524,
            "RandomForestClassifier": 0.5367846459290181,
            "SVM": 0.5463981772681901,
        },
    },
    "GCC Bug Report Data": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.5356236834762431,
            "MultinomialNB": 0.5355996102814348,
            "MLPClassifier": 0.5333718641112963,
            "RandomForestClassifier": 0.528862137059593,
            "SVM": 0.5327667059085797,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.5273892983967884,
            "MultinomialNB": 0.5417135898313032,
            "MLPClassifier": 0.5359163033100613,
            "RandomForestClassifier": 0.5342795263008082,
            "SVM": 0.540288984202987,
        },
    },
    "GNOME Bug Report Data": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.5252517183279306,
            "MultinomialNB": 0.5248413591635419,
            "MLPClassifier": 0.5389307639068345,
            "RandomForestClassifier": 0.5376287553747182,
            "SVM": 0.5323085795117285,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.5481377496683969,
            "MultinomialNB": 0.5292843556469442,
            "MLPClassifier": 0.5607710787063434,
            "RandomForestClassifier": 0.5366027831797271,
            "SVM": 0.5563869440331881,
        },
    },
    "WineHQ Dataset": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.5133435868871982,
            "MultinomialNB": 0.5,
            "MLPClassifier": 0.5207878799286959,
            "RandomForestClassifier": 0.5177391739156004,
            "SVM": 0.5011416254889792,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.521640861738422,
            "MultinomialNB": 0.5159368227688848,
            "MLPClassifier": 0.49888128591061875,
            "RandomForestClassifier": 0.5024309808113181,
            "SVM": 0.5,
        },
    },
    "FreeDesktop Dataset": {
        "TF-IDF": {
            "KNeighborsClassifier": 0.5283337777472426,
            "MultinomialNB": 0.5247450126572876,
            "MLPClassifier": 0.5248747764133688,
            "RandomForestClassifier": 0.5297319566615724,
            "SVM": 0.5283718434517481,
        },
        "DistilBERT": {
            "KNeighborsClassifier": 0.5385215946504625,
            "MultinomialNB": 0.5359569021588743,
            "MLPClassifier": 0.5490439679491023,
            "RandomForestClassifier": 0.5450826075018526,
            "SVM": 0.5622813090936178,
        },
    },
}

# Find overall best dataset, technique, and classifier
overall_best_dataset = max(results.keys(), key=lambda dataset: max((results[dataset][technique][model] for technique in results[dataset] for model in results[dataset][technique] if results[dataset][technique][model] is not None), default=float('-inf')))
overall_best_technique = max(results[overall_best_dataset].keys(), key=lambda technique: max((results[overall_best_dataset][technique][model] for model in results[overall_best_dataset][technique] if results[overall_best_dataset][technique][model] is not None), default=float('-inf')))
overall_best_classifier = max(
    results[overall_best_dataset][overall_best_technique].keys(),
    key=lambda model: results[overall_best_dataset][overall_best_technique][model] if results[overall_best_dataset][overall_best_technique][model] is not None else float('-inf')
)

print(f"\nOverall Best Dataset: {overall_best_dataset}")
print(f"Overall Best Technique: {overall_best_technique}")
print(f"Overall Best Classifier: {overall_best_classifier}")




Overall Best Dataset: FreeDesktop Dataset
Overall Best Technique: DistilBERT
Overall Best Classifier: SVM


In [1]:
from tabulate import tabulate
results = {
    "Mozilla": {
        "TF-IDF": {
            "KNN": 0.523282886293557,
            "NB": 0.5201567476415037,
            "NN": 0.5300416871910775,
            "RF": 0.5290110322732275,
            "SVM": 0.5266005740395984,
        },
        "DistilBERT": {
            "KNN": 0.5372469189237482,
            "NB": 0.5195384163143311,
            "NN": 0.5313076615210762,
            "RF": 0.5240170082471912,
            "SVM": 0.5213200302453351,
        },
    },
    "Eclipse": {
        "TF-IDF": {
            "KNN": 0.5282475245352677,
            "NB": 0.5350091199270406,
            "NN": 0.5291947942766974,
            "RF": 0.5305638214688098,
            "SVM": 0.5366991857879571,
        },
        "DistilBERT": {
            "KNN": 0.5426814935995976,
            "NB": 0.5365774300609718,
            "NN": 0.5430566132790524,
            "RF": 0.5367846459290181,
            "SVM": 0.5463981772681901,
        },
    },
    "GCC": {
        "TF-IDF": {
            "KNN": 0.5356236834762431,
            "NB": 0.5355996102814348,
            "NN": 0.5333718641112963,
            "RF": 0.528862137059593,
            "SVM": 0.5327667059085797,
        },
        "DistilBERT": {
            "KNN": 0.5273892983967884,
            "NB": 0.5417135898313032,
            "NN": 0.5359163033100613,
            "RF": 0.5342795263008082,
            "SVM": 0.540288984202987,
        },
    },
    "GNOME": {
        "TF-IDF": {
            "KNN": 0.5252517183279306,
            "NB": 0.5248413591635419,
            "NN": 0.5389307639068345,
            "RF": 0.5376287553747182,
            "SVM": 0.5323085795117285,
        },
        "DistilBERT": {
            "KNN": 0.5481377496683969,
            "NB": 0.5292843556469442,
            "NN": 0.5607710787063434,
            "RF": 0.5366027831797271,
            "SVM": 0.5563869440331881,
        },
    },
    "WineHQ": {
        "TF-IDF": {
            "KNN": 0.5133435868871982,
            "NB": 0.5,
            "NN": 0.5207878799286959,
            "RF": 0.5177391739156004,
            "SVM": 0.5011416254889792,
        },
        "DistilBERT": {
            "KNN": 0.521640861738422,
            "NB": 0.5159368227688848,
            "NN": 0.49888128591061875,
            "RF": 0.5024309808113181,
            "SVM": 0.5,
        },
    },
    "FreeDesktop": {
        "TF-IDF": {
            "KNN": 0.5283337777472426,
            "NB": 0.5247450126572876,
            "NN": 0.5248747764133688,
            "RF": 0.5297319566615724,
            "SVM": 0.5283718434517481,
        },
        "DistilBERT": {
            "KNN": 0.5385215946504625,
            "NB": 0.5359569021588743,
            "NN": 0.5490439679491023,
            "RF": 0.5450826075018526,
            "SVM": 0.5622813090936178,
        },
    },
}

# Prepare data for tabulation
table_data = []
best_methods = {}
best_classifiers = {}
best_techniques = {}
best_accuracies = {}

for dataset, methods in results.items():
    best_method = max(methods.keys(), key=lambda method: max((methods[method][model] for model in methods[method] if methods[method][model] is not None), default=float('-inf')))
    best_classifier = max(methods[best_method].keys(), key=lambda model: methods[best_method][model] if methods[best_method][model] is not None else float('-inf'))
    best_accuracy = max((methods[best_method][model] for model in methods[best_method] if methods[best_method][model] is not None), default=None)
    best_technique = best_method  # Initialize best technique as the best method for now
    table_data.append([dataset, best_method, best_classifier, best_accuracy, best_technique])
    best_methods[dataset] = best_method
    best_classifiers[dataset] = best_classifier
    best_techniques[dataset] = best_technique
    best_accuracies[dataset] = best_accuracy

# Create the table
headers = ["Dataset", "Best Method", "Best Classifier", "Best Accuracy", "Best Technique"]
table = tabulate(table_data, headers=headers, tablefmt="pretty")

# Print the table
print(table)

# Find overall best classifier and method
overall_best_dataset = max(best_methods.keys(), key=lambda dataset: max((results[dataset][best_methods[dataset]][model] for model in results[dataset][best_methods[dataset]] if results[dataset][best_methods[dataset]][model] is not None), default=float('-inf')))
overall_best_classifier = max(
    results[overall_best_dataset][best_methods[overall_best_dataset]].keys(),
    key=lambda model: results[overall_best_dataset][best_methods[overall_best_dataset]][model] if results[overall_best_dataset][best_methods[overall_best_dataset]][model] is not None else float('-inf')
)
overall_best_technique = max(set(best_techniques.values()), key=lambda technique: list(best_techniques.values()).count(technique))
overall_best_accuracy = best_accuracies[overall_best_dataset]

print(f"\nOverall Best Dataset: {overall_best_dataset}")
print(f"Overall Best Classifier: {overall_best_classifier}")
print(f"Overall Best Technique: {overall_best_technique}")
print(f"Overall Best Accuracy: {overall_best_accuracy}")

+-------------+-------------+-----------------+--------------------+----------------+
|   Dataset   | Best Method | Best Classifier |   Best Accuracy    | Best Technique |
+-------------+-------------+-----------------+--------------------+----------------+
|   Mozilla   | DistilBERT  |       KNN       | 0.5372469189237482 |   DistilBERT   |
|   Eclipse   | DistilBERT  |       SVM       | 0.5463981772681901 |   DistilBERT   |
|     GCC     | DistilBERT  |       NB        | 0.5417135898313032 |   DistilBERT   |
|    GNOME    | DistilBERT  |       NN        | 0.5607710787063434 |   DistilBERT   |
|   WineHQ    | DistilBERT  |       KNN       | 0.521640861738422  |   DistilBERT   |
| FreeDesktop | DistilBERT  |       SVM       | 0.5622813090936178 |   DistilBERT   |
+-------------+-------------+-----------------+--------------------+----------------+

Overall Best Dataset: FreeDesktop
Overall Best Classifier: SVM
Overall Best Technique: DistilBERT
Overall Best Accuracy: 0.5622813090936178

In [4]:
from tabulate import tabulate

# Existing results data
results = {
    "Mozilla Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.538, "MultinomialNB": 0.521, "MLPClassifier": 0.54, "RandomForestClassifier": 0.545, "SVM": 0.535},
        "BERT": {"KNeighborsClassifier": 0.565, "MultinomialNB": 0.5355, "MLPClassifier": 0.5525, "RandomForestClassifier": 0.5435, "SVM": 0.5595},
    },
    "Eclipse Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.5038, "MultinomialNB": 0.5233, "MLPClassifier": 0.5167, "RandomForestClassifier": 0.5161, "SVM": 0.5225},
        "BERT": {"KNeighborsClassifier": 0.5151, "MultinomialNB": 0.5202, "MLPClassifier": 0.5261, "RandomForestClassifier": 0.5279, "SVM": 0.5474},
    },
    "GCC Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.5363, "MultinomialNB": 0.5463, "MLPClassifier": 0.5338, "RandomForestClassifier": 0.5383, "SVM": 0.5448},
        "BERT": {"KNeighborsClassifier": 0.5558, "MultinomialNB": 0.5343, "MLPClassifier": 0.5488, "RandomForestClassifier": 0.5463, "SVM": 0.5528},
    },
    "GNOME Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.5436, "MultinomialNB": 0.544, "MLPClassifier": 0.5302, "RandomForestClassifier": 0.5370, "SVM": 0.5327},
        "BERT": {"KNeighborsClassifier": 0.5316, "MultinomialNB": 0.5423, "MLPClassifier": 0.5431, "RandomForestClassifier": 0.5395, "SVM": 0.5427},
    },
    "WineHQ Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.5119, "MultinomialNB": 0.5058, "MLPClassifier": 0.5337, "RandomForestClassifier": 0.5251, "SVM": 0.5066},
        "BERT": {"KNeighborsClassifier": 0.5187, "MultinomialNB": 0.5058, "MLPClassifier": 0.5058, "RandomForestClassifier": 0.5066, "SVM": 0.5066},
    },
    "FreeDesktop Bug Report Data": {
        "TF-IDF": {"KNeighborsClassifier": 0.5269, "MultinomialNB": 0.5209, "MLPClassifier": 0.5392, "RandomForestClassifier": 0.5330, "SVM": 0.5333},
        "BERT": {"KNeighborsClassifier": 0.5549, "MultinomialNB": 0.5161, "MLPClassifier": 0.5335, "RandomForestClassifier": 0.5278, "SVM": 0.5204},
    },
}

table_data = []
best_methods = {}
best_classifiers = {}
best_techniques = {}
best_accuracies = {}

for dataset, methods in results.items():
    best_method = max(methods.keys(), key=lambda method: max((methods[method][model] for model in methods[method] if methods[method][model] is not None), default=float('-inf')))
    best_classifier = max(methods[best_method].keys(), key=lambda model: methods[best_method][model] if methods[best_method][model] is not None else float('-inf'))
    best_accuracy = max((methods[best_method][model] for model in methods[best_method] if methods[best_method][model] is not None), default=None)
    best_technique = best_method  # Initialize best technique as the best method for now
    table_data.append([dataset, best_method, best_classifier, best_accuracy, best_technique])
    best_methods[dataset] = best_method
    best_classifiers[dataset] = best_classifier
    best_techniques[dataset] = best_technique
    best_accuracies[dataset] = best_accuracy

# Create the table
headers = ["Dataset", "Best Method", "Best Classifier", "Best Accuracy", "Best Technique"]
table = tabulate(table_data, headers=headers, tablefmt="pretty")

# Print the table
print(table)

# Find overall best classifier and method
overall_best_dataset = max(best_methods.keys(), key=lambda dataset: max((results[dataset][best_methods[dataset]][model] for model in results[dataset][best_methods[dataset]] if results[dataset][best_methods[dataset]][model] is not None), default=float('-inf')))
overall_best_classifier = max(
    results[overall_best_dataset][best_methods[overall_best_dataset]].keys(),
    key=lambda model: results[overall_best_dataset][best_methods[overall_best_dataset]][model] if results[overall_best_dataset][best_methods[overall_best_dataset]][model] is not None else float('-inf')
)
overall_best_technique = max(set(best_techniques.values()), key=lambda technique: list(best_techniques.values()).count(technique))
overall_best_accuracy = best_accuracies[overall_best_dataset]

print(f"\nOverall Best Dataset: {overall_best_dataset}")
print(f"Overall Best Classifier: {overall_best_classifier}")
print(f"Overall Best Technique: {overall_best_technique}")
print(f"Overall Best Accuracy: {overall_best_accuracy}")

+-----------------------------+-------------+----------------------+---------------+----------------+
|           Dataset           | Best Method |   Best Classifier    | Best Accuracy | Best Technique |
+-----------------------------+-------------+----------------------+---------------+----------------+
|   Mozilla Bug Report Data   |    BERT     | KNeighborsClassifier |     0.565     |      BERT      |
|   Eclipse Bug Report Data   |    BERT     |         SVM          |    0.5474     |      BERT      |
|     GCC Bug Report Data     |    BERT     | KNeighborsClassifier |    0.5558     |      BERT      |
|    GNOME Bug Report Data    |   TF-IDF    |    MultinomialNB     |     0.544     |     TF-IDF     |
|   WineHQ Bug Report Data    |   TF-IDF    |    MLPClassifier     |    0.5337     |     TF-IDF     |
| FreeDesktop Bug Report Data |    BERT     | KNeighborsClassifier |    0.5549     |      BERT      |
+-----------------------------+-------------+----------------------+--------------