# In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
import threading
import joblib
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Dataset loaded by select_dataset(); None when no dataset is loaded.
df = None
# Maps model name -> accuracy (rounded to 4 decimals), filled in by train_model()
# and read by plot_grouped_bar_chart().
accuracies = {}
# FigureCanvasTkAgg currently embedded in the output frame; replaced by embed_plot().
canvas = None

def select_dataset():
    """Ask the user for a CSV file and load it into the global ``df``.

    The file must contain at least one row and at least two columns (one
    feature column plus the target column). On success the file name is shown
    in ``file_label`` and a confirmation dialog appears. On failure an error
    dialog is shown, ``df`` is reset to ``None`` and ``file_label`` is reset
    as well, so the label never advertises a dataset that is not loaded.
    Cancelling the dialog leaves the current state untouched.
    """
    global df
    file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
    if not file_path:
        return

    try:
        # Validate a local frame first; the global is only replaced once the
        # new dataset is known to be usable.
        loaded = pd.read_csv(file_path)
        if loaded.empty or loaded.shape[1] < 2:
            raise ValueError("Invalid dataset: Must have at least one feature column and a target column.")
    except Exception as e:
        df = None
        # Keep the label in sync with the (now empty) dataset state.
        file_label.config(text="Selected: None", fg="gray")
        messagebox.showerror("Error", f"Failed to load dataset: {str(e)}")
        return

    df = loaded
    file_label.config(text=f"Selected: {file_path.split('/')[-1]}", fg="black")
    messagebox.showinfo("Success", "Dataset Loaded Successfully")

def preprocess_data():
    """Split the loaded dataset into train/test features and targets.

    The last column of ``df`` is the target; all other columns are features.
    Missing values in numeric feature columns are replaced by the column mean.
    Object-typed feature columns have missing values replaced by the string
    ``"missing"`` and are then label-encoded. The target column is passed
    through unchanged.

    Returns:
        The result of ``train_test_split(X, y, test_size=0.2,
        random_state=42)`` (callers unpack it as ``X_train, X_test, y_train,
        y_test``), or ``(None, None, None, None)`` when no dataset is loaded.
    """
    global df
    if df is None:
        return None, None, None, None

    # Copy the features so cleaning works on an independent frame and never
    # writes through to (or warns about) the loaded dataset.
    X = df.iloc[:, :-1].copy()
    y = df.iloc[:, -1]

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on X[column]: that form operates on a temporary object and is not
    # guaranteed to update X.
    for column in X.select_dtypes(include=['number']).columns:
        X[column] = X[column].fillna(X[column].mean())

    for column in X.select_dtypes(include=['object']).columns:
        # Cast to str so columns holding mixed Python types can still be
        # sorted and encoded; columns that are already all strings are
        # unaffected.
        filled = X[column].fillna("missing").astype(str)
        X[column] = LabelEncoder().fit_transform(filled)

    return train_test_split(X, y, test_size=0.2, random_state=42)

def train_model(model_name, model):
    """Fit ``model`` on the loaded dataset, record its accuracy and show it.

    The button callbacks hand this function to ``run_in_thread``, so it runs
    on a worker thread. It therefore never touches Tk widgets or dialogs
    directly: every GUI action is queued on the Tk event loop with
    ``root.after``.

    Args:
        model_name: Display name; also the key in ``accuracies`` and the stem
            of the ``<model_name>.joblib`` file the fitted model is saved to.
        model: Unfitted estimator exposing ``fit`` and ``predict``.
    """
    if df is None:
        root.after(0, lambda: messagebox.showerror("Error", "Please select a dataset first"))
        return

    try:
        X_train, X_test, y_train, y_test = preprocess_data()
        if X_train is None:
            return

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = round(accuracy_score(y_test, y_pred), 4)
        accuracies[model_name] = accuracy

        joblib.dump(model, f"{model_name}.joblib")
    except Exception as exc:
        # An exception escaping a worker thread would only be printed to
        # stderr; surface it to the user instead. The message is built here
        # because ``exc`` is unbound once the except block ends.
        failure_text = f"Failed to train {model_name}: {exc}"
        root.after(0, lambda: messagebox.showerror("Error", failure_text))
        return

    # Two separate callbacks so a problem updating the label cannot prevent
    # the chart from being drawn.
    root.after(0, lambda: output_label.config(text=f"{model_name} Accuracy: {accuracy}", fg="black"))
    root.after(0, lambda: plot_bar_chart(model_name, y_test, y_pred))

def plot_bar_chart(model_name, y_test, y_pred):
    """Embed a bar chart of how many samples were predicted per class.

    When every prediction is 0 or 1 the bars are labelled ``Benign`` (0) and
    ``Malicious`` (1). For any other label encoding one bar is drawn per
    distinct predicted class, labelled with the class value itself.

    Args:
        model_name: Display name used in the chart title.
        y_test: True labels of the test split. Not used by the chart; kept so
            the signature stays compatible with existing callers.
        y_pred: Iterable of predicted labels.
    """
    # Imported locally: a Figure built directly (rather than via pyplot) is
    # not registered in pyplot's global figure list, so a replaced chart can
    # be garbage-collected instead of accumulating.
    from matplotlib.figure import Figure

    # Single pass over the predictions.
    tally = {}
    for label in y_pred:
        tally[label] = tally.get(label, 0) + 1

    if set(tally) <= {0, 1}:
        labels = ['Benign', 'Malicious']
        counts = [tally.get(0, 0), tally.get(1, 0)]
    else:
        # Sort by string form so mixed label types still order deterministically.
        classes = sorted(tally, key=str)
        labels = [str(cls) for cls in classes]
        counts = [tally[cls] for cls in classes]

    fig = Figure(figsize=(5, 3))
    ax = fig.add_subplot(111)
    ax.bar(labels, counts, color=['turquoise', 'salmon'])
    ax.set_ylabel("Number of Samples")
    ax.set_title(f"{model_name} Predictions")

    embed_plot(fig)

def plot_grouped_bar_chart():
    """Embed a bar chart comparing the recorded accuracy of the four models.

    Models that have not been trained yet are drawn with an accuracy of 0.
    If no model has been trained at all, an error dialog is shown and
    nothing is drawn.
    """
    if not accuracies:
        messagebox.showerror("Error", "Please train models first before running Comparative Analysis")
        return

    # Imported locally: a Figure built directly (rather than via pyplot) is
    # not registered in pyplot's global figure list, so a replaced chart can
    # be garbage-collected instead of accumulating.
    from matplotlib.figure import Figure

    model_names = ["Logistic Regression", "Decision Tree", "Random Forest", "Naive Bayes"]
    accuracy_values = [accuracies.get(model, 0) for model in model_names]
    colors = ['#B0E0E6', '#98FB98', '#FFDAB9', '#FFE4E1']
    positions = range(len(model_names))

    fig = Figure(figsize=(9, 5))
    ax = fig.add_subplot(111)
    ax.bar(positions, accuracy_values, color=colors, width=0.6)
    ax.set_xlabel("Machine Learning Models", fontsize=12)
    ax.set_ylabel("Accuracy", fontsize=12)
    ax.set_title("Comparative Analysis of Model Accuracies", fontsize=14, pad=40)

    ax.set_ylim(0, 1)
    ax.set_xticks(positions)
    ax.set_xticklabels(model_names, ha='right', fontsize=10)

    # Print each accuracy just above its bar.
    for i, v in enumerate(accuracy_values):
        ax.text(i, v + 0.02, f"{v:.4f}", ha='center', fontsize=10, fontweight='bold')

    fig.tight_layout(rect=[0, 0, 1, 0.95])

    # Blank out text labels in the output area rather than destroying them:
    # the accuracy label is a long-lived widget that later training runs
    # update again, so it must stay alive.
    for widget in output_frame.winfo_children():
        if isinstance(widget, tk.Label):
            widget.config(text="")

    embed_plot(fig)

def embed_plot(fig):
    """Display ``fig`` in the output frame, replacing any plot shown before.

    The widget of the previously embedded canvas (if any) is destroyed, then a
    new ``FigureCanvasTkAgg`` for ``fig`` is created, drawn, packed into
    ``output_frame`` and stored in the global ``canvas``.
    """
    global canvas
    if canvas:
        stale_widget = canvas.get_tk_widget()
        stale_widget.destroy()

    canvas = FigureCanvasTkAgg(fig, master=output_frame)
    canvas.draw()
    plot_widget = canvas.get_tk_widget()
    plot_widget.pack()

def logistic_regression():
    """Button callback: train a logistic regression model on a worker thread."""
    def job():
        train_model("Logistic Regression", LogisticRegression(max_iter=1000))

    run_in_thread(job)

def decision_tree():
    """Button callback: train a decision tree classifier on a worker thread."""
    def job():
        train_model("Decision Tree", DecisionTreeClassifier())

    run_in_thread(job)

def random_forest():
    """Button callback: train a random forest classifier on a worker thread."""
    def job():
        train_model("Random Forest", RandomForestClassifier())

    run_in_thread(job)

def naive_bayes():
    """Button callback: train a Gaussian naive Bayes model on a worker thread."""
    def job():
        train_model("Naive Bayes", GaussianNB())

    run_in_thread(job)

def comparative_analysis():
    """Button callback: show the accuracy comparison chart.

    The chart is built on the calling (GUI) thread rather than a worker
    thread: ``plot_grouped_bar_chart`` only shows a dialog, edits widgets and
    embeds a canvas, and does no long-running computation that would need to
    be moved off the event loop.
    """
    plot_grouped_bar_chart()

def run_in_thread(target_function):
    """Start ``target_function`` (called with no arguments) on a new thread.

    Returns immediately without waiting for the thread to finish.
    """
    threading.Thread(target=target_function).start()

# ---------------------------------------------------------------------------
# GUI setup: main window with three stacked areas (dataset, models, output).
# ---------------------------------------------------------------------------
root = tk.Tk()
root.title("Performance Analyzer")
root.geometry("1200x800")
root.configure(bg="#f0f0f0")

# Dataset area: a button to pick a CSV file and a label naming the selection.
dataset_frame = tk.Frame(
    root,
    relief=tk.RIDGE,
    borderwidth=2,
    bg="white",
    padx=10,
    pady=10,
)
dataset_frame.pack(pady=20, fill=tk.X, padx=20)

select_button = tk.Button(
    dataset_frame,
    text="Select Dataset",
    command=select_dataset,
    width=20,
    font=("Arial", 12),
)
select_button.pack(pady=5)

file_label = tk.Label(
    dataset_frame,
    text="Selected: None",
    fg="gray",
    bg="white",
    font=("Arial", 10),
)
file_label.pack(pady=5)

# Model area: one button per classifier plus the comparison chart button.
model_frame = tk.Frame(root, padx=10, pady=10, relief=tk.RIDGE, bd=2, bg="#f0f0f0")
model_frame.pack(pady=20, fill=tk.X, padx=20)

# (button text, callback, vertical padding) in top-to-bottom order.
for _text, _command, _pady in (
    ("Logistic Regression", logistic_regression, 5),
    ("Decision Tree", decision_tree, 5),
    ("Random Forest", random_forest, 5),
    ("Gaussian Naive Bayes", naive_bayes, 5),
    ("Comparative Analysis", comparative_analysis, 10),
):
    tk.Button(
        model_frame,
        text=_text,
        command=_command,
        width=25,
        font=("Arial", 12),
    ).pack(pady=_pady)

# Output area: accuracy text on top, embedded charts below it.
output_frame = tk.Frame(
    root,
    relief=tk.RIDGE,
    borderwidth=2,
    bg="white",
    padx=10,
    pady=10,
)
output_frame.pack(pady=20, fill=tk.BOTH, expand=True, padx=20)

output_label = tk.Label(
    output_frame,
    text="Model Accuracy will be displayed here.",
    fg="gray",
    bg="white",
    font=("Arial", 12),
)
output_label.pack(pady=(5, 20))

# Hand control to Tk; returns when the window is closed.
root.mainloop()