# In [None]:
import tkinter as tk
from tkinter import filedialog, messagebox
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os

# Global state shared between the GUI callbacks.
df = None  # DataFrame loaded by select_dataset(); None until a CSV is chosen
results = {}  # model name -> metrics dict, filled by train_and_evaluate()

# Seed for the stochastic estimators. The train/test split is already seeded
# (random_state=42), so seeding the tree-based models as well makes repeated
# runs on the same dataset report the same metrics and the same "best" model.
RANDOM_STATE = 42

# Display name -> estimator instance. The same instances are re-fitted on
# every training run.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Naive Bayes": GaussianNB(),
}

button_width = 25  # Uniform button width

def select_dataset():
    """Ask the user for a CSV file and load it into the global ``df``.

    The previously loaded dataset is only replaced once the new file has been
    read successfully and contains at least one row. Read failures and empty
    files are reported in an error dialog instead of raising out of the Tk
    callback.
    """
    global df
    file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
    if not file_path:
        # Dialog was cancelled: keep whatever was loaded before.
        return

    try:
        loaded = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # Raised by pandas when the file has no columns to parse at all.
        messagebox.showerror("Error", "Dataset is empty. Please select a valid dataset.")
        return
    except (OSError, ValueError) as exc:
        # OSError: unreadable file; ValueError covers parser and decoding errors.
        messagebox.showerror("Error", f"Could not read dataset:\n{exc}")
        return

    if loaded.empty:
        messagebox.showerror("Error", "Dataset is empty. Please select a valid dataset.")
        return

    # Publish only after validation so a rejected file never becomes the
    # dataset used for training.
    df = loaded
    file_label.config(text=f"Selected: {os.path.basename(file_path)}")
    messagebox.showinfo("Success", "Dataset Loaded Successfully")

def preprocess_data():
    """Clean the loaded dataset and split it into train and test sets.

    The last column of the global ``df`` is used as the target and all other
    columns as features. Rows with a missing target are dropped. Missing
    numeric features are filled with the column mean, missing ``object``
    features with the column mode, and ``object`` features are then
    label-encoded. The global ``df`` itself is never modified.

    Returns:
        ``X_train, X_test, y_train, y_test`` from an 80/20 split seeded with
        ``random_state=42``. If no usable dataset is available an error
        dialog is shown and four ``None`` values are returned, so callers can
        always unpack four items.
    """
    failure = (None, None, None, None)

    if df is None:
        messagebox.showerror("Error", "Please select a dataset first")
        return failure

    if df.shape[1] < 2:
        messagebox.showerror(
            "Error", "Dataset must contain at least one feature column and a target column"
        )
        return failure

    # Rows without a label can neither be trained on nor scored.
    labelled = df[df.iloc[:, -1].notna()]
    if len(labelled) < 2:
        messagebox.showerror("Error", "Dataset needs at least two rows with a target value")
        return failure

    # Work on an explicit copy: filling values through a slice of ``df`` is
    # unreliable (chained assignment) and would otherwise risk touching ``df``.
    X = labelled.iloc[:, :-1].copy()
    y = labelled.iloc[:, -1]

    for column in X.select_dtypes(include=['number']).columns:
        fill_value = X[column].mean()
        if pd.isna(fill_value):
            # Column has no observed values at all; fall back to a constant.
            fill_value = 0
        X[column] = X[column].fillna(fill_value)

    for column in X.select_dtypes(include=['object']).columns:
        modes = X[column].mode()
        if not modes.empty:  # empty when every value in the column is missing
            X[column] = X[column].fillna(modes.iloc[0])
        # Cast to str so columns mixing strings with other objects still sort
        # and encode instead of raising inside LabelEncoder.
        X[column] = LabelEncoder().fit_transform(X[column].astype(str))

    if y.dtype == 'float64':
        # Float labels (e.g. 0.0 / 1.0) are converted to integer class ids.
        y = y.astype(int)

    return train_test_split(X, y, test_size=0.2, random_state=42)

def train_and_evaluate():
    """Train every model in ``models``, score it and show the metrics table.

    Each fitted model is saved to ``<Model_Name>.joblib`` in the current
    working directory. The global ``results`` dict is replaced only when all
    models were trained successfully; on any failure an error dialog is shown
    and the previous results are kept. The buttons are always restored when
    the function exits, so a failed run cannot leave the GUI locked.
    """
    train_button.config(state=tk.DISABLED)
    show_best_model_button.config(state=tk.DISABLED)
    plot_button.config(state=tk.DISABLED)

    try:
        split = preprocess_data()
        # preprocess_data() reports its own errors and signals failure with
        # None placeholders; check before unpacking so this works whether it
        # hands back two or four of them.
        if split is None or split[0] is None:
            return
        X_train, X_test, y_train, y_test = split

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Collect into a local dict so a failure half-way through does not
        # leave ``results`` partially filled.
        fresh_results = {}
        for name, model in models.items():
            if name == "Logistic Regression":
                # NOTE(review): the saved logistic-regression model expects
                # standardized inputs, but the scaler itself is not persisted.
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_test_scaled)
            else:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

            # zero_division=0 yields the same value as the default but without
            # a warning when some class is never predicted.
            fresh_results[name] = {
                "accuracy": round(accuracy_score(y_test, y_pred), 4),
                "precision": round(
                    precision_score(y_test, y_pred, average='weighted', zero_division=0), 4
                ),
                "recall": round(
                    recall_score(y_test, y_pred, average='weighted', zero_division=0), 4
                ),
                "f1_score": round(
                    f1_score(y_test, y_pred, average='weighted', zero_division=0), 4
                ),
            }

            joblib.dump(model, f"{name.replace(' ', '_')}.joblib")

        results.clear()
        results.update(fresh_results)
    except Exception as exc:  # GUI boundary: report instead of failing silently
        messagebox.showerror("Training Failed", f"Could not train the models:\n{exc}")
        return
    finally:
        train_button.config(state=tk.NORMAL)
        # Results from this run, or from an earlier successful one, can still
        # be inspected and plotted.
        follow_up_state = tk.NORMAL if results else tk.DISABLED
        show_best_model_button.config(state=follow_up_state)
        plot_button.config(state=follow_up_state)

    result_text.delete("1.0", tk.END)
    header = "{:<25}{:<12}{:<12}{:<12}{:<12}".format(
        "Model", "Accuracy", "Precision", "Recall", "F1 Score"
    )
    result_text.insert(tk.END, "Model Evaluation Results:\n")
    result_text.insert(tk.END, header + "\n")
    result_text.insert(tk.END, "-" * len(header) + "\n")

    for model_name, metrics in results.items():
        result_text.insert(tk.END, "{:<25}{:<12.4f}{:<12.4f}{:<12.4f}{:<12.4f}\n".format(
            model_name, metrics['accuracy'], metrics['precision'],
            metrics['recall'], metrics['f1_score']
        ))

    messagebox.showinfo("Training Complete", "All models have been trained and saved!")

def show_best_model():
    """Append the model(s) with the highest accuracy to the results box.

    Reads the global ``results`` dict; when it is empty an error dialog is
    shown instead. All models tied for the top accuracy are listed, in the
    order in which they appear in ``results``.
    """
    if results:
        top_accuracy = max(scores["accuracy"] for scores in results.values())

        result_text.insert(tk.END, "\nBest Performing Model(s):\n")
        for name, scores in results.items():
            if scores["accuracy"] != top_accuracy:
                continue
            result_text.insert(tk.END, f"{name} - Accuracy: {top_accuracy:.4f}\n")
        return

    messagebox.showerror("Error", "No results available. Train models first.")

def plot_results():
    """Draw one bar chart per metric inside a scrollable area of ``plot_frame``.

    Reads the global ``results`` dict; when it is empty an error dialog is
    shown and nothing is drawn. Any previously drawn plot widgets are
    destroyed first.
    """
    if not results:
        messagebox.showerror("Error", "No results to plot. Train models first.")
        return

    # Build the figure directly instead of through pyplot: pyplot keeps every
    # figure it creates registered until plt.close() is called, so each click
    # would otherwise leak a figure.
    from matplotlib.figure import Figure

    # Clear previous content
    for widget in plot_frame.winfo_children():
        widget.destroy()

    # Create a Canvas and Scrollbar
    canvas = tk.Canvas(plot_frame)
    scrollbar = tk.Scrollbar(plot_frame, orient="vertical", command=canvas.yview)
    scrollable_frame = tk.Frame(canvas)

    # Keep the scroll region in sync with the size of the embedded frame
    scrollable_frame.bind(
        "<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
    )

    canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
    canvas.configure(yscrollcommand=scrollbar.set)

    # Pack Scrollbar and Canvas
    scrollbar.pack(side="right", fill="y")
    canvas.pack(side="left", fill="both", expand=True)

    metrics = ["accuracy", "precision", "recall", "f1_score"]
    model_names = list(results)

    fig = Figure(figsize=(6, 10))
    axes = fig.subplots(len(metrics), 1)
    fig.subplots_adjust(hspace=0.5)  # vertical room so titles do not overlap
    plot_frame.pack(fill=tk.BOTH, expand=True, pady=20)  # Increase bottom padding

    for ax, metric in zip(axes, metrics):
        values = [results[name][metric] for name in model_names]
        ax.bar(model_names, values, color='skyblue', width=0.5)
        ax.set_title(f"{metric.capitalize()} for Different Models", fontsize=14, fontweight='bold')
        ax.set_ylabel(metric.capitalize(), fontsize=12)
        # The categorical bar plot already labels the ticks with the model
        # names; only the font size needs adjusting.
        ax.tick_params(axis='x', labelsize=10)

    # Embed plot inside scrollable frame
    canvas_plot = FigureCanvasTkAgg(fig, master=scrollable_frame)
    canvas_plot.get_tk_widget().pack(fill=tk.BOTH, expand=True, pady=5)
    canvas_plot.draw()

# GUI Setup
def _add_button(parent, label, callback, **options):
    """Create a uniformly styled button in ``parent``, pack it and return it."""
    button = tk.Button(
        parent, text=label, command=callback,
        font=("Arial", 12), width=button_width, **options
    )
    button.pack(pady=5)
    return button


# Main window
root = tk.Tk()
root.title("EVALUATOR")
root.geometry("1200x800")
root["bg"] = "#f0f0f0"

# Dataset picker: a bordered strip holding the button and the file name label
dataset_frame = tk.Frame(root, relief=tk.RIDGE, borderwidth=2, bg="white")
dataset_frame.pack(pady=10, fill=tk.X, padx=20)

select_button = _add_button(dataset_frame, "Select Dataset", select_dataset)

file_label = tk.Label(
    dataset_frame, text="Selected: None", fg="gray", bg="white", font=("Arial", 10)
)
file_label.pack()

# Action buttons; the last two stay disabled until a training run has finished
train_button = _add_button(root, "Train and Evaluate Models", train_and_evaluate)
show_best_model_button = _add_button(
    root, "Show Best Model", show_best_model, state=tk.DISABLED
)
plot_button = _add_button(root, "Plot Results", plot_results, state=tk.DISABLED)

# Monospaced text box so the metrics table columns line up
result_text = tk.Text(root, height=4, font=("Courier New", 12))
result_text.pack(pady=3, fill=tk.BOTH, expand=True)

# Bordered white area that plot_results() fills with the charts
plot_frame = tk.Frame(
    root, height=200, bg="white", relief=tk.RIDGE, borderwidth=2
)
plot_frame.pack(fill=tk.BOTH, expand=True, pady=15, padx=15)

root.mainloop()