# In [None]:
import tkinter as tk
from tkinter import ttk, filedialog, simpledialog, Listbox, Scrollbar, messagebox
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from tksheet import Sheet

class MachineLearningApp:
    def __init__(self, master):
        self.master = master
        self.dataset = None
        self.trained_model = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.setup_ui()

    def setup_ui(self):
        """Configure the main window (title, size, colour, ttk theme) and build the widgets.

        The 'xpnative' ttk theme is only applied when the running Tk reports
        it as installed; otherwise the default theme is kept instead of
        letting ``theme_use`` raise ``TclError``.
        """
        self.master.title("Machine Learning Application")
        self.master.geometry("900x500")
        self.master.config(bg="#B2D0C6")

        self.style = ttk.Style()
        # theme_use() fails for a theme that is not installed, so check first.
        if 'xpnative' in self.style.theme_names():
            self.style.theme_use('xpnative')

        self.create_widgets()

    def create_widgets(self):
        self.create_header()
        self.create_button_frame()
        self.create_data_frame()

    def create_header(self):
        """Create the application title label and pack it into the main window."""
        header_options = {
            'text': "DataInsight Machine Learning",
            'font': ('Comic Sans MS', 18),
            'fg': "#1B5F5E",
            'bg': "#B2D0C6",
        }
        self.label = tk.Label(self.master, **header_options)
        self.label.pack(pady=10)


    def create_button_frame(self):
        """Create the button bar with the dataset actions and the "Clear All" button."""
        # Set the background of the shared "TFrame" ttk style.
        ttk.Style().configure("TFrame", background="#B2D0C6")

        toolbar = ttk.Frame(self.master, style="TFrame")
        toolbar.pack(expand=True, fill='both')

        # Dataset actions, packed from the left in this order.
        left_buttons = (
            ("Create Dataset", self.create_dataset),
            ("Load Dataset", self.load_dataset),
            # Lambda so the dataset is looked up at click time, not now.
            ("Data information", lambda: self.display_dataset_info(self.dataset)),
            ("Display Dataset", self.visualize_data),
        )
        for caption, action in left_buttons:
            self.create_button(toolbar, caption, action, side="left")

        # "Clear All" sits on its own at the right edge.
        self.create_button(toolbar, "Clear All", self.clear, side="right")

    def create_button(self, frame, text, command, side="left"):
        """Create a button with the application's colours and pack it into ``frame``.

        Args:
            frame: Parent widget for the button.
            text: Caption shown on the button.
            command: Callable invoked when the button is clicked.
            side: Pack side for the button (defaults to "left").
        """
        appearance = {
            'bg': "#1B5F5E",
            'fg': "#B2D0C6",
            'font': ("Courier", 12, 'normal'),
        }
        tk.Button(frame, text=text, command=command, **appearance).pack(
            side=side, padx=10, pady=15
        )


    def create_data_frame(self):
        """Build the main work area: three column listboxes above a grid of action buttons.

        ``box1`` is filled with dataset columns by visualize_data();
        ``box2`` and ``box3`` receive the columns chosen as X and y.
        """
        self.frame = ttk.Frame(self.master)
        self.frame.pack(expand=True, fill='both')

        cell_layout = {'padx': 2, 'pady': 2, 'sticky': 'nsew'}

        def add_listbox(column):
            # Multi-select listbox placed in the top row at the given column.
            box = Listbox(
                self.frame, selectmode='multiple', bd=2, bg="#B2D0C6", fg="black",
                font=("Courier", 12, 'normal'), height=15, width=20,
            )
            box.grid(row=0, column=column, **cell_layout)
            return box

        def add_button(caption, action, row, column, family="Helvetica"):
            # Action button; 'sticky' in cell_layout lets it fill its grid cell.
            button = tk.Button(
                self.frame, text=caption, command=action, bg="#1B5F5E", fg="white",
                font=(family, 12, 'normal'), width=5, height=5,
            )
            button.grid(row=row, column=column, **cell_layout)

        self.box1 = add_listbox(0)
        self.box2 = add_listbox(1)
        add_button('Select X', self.x_values, 1, 0, family="Courier")
        self.box3 = add_listbox(2)
        add_button('Select y', self.y_values, 1, 1, family="Courier")

        # (caption, callback, grid row, grid column)
        helvetica_buttons = (
            ('Display Graphs', self.sol, 1, 2),
            ('Handle Missing Values', self.show_handle_missing_values_window, 2, 0),
            ('Remove Unnecessary Feature', self.remove_features, 2, 1),
            ('Handle Outliers', self.handle_outliers, 2, 2),
            ('Scale Data', self.scale_data, 3, 0),
            ('Encode Categorical Variables', self.encode_categorical, 3, 1),
            ('Train your model', self.select_and_train_model, 3, 2),
            ('Test your model', self.test, 4, 0),
            ('Show result', self.show_result, 4, 1),
        )
        for caption, action, row, column in helvetica_buttons:
            add_button(caption, action, row, column)

        # Let every column and row share extra space equally on resize.
        for column in range(3):
            self.frame.grid_columnconfigure(column, weight=1)
        for row in range(7):
            self.frame.grid_rowconfigure(row, weight=1)


    def create_dataset(self):
        """Ask for a sheet size and open an editable, empty spreadsheet window.

        The row/column counts are requested before any window is created, so
        cancelling either dialog leaves no empty window behind. Both counts
        must be at least 1. The sheet is stored in ``self.sheet`` so that
        save_dataset() can read it.
        """
        num_rows = simpledialog.askinteger(
            "Dataset Configuration", "Enter the number of rows:", minvalue=1
        )
        if num_rows is None:
            return

        num_columns = simpledialog.askinteger(
            "Dataset Configuration", "Enter the number of columns:", minvalue=1
        )
        if num_columns is None:
            return

        dataset_window = tk.Toplevel(self.master)
        dataset_window.title("Create Dataset")

        self.sheet = Sheet(dataset_window, page_up_down_select_row=True, column_width=120)
        self.sheet.headers([f'Column {i + 1}' for i in range(num_columns)])

        # Start with blank cells; the user fills them in before saving.
        for _ in range(num_rows):
            self.sheet.insert_row([""] * num_columns)

        self.sheet.enable_bindings("all")
        self.sheet.pack(fill="both", expand=True)

        tk.Button(dataset_window, text='Save Dataset', command=self.save_dataset).pack(pady=10)

    def save_dataset(self):
        """Write the contents of ``self.sheet`` to a CSV file chosen by the user.

        Saving is refused while any cell is empty. If the file dialog is
        cancelled the sheet stays open so the entered data is not lost; the
        editor window is closed only after a successful save.
        """
        # Local import: csv is not among the imports at the top of this file.
        import csv

        data = self.sheet.get_sheet_data()

        if any(not cell for row in data for cell in row):
            messagebox.showwarning("Warning", "Please fill all cells before saving the dataset.")
            return

        file_path = filedialog.asksaveasfilename(
            defaultextension=".csv", filetypes=[("CSV files", "*.csv")]
        )
        if not file_path:
            # Dialog cancelled: keep the sheet so the user can try again.
            return

        # NOTE(review): only the cell data is written, no header row. Since
        # load_dataset() reads files with pd.read_csv defaults, the first data
        # row will be taken as the header - confirm this is intended.
        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(data)

        messagebox.showinfo("Success", f"Dataset saved to {file_path}")

        # Close the whole editor window, not just the sheet, so no window
        # with a dangling "Save Dataset" button is left behind.
        self.sheet.winfo_toplevel().destroy()
        

    def load_dataset(self):
        """Let the user pick a CSV file and load it into ``self.dataset``.

        A temporary "Loading dataset..." label is shown while pandas reads
        the file and is always destroyed afterwards. Read failures are
        reported in an error dialog (and still printed) and leave the
        previous dataset untouched.
        """
        file_path = filedialog.askopenfilename(
            title="Select a dataset file", filetypes=[("CSV files", "*.csv")]
        )
        if not file_path:
            return

        print(f"Dataset selected: {file_path}")

        loading_message = tk.Label(
            self.master, text="Loading dataset...", font=('Helvetica', 12, 'italic')
        )
        loading_message.pack(pady=20)
        self.master.update()  # Ensure the message is displayed immediately

        try:
            self.dataset = pd.read_csv(file_path)
        except Exception as e:
            # UI boundary: read_csv can fail in many ways (parse, encoding,
            # OS errors); report all of them instead of crashing the callback.
            print(f"Error loading dataset: {e}")
            messagebox.showerror("Error", f"Error loading dataset: {e}")
        finally:
            # Destroy rather than pack_forget so labels do not accumulate.
            loading_message.destroy()

    def display_dataset_info(self, dataset):
        """Open a window showing ``dataset`` as a searchable table plus summary statistics.

        Args:
            dataset: The pandas DataFrame to display. A warning is shown
                instead when it is ``None`` or empty.
        """
        # Validate the argument that is actually used below, not self.dataset.
        if dataset is None or dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        info_window = tk.Toplevel(self.master)
        info_window.title("Dataset Information")
        info_window.geometry("800x600")

        columns = list(dataset.columns)
        tree = ttk.Treeview(info_window, columns=columns, show="headings")

        for col in columns:
            tree.heading(col, text=col)
            tree.column(col, width=100)

        # Search box: filters the rows shown in the tree via search_data().
        search_entry = tk.Entry(info_window)
        search_entry.pack(pady=10)
        search_button = tk.Button(
            info_window,
            text="Search",
            command=lambda: self.search_data(tree, dataset, search_entry.get()),
        )
        search_button.pack(pady=5)

        for _, row in dataset.iterrows():
            tree.insert("", "end", values=list(row))

        tree.pack(expand=True, fill="both", side="top")

        info_label = tk.Label(info_window, text=f"Dataset Shape: {dataset.shape}", fg="black", bg="white")
        info_label.pack(pady=5)

        summary_label = tk.Label(info_window, text=dataset.describe().to_string(), fg="black", bg="white")
        summary_label.pack(pady=5)

    def search_data(self, tree, dataset, query):
        """Refill ``tree`` with the rows of ``dataset`` that contain ``query``.

        The match is a case-insensitive substring test against each cell
        value. Column names and pandas' textual Series footer are not
        searched, so queries such as "dtype" or a column name no longer
        match every row.

        Args:
            tree: Treeview-like widget providing ``get_children``, ``delete``
                and ``insert``.
            dataset: pandas DataFrame to search.
            query: Text to look for; an empty string matches every row.
        """
        tree.delete(*tree.get_children())

        needle = query.lower()
        for _, row in dataset.iterrows():
            cells = list(row)
            if any(needle in str(cell).lower() for cell in cells):
                tree.insert("", "end", values=cells)

    def visualize_data(self):
        """List the columns of the loaded dataset in ``box1``.

        The listbox is rebuilt on every call so it always reflects the
        current dataset (for example after another file has been loaded)
        instead of keeping the columns of whichever dataset was shown first.
        """
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        self.box1.delete(0, tk.END)
        for column in self.dataset.columns:
            self.box1.insert(tk.END, column)


    def clear(self):
        """Remove every entry from the three column listboxes."""
        for listbox in (self.box1, self.box2, self.box3):
            listbox.delete(0, tk.END)

    def x_values(self):
        """Copy the columns selected in ``box1`` into the X listbox (``box2``).

        Nothing happens when ``box2`` already has entries or nothing is
        selected. After copying, the whole selection in ``box1`` is cleared
        (including index 0) so the same columns are not picked up again by
        the next "Select" action.
        """
        selected = [self.box1.get(idx) for idx in self.box1.curselection()]
        if self.box2.size() != 0 or not selected:
            return

        self.box2.insert(tk.END, *selected)
        self.box1.selection_clear(0, tk.END)

        
    def y_values(self):
        """Copy the columns selected in ``box1`` into the y listbox (``box3``).

        Nothing happens when ``box3`` already has entries or nothing is
        selected. After copying, the whole selection in ``box1`` is cleared
        (including index 0) so the same columns are not picked up again by
        the next "Select" action.
        """
        selected = [self.box1.get(idx) for idx in self.box1.curselection()]
        if self.box3.size() != 0 or not selected:
            return

        self.box3.insert(tk.END, *selected)
        self.box1.selection_clear(0, tk.END)

    def sol(self):
        """Open a small window where the user picks a chart type and triggers plotting."""
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        chooser = tk.Toplevel(self.master)
        chooser.title("Dataset Graphs")

        chart_kinds = ["Line Plot", "Bar Chart", "Scatter Plot", "Histogram", "Box Plot"]
        # Pre-select the first chart type.
        choice = tk.StringVar(chooser, value=chart_kinds[0])

        ttk.Combobox(chooser, values=chart_kinds, textvariable=choice).pack(pady=10)

        # The lambda reads the combobox value at click time.
        tk.Button(
            chooser,
            text="Visualize",
            command=lambda: self.visualize_selected(choice.get()),
        ).pack(pady=10)

    def visualize_selected(self, selected_type):
        if selected_type == "Line Plot":
            self.plot_line()
        elif selected_type == "Bar Chart":
            self.plot_bar()
        elif selected_type == "Scatter Plot":
            self.plot_scatter()
        elif selected_type == "Histogram":
            self.plot_histogram()
        elif selected_type == "Box Plot":
            self.plot_boxplot()

    def plot_line(self):
        """Draw a seaborn line plot of the whole dataset on a new figure.

        The figure is then handed to display_matplotlib_plot(), which embeds
        the current pyplot figure in a Tk window.
        """
        _, axes = plt.subplots()  # new figure, which becomes the current one
        sns.lineplot(data=self.dataset, ax=axes)
        axes.set_title("Line Plot")

        self.display_matplotlib_plot(plt)

    def plot_bar(self):
        """Draw a seaborn bar chart of the whole dataset on a new figure.

        The figure is then handed to display_matplotlib_plot(), which embeds
        the current pyplot figure in a Tk window.
        """
        _, axes = plt.subplots()  # new figure, which becomes the current one
        sns.barplot(data=self.dataset, ax=axes)
        axes.set_title("Bar Chart")

        self.display_matplotlib_plot(plt)

    def plot_scatter(self):
        """Show one scatter plot per (X, y) column pair in a new window.

        The i-th column in the X listbox is plotted against the i-th column
        in the y listbox, so both lists must be non-empty and equally long.
        The window is served by the application's main event loop; no nested
        ``mainloop()`` is started.
        """
        x_columns = [value.strip() for value in self.box2.get(0, tk.END)]
        y_columns = [value.strip() for value in self.box3.get(0, tk.END)]

        if not x_columns or not y_columns:
            messagebox.showinfo("Error", "Please select X and Y values before plotting!")
            return

        # Pairs are formed positionally, so the counts have to match.
        if len(x_columns) != len(y_columns):
            messagebox.showinfo("Error", "Please select an equal number of X and Y values for individual scatter plots!")
            return

        scatter_window = tk.Toplevel(self.master)
        scatter_window.title("Individual Scatter Plots")

        for x_column, y_column in zip(x_columns, y_columns):
            fig, ax = plt.subplots()
            sns.scatterplot(x=x_column, y=y_column, data=self.dataset, ax=ax)
            ax.set_title(f"Scatter Plot: {x_column} vs {y_column}")
            ax.set_xlabel(x_column)
            ax.set_ylabel(y_column)

            # Embed the Matplotlib figure in the Tkinter window
            canvas = FigureCanvasTkAgg(fig, master=scatter_window)
            canvas.draw()
            canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

    
    def plot_histogram(self):
        """Show a histogram (with KDE curve) for every column in the X listbox.

        All histograms are stacked vertically on one figure embedded in a new
        window. The window is served by the application's main event loop; no
        nested ``mainloop()`` is started.
        """
        x_values = self.box2.get(0, tk.END)

        if not x_values:
            messagebox.showinfo("Error", "Please select X values before plotting!")
            return

        num_rows = len(x_values)  # one subplot row per selected column

        histogram_window = tk.Toplevel(self.master)
        histogram_window.title("Histograms")

        # The figure is not managed by pyplot, so all layout calls go through
        # it directly rather than through plt.*.
        figure = plt.Figure(figsize=(10, 5 * num_rows))
        figure.subplots_adjust(hspace=0.9)  # vertical gap between subplots
        canvas = FigureCanvasTkAgg(figure, master=histogram_window)
        plot_widget = canvas.get_tk_widget()

        # NOTE(review): no scrollregion is configured on the canvas widget, so
        # this scrollbar may have nothing to scroll - confirm.
        scrollbar = Scrollbar(histogram_window, orient='vertical', command=plot_widget.yview)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        plot_widget.config(yscrollcommand=scrollbar.set)

        for position, x_value in enumerate(x_values, start=1):
            x_column = x_value.strip()

            ax = figure.add_subplot(num_rows, 1, position)
            sns.histplot(data=self.dataset, x=x_column, kde=True, color='skyblue', bins=20, ax=ax)
            ax.set_title(f"Histogram for {x_column}")
            ax.set_ylabel("Frequency")
            # The column name is already in the title, so drop the x label.
            ax.set_xlabel("")

        plot_widget.pack(side=tk.TOP, fill=tk.BOTH, expand=1)
        # Render now that all subplots have been added.
        canvas.draw()


    def plot_boxplot(self):
        """Show a box plot of the selected X columns plus the first y column.

        The window is served by the application's main event loop; no nested
        ``mainloop()`` is started.
        """
        x_columns = list(self.box2.get(0, tk.END))
        if not x_columns or self.box3.size() == 0:
            messagebox.showinfo("Error", "Please select X and Y columns before plotting!")
            return

        y_column = self.box3.get(0)  # only the first y column is used

        # Restrict the plot to the selected columns
        selected_columns_data = self.dataset[x_columns + [y_column]]

        boxplot_window = tk.Toplevel(self.master)
        boxplot_window.title("Box Plot")

        fig, ax = plt.subplots()
        sns.boxplot(data=selected_columns_data, ax=ax)

        # Embed the Matplotlib figure in the Tkinter window
        canvas = FigureCanvasTkAgg(fig, master=boxplot_window)
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)



    def display_matplotlib_plot(self, plt):
        """Embed pyplot's current figure, with a navigation toolbar, in a new window.

        The plot is only shown when both the X and the y listbox contain
        entries. Otherwise the pending figure is closed (so it does not stay
        registered with pyplot) and the user is told what is missing.

        Args:
            plt: The ``matplotlib.pyplot`` module whose current figure is shown.
        """
        figure = plt.gcf()

        if self.box2.size() == 0 or self.box3.size() == 0:
            plt.close(figure)
            messagebox.showwarning("Warning", "Please select X and y values before plotting!")
            return

        plot_window = tk.Toplevel(self.master)
        plot_window.title("Matplotlib Plot")

        # Embed the Matplotlib figure in the Tkinter window
        canvas = FigureCanvasTkAgg(figure, master=plot_window)
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

        # Add a Matplotlib toolbar to the Tkinter window
        toolbar = NavigationToolbar2Tk(canvas, plot_window)
        toolbar.update()
        toolbar.pack()

    def show_handle_missing_values_window(self):
        """Open the missing-value dialog: strategy pickers on the left, results on the right.

        The chosen strategies are stored in
        ``self.numeric_imputation_strategy_var`` and
        ``self.categorical_imputation_strategy_var`` and are read by
        handle_missing_values() when the button is pressed.
        """
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        handle_window = tk.Toplevel(self.master)
        handle_window.title("Handle Missing Values")

        # Left pane (first child of the window): strategy options.
        options_pane = ttk.Frame(handle_window, padding=10)
        options_pane.grid(row=0, column=0, padx=10, pady=10, sticky='nsew')

        def add_strategy_section(first_row, heading, heading_pady, prompt, choices):
            # Heading row, then a "prompt + combobox" row; the first choice is the default.
            tk.Label(options_pane, text=heading, font=('Arial', 12)).grid(
                row=first_row, column=0, columnspan=2, pady=heading_pady, sticky='w'
            )
            tk.Label(options_pane, text=prompt).grid(row=first_row + 1, column=0, pady=5, sticky='w')
            variable = tk.StringVar()
            selector = ttk.Combobox(options_pane, textvariable=variable, values=choices)
            selector.set(choices[0])
            selector.grid(row=first_row + 1, column=1, pady=5, sticky='w')
            return variable

        self.numeric_imputation_strategy_var = add_strategy_section(
            0, "Numeric Handling Options", 5, "Numeric Imputation Strategy:",
            ['mean', 'median', 'zero', 'ffill', 'bfill'],
        )
        self.categorical_imputation_strategy_var = add_strategy_section(
            2, "Categorical Handling Options", 10, "Categorical Imputation Strategy:",
            ['most_frequent', 'constant', 'ffill', 'bfill'],
        )

        tk.Button(
            options_pane,
            text='Handle Missing Values',
            command=lambda: self.handle_missing_values(handle_window),
        ).grid(row=4, column=0, columnspan=2, pady=10)

        # Right pane (second child of the window): before/after summaries.
        # handle_missing_values() looks this frame up by its child position.
        results_pane = ttk.Frame(handle_window, padding=10)
        results_pane.grid(row=0, column=1, padx=10, pady=10, sticky='nsew')

        section_titles = (
            (0, "Dataset Information Before Handling"),
            (2, "Dataset Information After Handling"),
        )
        for title_row, caption in section_titles:
            tk.Label(results_pane, text=caption, font=('Arial', 12), fg="black", bg="white").grid(
                row=title_row, column=0, pady=5, sticky='w'
            )
            # Empty placeholder below each title.
            tk.Label(results_pane, text="").grid(row=title_row + 1, column=0, pady=5, sticky='w')



    def handle_missing_values(self, handle_window):
        """Impute missing values with the chosen strategies and show before/after counts.

        Numeric columns support 'mean', 'median', 'zero', 'ffill' and 'bfill'
        (any other text falls back to 'mean'). Object columns support
        'most_frequent', 'constant', 'ffill' and 'bfill' (any other text
        falls back to 'most_frequent'). Rows that still contain missing
        values afterwards are dropped.

        Args:
            handle_window: The dialog created by
                show_handle_missing_values_window(); its second child is the
                frame in which the summaries are displayed.
        """
        numeric_strategy = self.numeric_imputation_strategy_var.get()
        categorical_strategy = self.categorical_imputation_strategy_var.get()

        nulls_before = self.dataset.isnull().sum()
        info_before = (
            f"Number of null values before handling: {nulls_before.sum()}\n\n"
            f"Null values before handling:\n{nulls_before}"
        )

        dataset = self.dataset.copy()
        numeric_columns = dataset.select_dtypes(include='number').columns
        categorical_columns = dataset.select_dtypes(include='object').columns

        if len(numeric_columns) > 0:
            numeric = dataset[numeric_columns]
            if numeric_strategy == 'median':
                numeric = numeric.fillna(numeric.median())
            elif numeric_strategy == 'zero':
                numeric = numeric.fillna(0)
            elif numeric_strategy == 'ffill':
                numeric = numeric.ffill()
            elif numeric_strategy == 'bfill':
                numeric = numeric.bfill()
            else:
                numeric = numeric.fillna(numeric.mean())
            dataset[numeric_columns] = numeric

        if len(categorical_columns) > 0:
            categorical = dataset[categorical_columns]
            if categorical_strategy == 'constant':
                # Same placeholder scikit-learn's SimpleImputer uses for strings.
                categorical = categorical.fillna("missing_value")
            elif categorical_strategy == 'ffill':
                categorical = categorical.ffill()
            elif categorical_strategy == 'bfill':
                categorical = categorical.bfill()
            else:
                # First mode of each column; columns without any value have none.
                modes = categorical.mode()
                if not modes.empty:
                    categorical = categorical.fillna(modes.iloc[0].dropna())
            dataset[categorical_columns] = categorical

        # Anything still missing (e.g. leading gaps after ffill) is removed.
        self.dataset = dataset.dropna()

        nulls_after = self.dataset.isnull().sum()
        info_after = (
            f"Number of null values after handling: {nulls_after.sum()}\n\n"
            f"Null values after handling:\n{nulls_after}"
        )

        result_frame = handle_window.winfo_children()[1]
        result_frame.grid_columnconfigure(1, weight=1)
        result_frame.grid_rowconfigure(1, weight=1)

        for row, text in ((1, info_before), (3, info_after)):
            # Replace whatever label currently occupies the cell instead of
            # stacking a new one on top of it on every run.
            for stale in result_frame.grid_slaves(row=row, column=0):
                stale.destroy()
            tk.Label(result_frame, text=text, font=('Arial', 10), justify='left').grid(
                row=row, column=0, pady=5, sticky='w'
            )


    
    def remove_features(self):
        """Open a dialog asking which columns to drop from the dataset."""
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        remove_window = tk.Toplevel(self.master)
        remove_window.title("Remove Columns")

        tk.Label(remove_window, text="Enter column names separated by commas:").pack(pady=10)

        # Text typed by the user; read when the button is pressed.
        column_names = tk.StringVar()
        tk.Entry(remove_window, textvariable=column_names).pack(pady=10)

        tk.Button(
            remove_window,
            text="Remove Columns",
            command=lambda: self.remove_columns(column_names.get(), remove_window),
        ).pack(pady=10)


    def remove_columns(self, columns_input, remove_window):
        try:
            self.columns_to_remove = [col.strip() for col in columns_input.split(',')]
            self.dataset = self.dataset.drop(columns=self.columns_to_remove, errors='ignore', axis=1)
        except Exception as e:
            print(f"Error removing columns: {e}")
        finally:
            self.clear()
            remove_window.destroy()

    def handle_outliers(self):
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        # Identify and handle outliers in numeric columns
        numeric_columns = self.dataset.select_dtypes(include='number').columns
        z_scores = self.dataset[numeric_columns].apply(lambda x: (x - x.mean()) / x.std())
        self.dataset = self.dataset[(z_scores.abs() < 3).all(axis=1)]  # Keep data within 3 standard deviations

        # Inform the user about successful removal of outliers
        self.show_outliers_removed_message()

    def show_outliers_removed_message(self):
        """Tell the user, via an info dialog, that outlier removal has finished."""
        title, body = "Outliers Removed", "Outliers have been removed successfully!"
        messagebox.showinfo(title, body)

        

    def scale_data(self):
        """Open a small window where the user picks a scaling method and applies it."""
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        scale_window = tk.Toplevel(self.master)
        scale_window.title("Data Scaling")

        scaling_methods = ["Standardization", "Normalization"]
        # Pre-select the first method.
        choice = tk.StringVar(scale_window, value=scaling_methods[0])

        ttk.Combobox(scale_window, values=scaling_methods, textvariable=choice).pack(pady=10)

        # The lambda reads the combobox value at click time.
        tk.Button(
            scale_window,
            text="Choose the Data Scaling",
            command=lambda: self.scaling_selected(choice.get()),
        ).pack(pady=10)
    
    def scaling_selected(self, selected_type):
        if selected_type == "Standardization":
            self.standardize_data()
        elif selected_type == "Normalization":
            self.normalize_data()
    
    def standardize_data(self):
        """Standardize the columns listed in the X listbox with ``StandardScaler``.

        The scaled values overwrite the original columns in ``self.dataset``.
        When no X column has been selected the user is warned and nothing is
        changed, instead of the scaler failing on an empty feature matrix.
        """
        from sklearn.preprocessing import StandardScaler

        X_columns = list(self.box2.get(0, tk.END))
        if not X_columns:
            messagebox.showwarning("Warning", "Please select X values before scaling!")
            return

        # Only the input features are scaled; the target column is left as is.
        features = self.dataset[X_columns].values
        self.dataset[X_columns] = StandardScaler().fit_transform(features)

        self.show_scaling_message("Standardization")
    
    def normalize_data(self):
        """Normalize the columns listed in the X listbox with ``MinMaxScaler``.

        The scaled values overwrite the original columns in ``self.dataset``.
        When no X column has been selected the user is warned and nothing is
        changed, instead of the scaler failing on an empty feature matrix.
        """
        from sklearn.preprocessing import MinMaxScaler

        X_columns = list(self.box2.get(0, tk.END))
        if not X_columns:
            messagebox.showwarning("Warning", "Please select X values before scaling!")
            return

        # Only the input features are scaled; the target column is left as is.
        features = self.dataset[X_columns].values
        self.dataset[X_columns] = MinMaxScaler().fit_transform(features)

        self.show_scaling_message("Normalization")

    
    def show_scaling_message(self, scaling_type):
        """Tell the user that a scaling step has finished.

        Args:
            scaling_type: Name of the scaling that was applied, e.g.
                "Standardization"; it is used as the subject of the message.
        """
        # The previous wording produced "Data has been standardization successfully!".
        messagebox.showinfo("Data Scaling", f"{scaling_type} completed successfully!")

        
    def encode_categorical(self):
        """Open a small window where the user picks an encoding for the object columns.

        When the dataset has no object-typed columns the user is told so in a
        dialog (not just on stdout) and no window is opened.
        """
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        # Find categorical columns
        categorical_columns = self.dataset.select_dtypes(include='object').columns

        if categorical_columns.empty:
            messagebox.showinfo("Encode Categorical Variables", "No categorical columns found for encoding.")
            return

        encode_window = tk.Toplevel(self.master)
        encode_window.title("Encode Categorical Variables")

        encode_options = ["Label Encoding", "One-Hot Encoding"]
        selected_option = tk.StringVar(encode_window)
        selected_option.set(encode_options[0])

        encode_dropdown = ttk.Combobox(encode_window, values=encode_options, textvariable=selected_option)
        encode_dropdown.pack(pady=10)

        # The lambda reads the combobox value at click time.
        encode_button = tk.Button(encode_window, text="Encode", command=lambda: self.encode_selected(selected_option.get()))
        encode_button.pack(pady=10)
    
    def encode_selected(self, selected_option):
        if selected_option == "Label Encoding":
            self.label_encode_data()
        elif selected_option == "One-Hot Encoding":
            self.one_hot_encode_data()
    
    def label_encode_data(self):
        """Replace every object-typed column with integer labels from ``LabelEncoder``."""
        from sklearn.preprocessing import LabelEncoder

        object_columns = self.dataset.select_dtypes(include='object').columns

        # One encoder instance, re-fitted for each column by DataFrame.apply.
        encoder = LabelEncoder()
        encoded = self.dataset[object_columns].apply(encoder.fit_transform)
        self.dataset[object_columns] = encoded

        self.show_encoding_message("Label Encoding")
    
    def one_hot_encode_data(self):
        # Use pandas get_dummies for one-hot encoding
        self.dataset = pd.get_dummies(self.dataset, drop_first=True)
    
        # Display a message box indicating successful one-hot encoding
        self.show_encoding_message("One-Hot Encoding")
    
    def show_encoding_message(self, encoding_type):
        """Tell the user, via an info dialog, that an encoding step has finished.

        Args:
            encoding_type: Name of the encoding that was applied.
        """
        messagebox.showinfo("Encoding Completed", f"{encoding_type} completed successfully!")


    def select_and_train_model(self):
        """Open a small window where the user picks a model type and starts training."""
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before displaying!")
            return

        model_window = tk.Toplevel(self.master)
        model_window.title("Select Model")

        model_names = [
            "Linear Regression",
            "Logistic Regression",
            "Decision Tree",
            "Random Forest",
            "Naive Bayes",
            "Support Vector Machine",
            "K-Means",
            "K-Nearest Neighbors",
            "Neural Network",
        ]
        # Pre-select the first model.
        choice = tk.StringVar(model_window, value=model_names[0])

        ttk.Combobox(model_window, values=model_names, textvariable=choice).pack(pady=10)

        # The lambda reads the combobox value at click time.
        tk.Button(
            model_window,
            text="Train",
            command=lambda: self.train_model(choice.get()),
        ).pack(pady=10)

    def train_model(self, selected_model):
        """Split the selected columns 70/30 and fit the chosen model on the training part.

        On success the split is stored in ``self.X_train``/``self.X_test``/
        ``self.y_train``/``self.y_test`` and the fitted estimator in
        ``self.trained_model``. The user is warned, and nothing is changed,
        when no dataset is loaded, when the X/y selection is unusable, or
        when ``selected_model`` is not a known model name.

        Args:
            selected_model: Display name of the model, as offered by
                select_and_train_model().
        """
        if self.dataset is None or self.dataset.empty:
            messagebox.showwarning("Warning", "Please load the dataset before training!")
            return

        X_columns = list(self.box2.get(0, tk.END))
        y_columns = list(self.box3.get(0, tk.END))

        # Flattening several y columns into one vector would no longer line
        # up with the rows of X, so exactly one target column is required.
        if not X_columns or len(y_columns) != 1:
            messagebox.showwarning(
                "Warning",
                "Please select at least one X column and exactly one y column before training!",
            )
            return

        model_factories = {
            "Linear Regression": LinearRegression,
            "Logistic Regression": LogisticRegression,
            "Decision Tree": DecisionTreeClassifier,
            "Random Forest": RandomForestClassifier,
            "Naive Bayes": GaussianNB,
            "Support Vector Machine": SVC,
            "K-Means": KMeans,
            "K-Nearest Neighbors": KNeighborsClassifier,
            "Neural Network": MLPClassifier,
        }
        model_factory = model_factories.get(selected_model)
        if model_factory is None:
            messagebox.showwarning("Warning", f"Unknown model: {selected_model}")
            return

        X = self.dataset[X_columns].values
        y = self.dataset[y_columns[0]].values

        # Fixed random_state keeps the split reproducible between runs.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        model = model_factory()
        model.fit(self.X_train, self.y_train)
        self.trained_model = model
    
        


    def test(self):
        """Evaluate the trained model on the held-out data and show the metrics.

        Shows a warning and returns if no model has been trained. For a
        ``LinearRegression`` model the dialog reports mean squared error and
        R^2; for every other model it reports accuracy and the
        classification report.
        """
        model = self.trained_model
        if model is None:
            messagebox.showwarning("Warning", "Please train a model before testing!")
            return

        # Predictions for the held-out test features.
        predictions = model.predict(self.X_test)
        actual = self.y_test

        if not isinstance(model, LinearRegression):
            # Every non-regression model is scored with classification metrics.
            accuracy = accuracy_score(actual, predictions)
            report = classification_report(actual, predictions)
            summary = f"Accuracy: {accuracy:.2f}\n\nClassification Report:\n{report}"
        else:
            # Regression metrics for the linear regression model.
            mse = mean_squared_error(actual, predictions)
            r2 = r2_score(actual, predictions)
            summary = f"Mean Squared Error: {mse:.2f}\nR^2 Score: {r2:.2f}"

        messagebox.showinfo("Test Results", summary)
    
    def show_result(self):
        """Open a window with evaluation metrics and plots for the trained model.

        Shows a warning and returns if no model has been trained. For a
        ``LinearRegression`` model the window shows mean squared error, R^2
        and a true-vs-predicted scatter plot. For every other model it shows
        the accuracy, an accuracy/error bar chart, a confusion-matrix heatmap
        and a short explanation.

        The method returns as soon as the window is built; the window is
        served by the event loop that is already running. The Matplotlib
        figure is closed when the window is closed so repeated calls do not
        accumulate open figures.
        """
        if self.trained_model is None:
            messagebox.showwarning("Warning", "Please train a model before showing results!")
            return

        # Predict using the trained model
        y_pred = self.trained_model.predict(self.X_test)
        is_regression = isinstance(self.trained_model, LinearRegression)

        results_window = tk.Toplevel(self.master)
        results_window.title("Results")

        if is_regression:
            # Use regression metrics for regression models
            mse = mean_squared_error(self.y_test, y_pred)
            r2 = r2_score(self.y_test, y_pred)

            mse_label = tk.Label(results_window, text=f"Mean Squared Error: {mse:.2f}")
            mse_label.pack(pady=10)

            r2_label = tk.Label(results_window, text=f"R^2 Score: {r2:.2f}")
            r2_label.pack(pady=10)

            # Scatter plot of true vs predicted values; the dashed diagonal
            # marks where predictions equal the true values.
            fig, ax = plt.subplots(figsize=(8, 6))
            ax.scatter(self.y_test, y_pred, color='blue')
            low, high = min(self.y_test), max(self.y_test)
            ax.plot([low, high], [low, high], linestyle='--', color='red', linewidth=2)
            ax.set_title('True vs Predicted Values')
            ax.set_xlabel('True Values')
            ax.set_ylabel('Predicted Values')
        else:
            # Use classification metrics for other models
            accuracy = accuracy_score(self.y_test, y_pred)

            accuracy_label = tk.Label(results_window, text=f"Accuracy: {accuracy:.2%}")
            accuracy_label.pack(pady=10)

            fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4), dpi=100)

            # Plot accuracy bar chart
            ax1.bar(['Accuracy', 'Error'], [accuracy, 1 - accuracy], color=['blue', 'red'])
            ax1.set_ylim(0, 1)
            ax1.set_ylabel('Percentage')
            ax1.set_title('Model Evaluation')

            # Plot confusion matrix
            cm = confusion_matrix(self.y_test, y_pred)
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax2)
            ax2.set_title('Confusion Matrix')
            ax2.set_xlabel('Predicted')
            ax2.set_ylabel('True')

        # Embed the Matplotlib figure in the Tkinter window
        canvas = FigureCanvasTkAgg(fig, master=results_window)
        canvas.draw()
        canvas.get_tk_widget().pack()

        # Display text explanation for classification metrics
        if not is_regression:
            explanation_text = (
                "The accuracy represents the proportion of correctly classified instances.\n"
                "The error represents the proportion of incorrectly classified instances."
            )
            explanation_label = tk.Label(results_window, text=explanation_text, wraplength=500)
            explanation_label.pack(pady=10)

        def close_results():
            # Release the pyplot-managed figure together with its window.
            plt.close(fig)
            results_window.destroy()

        results_window.protocol("WM_DELETE_WINDOW", close_results)

if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the application
    # on it, and hand control to the Tk event loop.
    root = tk.Tk()
    app = MachineLearningApp(root)
    root.mainloop()

Dataset selected: C:/Users/hp/Downloads/cardekho_data.csv
Dataset selected: C:/Users/hp/Downloads/cardekho_data.csv
Dataset selected: C:/Users/hp/Downloads/cardekho_data.csv
Dataset selected: C:/Users/hp/Downloads/IRIS.csv




Dataset selected: C:/Users/hp/Downloads/cardekho_data.csv
Dataset selected: C:/Users/hp/Downloads/cardekho_data.csv
