In [4]:
import io
import os
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from urllib.parse import urlparse

import chardet
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [2]:
# Shared application state, read and rebound by the GUI callbacks below.
df = pd.DataFrame()  # currently loaded dataset (an empty frame, not the DataFrame class)
cols = []            # column labels of ``df``
nulls = []           # per-column null ratio; replaced by a Series in display_null()
features = []        # columns still available as model inputs
inputs = []          # columns chosen as model inputs
target = ""          # column chosen as the prediction target
extension = ""       # extension of the loaded file ('csv', 'xls' or 'xlsx')
file_name = ""       # base name of the loaded file

In [3]:
def change_source(event):
    """Show the controls that match the source picked in ``sourceMenu``.

    Used as the combobox ``<<ComboboxSelected>>`` handler; ``event`` is unused.
    """
    choice = sourceMenu.get()
    if choice == "By local":
        # Local mode: only the file-chooser button is visible.
        localBtn.grid(row=1, column=0)
        for hidden in (linkBtn, linkEntry):
            hidden.grid_forget()
    elif choice == "By link":
        # Link mode: swap the chooser for the URL entry and its button.
        localBtn.grid_forget()
        linkBtn.grid(row=1, column=0)
        linkEntry.grid(row=1, column=1)


def get_encoding(file_path, sample_size=None):
    """Guess the text encoding of a file with ``chardet``.

    Args:
        file_path: Path of the file to inspect.
        sample_size: Optional number of leading bytes to analyse. ``None``
            (the default) analyses the whole file.

    Returns:
        The detected encoding name, or ``None`` when the file cannot be read
        or no encoding could be detected (a message is printed in both cases).
    """
    try:
        with open(file_path, 'rb') as file:
            raw = file.read() if sample_size is None else file.read(sample_size)
        encoding = chardet.detect(raw).get("encoding")
    except Exception as e:
        print(f"Error in detecting encoding: {e}")
        return None

    # Explicit check instead of ``assert``: assertions are stripped under ``python -O``.
    if not encoding:
        print("Error in detecting encoding: Unable to detect encoding, is it a binary file?")
        return None
    return encoding


def choose_file_local():
    """Ask for a local ``.csv``/``.xls``/``.xlsx`` file and load it into ``df``.

    On success the module-level state (``df``, ``cols``, ``inputs``,
    ``target``, ``extension``, ``file_name``) is replaced and the statistic
    and preview panels are refreshed. When the dialog is cancelled, the file
    type is unsupported or loading fails, the current dataset is left as is.
    """
    global df, cols, inputs, target, extension, file_name

    file_path = filedialog.askopenfilename()
    if not file_path:
        return  # dialog cancelled

    # splitext copes with paths that contain no dot; compare case-insensitively.
    ext = os.path.splitext(file_path)[1].lstrip('.').lower()
    if ext not in ('xls', 'xlsx', 'csv'):
        print("Incorrect type of file.")
        messagebox.showerror("Error", "File is not excel or csv file!")
        return

    try:
        if ext == 'csv':
            loaded = pd.read_csv(file_path, encoding=get_encoding(file_path))
        else:
            # Workbooks are binary; read_excel takes no ``encoding`` argument.
            loaded = pd.read_excel(file_path)
    except Exception as e:
        print(f"Error in load data: {e}")
        messagebox.showerror("Error", f"Could not load file:\n{e}")
        return  # keep the previously loaded dataset

    # Commit the new dataset only after it loaded successfully.
    df = loaded
    extension = ext
    file_name = os.path.basename(file_path)
    dataName.config(text=file_name)

    cols = df.columns.values.tolist()
    inputs = []
    target = ""

    # statistic: shape, null, duplicates
    display_statistic()

    # preview
    display_preview()


def get_data_online(file_path, timeout=30):
    """Download ``file_path`` and return its body decoded to text.

    Args:
        file_path: URL of the resource to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the GUI indefinitely.

    Returns:
        The response body decoded with the encoding detected by ``chardet``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If no text encoding could be detected.
    """
    response = requests.get(file_path, allow_redirects=True, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page as data.
    response.raise_for_status()
    data = response.content

    encoding = chardet.detect(data).get("encoding")
    if not encoding:
        raise ValueError("Unable to detect encoding, is it a binary file?")

    return data.decode(encoding)


def find_online_data():
    """Download the dataset at the URL typed in ``linkEntry`` and load it into ``df``.

    On success the module-level state (``df``, ``cols``, ``inputs``,
    ``target``, ``extension``, ``file_name``) is replaced and the statistic
    and preview panels are refreshed. When the entry is empty, the link is
    not a csv/excel file or loading fails, the current dataset is left as is.
    """
    global df, cols, inputs, target, extension, file_name

    url = linkEntry.get().strip()
    if not url:
        return

    # Use only the URL path so query strings and fragments do not leak into
    # the extension or the file name.
    url_path = urlparse(url).path
    ext = os.path.splitext(url_path)[1].lstrip('.').lower()
    if ext not in ('xls', 'xlsx', 'csv'):
        print("Incorrect type of file.")
        # A modal message box replaces the second Tk root + nested mainloop.
        messagebox.showerror("Error", "Link is not excel or csv file!")
        return

    try:
        if ext == 'csv':
            text = get_data_online(url)
            # sep=None lets pandas sniff the delimiter (comma, tab, ...).
            loaded = pd.read_csv(io.StringIO(text), sep=None, engine='python')
        else:
            # Workbooks are binary: fetch the raw bytes instead of decoded text.
            response = requests.get(url, allow_redirects=True, timeout=30)
            response.raise_for_status()
            loaded = pd.read_excel(io.BytesIO(response.content))
    except Exception as e:
        print(f"Error in load data: {e}")
        messagebox.showerror("Error", f"Could not load data from link:\n{e}")
        return  # keep the previously loaded dataset

    # Commit the new dataset only after it loaded successfully.
    df = loaded
    extension = ext
    file_name = os.path.basename(url_path)
    dataName.config(text=file_name)

    cols = df.columns.values.tolist()
    inputs = []
    target = ""

    # statistic: shape, null, duplicates
    display_statistic()

    # preview
    display_preview()


def display_statistic():
    """Bring the statistic/preview panel to the front and refresh its figures."""
    # Only one detail panel is gridded at a time.
    frameVl.grid(row=0, column=0, columnspan=1, sticky="nsew")
    for other_panel in (frameMd, frameVisual):
        other_panel.grid_forget()

    for refresh in (display_shape, display_null, display_duplicates):
        refresh()

def display_shape():
    """Write the shape of the current ``df`` into the shape label."""
    shape_text = f"Shape: {df.shape}"
    shapeLb.config(text=shape_text)


def del_row(col_name):
    """Drop every row whose value in ``col_name`` is null, then refresh the panels.

    Args:
        col_name: Label of the column checked for missing values.
    """
    global df

    # Wrap the label in a list: a scalar ``subset`` is not accepted by every
    # pandas release, a one-element list always is.
    df = df.dropna(subset=[col_name])
    print(f"Drop row have null {col_name} value")

    # update: shape, null, duplicates, preview
    display_shape()
    display_null()
    display_duplicates()
    display_preview()


def del_col(col_name):
    """Remove column ``col_name`` from ``df`` and refresh the dependent panels.

    Args:
        col_name: Label of the column to remove.
    """
    global df
    global cols

    df = df.drop(columns=col_name)
    print(f"Drop col {col_name}")

    # The column list must follow the frame before the panels are redrawn.
    cols = df.columns.values.tolist()
    for refresh in (display_shape, display_null, display_duplicates, display_preview):
        refresh()


def drop_all_null(threshold=0.5):
    """Remove all null values from ``df`` in one step.

    Columns whose share of null values is at least ``threshold`` are dropped
    entirely; for the remaining columns every row containing a null is dropped.

    Args:
        threshold: Null ratio (0..1) from which a column is removed instead
            of having its incomplete rows removed.
    """
    global df
    global cols

    # Ratios are computed on the frame as it is now, before any row is removed.
    null_ratio = df.isnull().mean()
    sparse_cols = null_ratio[null_ratio >= threshold].index

    # Drop the sparse columns first, then the incomplete rows of what is left.
    df = df.drop(columns=sparse_cols).dropna()

    # update: cols, shape, null, duplicates, preview
    cols = df.columns.values.tolist()
    display_shape()
    display_null()
    display_duplicates()
    display_preview()


def drop_duplicated():
    """Remove duplicated rows from ``df`` and refresh every statistic panel."""
    global df

    deduplicated = df.drop_duplicates()
    df = deduplicated
    print("Drop duplicate!")

    # Shape, null summary, duplicate count and preview all depend on the rows.
    for refresh in (display_shape, display_null, display_duplicates, display_preview):
        refresh()


def display_null():
    """Rebuild the null-value summary: total count plus one row per affected column.

    Updates the module-level ``nulls`` Series (null ratio per column). Each
    column containing nulls gets a row with its ratio and "drop Row" /
    "drop Col" buttons. The "Drop all" button is shown only while nulls exist.
    """
    global nulls

    null_mask = df.isnull()
    nulls = null_mask.mean()
    null_sum = int(null_mask.sum().sum())
    nullValueLb.config(text=f"Null Value: {null_sum} values.")

    # delete all in frame
    for widget in nullFrame.winfo_children():
        widget.destroy()

    if null_sum == 0:
        # Nothing left to clean: hide the bulk action as well.
        delNullBtn.grid_forget()
        return

    # Iterate the freshly computed Series so labels always match ``df``.
    for i, (col, ratio) in enumerate(nulls.items()):
        if ratio <= 0.0:
            continue
        nullRow = tk.Frame(nullFrame)
        nullRow.grid(row=i, column=0, padx=10, pady=5, sticky="ew")
        attName = tk.Label(nullRow, text=f"{col}", width=15)
        attName.grid(row=0, column=0, columnspan=1, padx=(0, 10), sticky="w")
        nullPer = tk.Label(nullRow, text=f"{ratio:.2%}", width=10)
        nullPer.grid(row=0, column=1, columnspan=1, padx=(0, 10), sticky="e")
        ## null btns
        frameNullBtn = ttk.Frame(nullRow)
        frameNullBtn.grid(row=0, column=2, columnspan=1)
        # ``col=col`` freezes the label for this row's callbacks.
        drRowBtn = tk.Button(frameNullBtn, text="drop Row", fg='white', bg='#0096FF',
                             command=lambda col=col: del_row(col))
        drRowBtn.grid(row=0, column=0, columnspan=1)
        drColBtn = tk.Button(frameNullBtn, text="drop Col", fg='white', bg='#0096FF',
                             command=lambda col=col: del_col(col))
        drColBtn.grid(row=0, column=1, columnspan=1)
    delNullBtn.grid(row=4, column=1, columnspan=1)


def display_duplicates():
    """Show how many duplicated rows ``df`` has and toggle the drop button."""
    n_duplicates = df.duplicated().sum()
    dupicatedLb.config(text=f"Duplicates: {n_duplicates} values")

    # The drop button is only offered while there is something to drop.
    if not n_duplicates > 0:
        dpDelBtn.grid_forget()
        return
    dpDelBtn.grid(row=5, column=1, columnspan=1)


def display_preview():
    """Show the first N rows of ``df`` in the preview text box.

    N is read from ``numRowEntry``. When the entry does not hold a
    non-negative integer an error dialog is shown and the preview is left
    unchanged.
    """
    try:
        num_row = int(numRowEntry.get().strip())
        if num_row < 0:
            raise ValueError(num_row)
    except ValueError:
        messagebox.showerror("Error", "Number of preview rows must be a non-negative integer.")
        return

    df_head = df.head(num_row)

    # delete content
    dataText.delete("1.0", tk.END)

    # display new content (the Text widget stores strings, so convert explicitly)
    dataText.insert(tk.END, str(df_head))


def display_chart_var():
    """Fill the X and Y axis comboboxes with the current column names.

    X defaults to the first column and Y to the second (when there is one),
    because ``draw_chart`` rejects identical X and Y for two-variable charts.
    With no dataset loaded both boxes are simply emptied.
    """
    x_listbox['values'] = cols
    y_listbox['values'] = cols

    if not cols:
        # current(0) raises TclError on an empty value list.
        x_listbox.set('')
        y_listbox.set('')
        return

    x_listbox.current(0)
    y_listbox.current(1 if len(cols) > 1 else 0)


def display_visualize():
    """Bring the chart panel to the front and reload its axis selectors."""
    # Only one detail panel is gridded at a time.
    for other_panel in (frameVl, frameMd):
        other_panel.grid_forget()
    frameVisual.grid(row=0, column=0, columnspan=1, sticky="nsew")

    display_chart_var()

def display_features_and_inputs():
    """Refresh the "available" and "chosen" input listboxes.

    ``features`` is recomputed as every column that is neither the target nor
    an already chosen input, kept in the dataset's column order so the list
    does not reshuffle between refreshes.
    """
    global features

    chosen = set(inputs)
    # dict.fromkeys removes duplicate labels while preserving column order.
    features = [col for col in dict.fromkeys(cols)
                if col != target and col not in chosen]

    for listbox, items in ((prepareListbox, features), (inputListbox, inputs)):
        listbox.delete(0, tk.END)
        for item in items:
            listbox.insert(tk.END, str(item))


def display_model():
    """Bring the training panel to the front and sync its target/input widgets.

    The current target is kept when it is still a column of the dataset;
    otherwise the first column becomes the target. The target is removed from
    ``inputs`` so a column is never both predictor and label. With no dataset
    loaded the widgets are emptied instead of raising.
    """
    global target
    global inputs

    frameVl.grid_forget()
    frameMd.grid(row=0, column=0, columnspan=1, sticky="nsew")
    frameVisual.grid_forget()

    # target
    targetMenu['values'] = cols
    if cols:
        if target not in cols:
            target = cols[0]
        targetMenu.current(cols.index(target))
    else:
        # current(0) / cols[0] would raise on an empty column list.
        target = ""
        targetMenu.set('')

    inputs = [col for col in inputs if col != target]

    # remaining features
    display_features_and_inputs()


def change_target(event):
    """Handle a new selection in ``targetMenu``.

    Stores the chosen column as ``target`` and removes it from ``inputs`` so
    the label can never be used as one of its own predictors. ``event`` is
    unused.
    """
    global target
    global inputs

    target = targetMenu.get()
    inputs = [col for col in inputs if col != target]
    display_features_and_inputs()


def add_input():
    """Move the columns selected in ``prepareListbox`` into ``inputs``.

    New inputs are appended in selection order, so the feature order (and the
    order of the displayed regression coefficients) is deterministic.
    """
    global features
    global inputs

    chosen_features = prepareListbox.curselection()
    if not chosen_features:
        return

    selected = [prepareListbox.get(idx) for idx in chosen_features]

    # update remaining features + inputs, preserving order
    inputs = inputs + [name for name in selected if name not in inputs]
    features = [name for name in features if name not in selected]

    display_features_and_inputs()


def remove_input():
    """Move the columns selected in ``inputListbox`` back to the available list.

    The relative order of the remaining inputs is preserved.
    """
    global features
    global inputs

    chosen_inputs = inputListbox.curselection()
    if not chosen_inputs:
        return

    selected = [inputListbox.get(idx) for idx in chosen_inputs]

    # update remaining features + inputs, preserving order
    features = features + [name for name in selected if name not in features]
    inputs = [name for name in inputs if name not in selected]

    display_features_and_inputs()


def preprocess_data(data):
    """Label-encode the object-dtype columns used for training.

    Works on the columns named in the module-level ``inputs`` followed by
    ``target``; ``data`` is modified in place and also returned.
    """
    encoder = LabelEncoder()
    # Inputs first, target last; fit_transform refits the encoder per column.
    for name in [*inputs, target]:
        column = data[name]
        if column.dtype == "object":
            data[name] = encoder.fit_transform(column)
    return data


def display_matrix(cm):
    """Draw confusion matrix ``cm`` as a heat map inside ``resultTable``.

    Args:
        cm: Square array of counts with true classes on the rows and
            predicted classes on the columns (scikit-learn's layout).
    """
    # Remove the previous result so canvases do not pile up in the frame.
    for widget in resultTable.winfo_children():
        widget.destroy()

    fig = Figure(figsize=(6, 4))
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm, cmap='Blues')
    fig.colorbar(cax)
    ax.set_title('Confusion matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')

    # Print the count in every cell; colour alone is hard to read.
    for (row, col), count in np.ndenumerate(np.asarray(cm)):
        ax.text(col, row, str(count), ha='center', va='center')

    canvas = FigureCanvasTkAgg(fig, master=resultTable)
    canvas.draw()
    canvas.get_tk_widget().grid(row=0, column=0)


def LR_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression, report its quality and plot the result.

    The label shows the R^2 score on the test split together with the fitted
    coefficients and intercept. The chart depends on the number of features:
    one feature gives a scatter plus fitted line, two features give a 3-D
    surface, and more than two give a predicted-versus-actual scatter.

    Args:
        x_train, x_test: Feature DataFrames of the train/test split.
        y_train, y_test: Matching target Series.
    """
    # train model
    model = LinearRegression()
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    y_hat = model.predict(x_test)

    # One label update: previously the score was overwritten by the coefficients.
    accuracyLb.config(
        text=f"Score: {score:.4f}\nCoef: {model.coef_}, Intercept: {model.intercept_}")

    # Remove the previous result so canvases do not pile up in the frame.
    for widget in resultTable.winfo_children():
        widget.destroy()

    fig = Figure(figsize=(6, 4))
    feature_names = list(x_test.columns)
    num_var = len(feature_names)

    if num_var == 1:
        ax = fig.add_subplot(111)
        ax.scatter(x_test.iloc[:, 0], y_test, color='blue', label='Actual')
        ax.plot(x_test.iloc[:, 0], y_hat, color='red', label='Predicted')
        ax.set_xlabel(feature_names[0])
        ax.set_ylabel(target)
        ax.legend()

    elif num_var == 2:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(x_test[feature_names[0]], x_test[feature_names[1]], y_test, color='blue')
        ax.plot_trisurf(x_test[feature_names[0]], x_test[feature_names[1]], y_hat, color='red')
        ax.set_xlabel(feature_names[0])
        ax.set_ylabel(feature_names[1])
        ax.set_zlabel(target)

    else:
        # With many features no single axis is meaningful; compare predictions
        # with the true values instead. Points on the diagonal are perfect.
        ax = fig.add_subplot(111)
        ax.scatter(y_test, y_hat, color='blue', alpha=0.6, label='Test samples')
        low = min(np.min(y_test), np.min(y_hat))
        high = max(np.max(y_test), np.max(y_hat))
        ax.plot([low, high], [low, high], color='red', label='Ideal')
        ax.set_xlabel(f"Actual {target}")
        ax.set_ylabel(f"Predicted {target}")
        ax.legend()

    ax.grid(True)
    canvas = FigureCanvasTkAgg(fig, master=resultTable)
    canvas.draw()
    canvas.get_tk_widget().grid(row=0)


def LoR_model(x_train, x_test, y_train, y_test):
    """Fit a logistic regression and show its accuracy and confusion matrix.

    Args:
        x_train, x_test: Feature DataFrames of the train/test split.
        y_train, y_test: Matching target Series.
    """
    # train model
    model = LogisticRegression(random_state=0)
    model.fit(x_train, y_train)
    y_hat = model.predict(x_test)
    score = accuracy_score(y_test, y_hat)
    accuracyLb.config(text=f"Accuracy: {score:.4f}")

    # display matrix
    resultTable.grid(row=8, column=0, columnspan=3)
    # No ``labels`` argument: scikit-learn then uses every class present in
    # y_test or y_hat, so classes that were never predicted are kept.
    cm = confusion_matrix(y_test, y_hat)
    display_matrix(cm)

    
def KNN_model(x_train, x_test, y_train, y_test, k=10):
    """Fit a k-nearest-neighbours classifier and show accuracy and confusion matrix.

    Args:
        x_train, x_test: Feature DataFrames of the train/test split.
        y_train, y_test: Matching target Series.
        k: Requested number of neighbours. It is capped at the number of
            training samples, because predicting with more neighbours than
            fitted samples raises an error.
    """
    # train model
    n_neighbors = max(1, min(k, len(x_train)))
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    model.fit(x_train, y_train)
    y_hat = model.predict(x_test)
    score = accuracy_score(y_test, y_hat)
    accuracyLb.config(text=f"Accuracy: {score:.4f}")

    # display matrix
    resultTable.grid(row=8, column=0, columnspan=3)
    cm = confusion_matrix(y_test, y_hat)
    display_matrix(cm)
    
    
def decisionTree_model(x_train, x_test, y_train, y_test):
    """Fit a decision-tree classifier, show its test accuracy and draw the tree.

    Args:
        x_train, x_test: Feature DataFrames of the train/test split.
        y_train, y_test: Matching target Series.
    """
    # Fixed seed so repeated runs on the same data give the same tree.
    model = DecisionTreeClassifier(random_state=0)
    model.fit(x_train, y_train)

    # Report accuracy like the other classifiers instead of leaving a stale label.
    score = accuracy_score(y_test, model.predict(x_test))
    accuracyLb.config(text=f"Accuracy: {score:.4f}")

    # Remove the previous result so canvases do not pile up in the frame.
    for widget in resultTable.winfo_children():
        widget.destroy()
    resultTable.grid(row=8, column=0, columnspan=3)

    # A standalone Figure (not plt.figure) keeps pyplot's global registry
    # from accumulating one open figure per training run.
    fig = Figure(figsize=(10, 5))
    ax = fig.add_subplot(111)
    plot_tree(model, filled=True, ax=ax,
              feature_names=[str(name) for name in x_train.columns])
    ax.set_title("Decision Tree")

    canvas = FigureCanvasTkAgg(fig, master=resultTable)
    canvas.draw()
    canvas.get_tk_widget().grid(row=0, column=0)
    
    
def train_model():
    """Train the model chosen in ``modelMenu`` on the selected inputs and target.

    Only the selected columns are used: rows with nulls in those columns are
    dropped first, then categorical columns are label-encoded, then the data
    is split 80/20. Invalid selections and training errors are reported in a
    dialog instead of only reaching the console.
    """
    if not inputs:
        messagebox.showerror("Error", "Please choose at least one input variable.")
        return
    if not target:
        messagebox.showerror("Error", "Please choose a target variable.")
        return
    if target in inputs:
        messagebox.showerror("Error", "The target variable cannot also be an input variable.")
        return

    trainers = {
        'Linear Regression': LR_model,
        'Logistic Regression': LoR_model,
        'K-nearest Neighbor': KNN_model,
    }
    # Any other choice falls through to the decision tree.
    trainer = trainers.get(modelMenu.get(), decisionTree_model)

    try:
        # Drop nulls BEFORE encoding (the encoder cannot sort NaN with
        # strings) and only on the columns that are actually used.
        data = df[[*inputs, target]].dropna().copy()
        data = preprocess_data(data)

        # get data
        X = data[inputs]
        Y = data[target]
        x_train, x_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.2, random_state=42)

        trainer(x_train, x_test, y_train, y_test)
    except Exception as e:
        print(f"Error in training model: {e}")
        messagebox.showerror("Error", f"Training failed:\n{e}")


# def draw_chart(chart_name):
#     global df
#     x_axis = x_listbox.get()
#     y_axis = y_listbox.get()

#     fig = Figure(figsize=(6, 4))
#     ax = fig.add_subplot(111)

#     if chart_name == "pie":
#         data = df[x_axis].value_counts()
#         labels = data.index.tolist()
#         ax.pie(data, labels=labels, autopct='%1.1f%%', startangle=90, shadow=True)
#         ax.set_title(f'Pie chart of {x_axis}')
#     elif chart_name == "box":
#         data = df[x_axis]
#         ax.boxplot(data)
#         ax.set_title(f'Box plot chart of {x_axis}')
#     elif chart_name == "bar":
#         ax.bar(df[x_axis], df[y_axis])
#         ax.set_title(f'Bar chart of {x_axis} and {y_axis}')
#         plt.xlabel(x_axis)
#         plt.ylabel(y_axis)
#     elif chart_name == "scatter":
#         data = df[[x_axis, y_axis]]
#         ax.scatter(df[x_axis], df[y_axis], color='blue', alpha=0.5)
#         ax.set_title(f"Scatter char of {x_axis} and {y_axis}")
#         ax.grid(True)
#         plt.xlabel(x_axis)
#         plt.ylabel(y_axis)

#     # fig.colorbar(chart)
#     canvas = FigureCanvasTkAgg(fig, master=chart_draw_frame)
#     canvas.draw()
#     canvas.get_tk_widget().grid(row=0)
# Function to draw a chart based on selected x and y variables

def draw_chart(chart_name):
    """Draw the requested chart of the selected column(s) in ``chart_draw_frame``.

    Args:
        chart_name: One of ``"pie"``, ``"box"``, ``"bar"`` or ``"scatter"``.
            Pie and box charts use only the X selection; bar and scatter
            charts need two different columns.
    """
    x_axis = x_listbox.get()
    y_axis = y_listbox.get()
    needs_y = chart_name in ("bar", "scatter")

    if not x_axis:
        messagebox.showerror("Error", "Please select an x variable.")
        return

    if needs_y and not y_axis:
        messagebox.showerror("Error", "Please select both x and y variables.")
        return

    if needs_y and x_axis == y_axis:
        messagebox.showerror("Error", "Please select different variables for x and y.")
        return

    fig = Figure(figsize=(6, 4))
    ax = fig.add_subplot(111)

    try:
        if chart_name == "pie":
            data = df[x_axis].value_counts()
            labels = data.index.tolist()
            ax.pie(data, labels=labels, autopct='%1.1f%%', startangle=90, shadow=True)
            ax.set_title(f'Pie chart of {x_axis}')
        elif chart_name == "box":
            # Nulls would turn the whole box into NaN, so leave them out.
            ax.boxplot(df[x_axis].dropna())
            ax.set_title(f'Box plot chart of {x_axis}')
        elif chart_name == "bar":
            ax.bar(df[x_axis], df[y_axis])
            ax.set_title(f'Bar chart of {x_axis} and {y_axis}')
            # Label this figure's axes; plt.xlabel would target pyplot's own
            # current figure, not ``fig``.
            ax.set_xlabel(x_axis)
            ax.set_ylabel(y_axis)
        elif chart_name == "scatter":
            ax.scatter(df[x_axis], df[y_axis], color='blue', alpha=0.5)
            ax.set_title(f"Scatter char of {x_axis} and {y_axis}")
            ax.grid(True)
            ax.set_xlabel(x_axis)
            ax.set_ylabel(y_axis)

        # Remove the previous chart so canvases do not pile up in the frame.
        for widget in chart_draw_frame.winfo_children():
            widget.destroy()

        canvas = FigureCanvasTkAgg(fig, master=chart_draw_frame)
        canvas.draw()
        canvas.get_tk_widget().grid(row=0)
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred while drawing the chart: {e}")


def down_data():
    """Save the current dataset under its original file name in a chosen folder.

    The format follows the extension of the loaded file. The DataFrame index
    is not written, so reloading the saved file does not add an extra
    ``Unnamed: 0`` column. Results are reported through message boxes.
    """
    if not file_name:
        messagebox.showerror("Error", "No dataset loaded, nothing to save.")
        return

    folder_path = filedialog.askdirectory()
    if not folder_path:
        print("Error in saving file.")
        messagebox.showwarning("Save dataset", "Download file failed!")
        return

    file_path = os.path.join(folder_path, file_name)
    try:
        if extension in ('xls', 'xlsx'):
            df.to_excel(file_path, index=False)
        elif extension == 'csv':
            df.to_csv(file_path, index=False)
        else:
            raise ValueError(f"Unsupported file type: {extension!r}")
    except Exception as e:
        print(f"Error in saving file: {e}")
        messagebox.showerror("Error", f"Download file failed!\n{e}")
        return

    messagebox.showinfo("Save dataset", f"Download successfully to link: \n{file_path}")


In [None]:
# Build the front end: root window, a padded main container and two bordered
# panes placed side by side in the container's grid.
root = tk.Tk()
root.title("Machine Learning")
root.geometry("1350x1000")
# main_frame fills the window; its grid row 0 takes up any extra height
main_frame = ttk.Frame(root, padding=10)
main_frame.pack(fill="both", expand=True)
main_frame.grid_rowconfigure(0, weight=1)
# frame1 (grid column 0) is the parent of the dataset/navigation widgets,
# frame2 (grid column 1, spanning 2) is the parent of the detail panels
frame1 = ttk.Frame(main_frame, padding=10, borderwidth=1, relief="solid")
frame1.grid(row=0, column=0, columnspan=1, sticky="nsew")
frame2 = ttk.Frame(main_frame, padding=10, borderwidth=1, relief="solid")
frame2.grid(row=0, column=1, columnspan=2, sticky="nsew")

# Sidebar (frame1): data source selection, navigation and save.
dataLb = ttk.Label(frame1, text="Dataset", font="bold", padding=5)
dataLb.grid(row=0, column=0, columnspan=1)
## source head: choose between a local file and a download link
sourceList = ['By local', 'By link']
sourceMenu = ttk.Combobox(frame1, values=sourceList, width=10, state='readonly')
sourceMenu.current(0)
sourceMenu.grid(row=0, column=1, columnspan=1)
sourceMenu.bind("<<ComboboxSelected>>", change_source)
## choose source
localBtn = tk.Button(frame1, text="Choose file", command=choose_file_local, fg='white', bg='black')
localBtn.grid(row=1, column=0)
# The link controls are created but not gridded; change_source() grids them
# when "By link" is selected.
linkBtn = tk.Button(frame1, text="Find data", command=find_online_data, fg='white', bg='black')
# ``textvariable`` must be a Tk variable object (it was the literal False).
dataLink = tk.StringVar(master=frame1)
linkEntry = tk.Entry(frame1, textvariable=dataLink, font=('calibres', 10))
## source name
dataName = tk.Label(frame1, text="No source")
dataName.grid(row=3, column=0, columnspan=2, sticky="ew", padx=5, pady=5)
## tag btns: switch the detail panel shown in frame2
statisticTag = tk.Button(frame1, text="Statistic Dataset", command=display_statistic, width=20)
statisticTag.grid(row=4, column=1, columnspan=1)
visualTag = tk.Button(frame1, text="Visualize Dataset", command=display_visualize, width=20)
visualTag.grid(row=5, column=1, columnspan=1)
modelTag = tk.Button(frame1, text="Training models", command=display_model, width=20)
modelTag.grid(row=6, column=1, columnspan=1)
# TODO: undo button (not implemented)
# undoBtn = tk.Button(frame1, text="Undo", command=undo)
# undoBtn.grid(row=6, column=0, columnspan=2)
downBtn = tk.Button(frame1, text="Save dataset", bg='green', fg='white', command=down_data)
downBtn.grid(row=7, column=1)

# Detail panels inside frame2: statistic/preview (frameVl), training (frameMd)
# and charts (frameVisual). The display_* callbacks grid one of them and
# grid_forget the other two.
frameVl = ttk.Frame(frame2)
frameVl.grid(row=0, column=0, columnspan=1, sticky="nsew")
# frameVl is the only panel gridded at start-up
frameMd = ttk.Frame(frame2)
# frameMd has not been gridded at this point, so nothing is visible to remove here
frameMd.grid_forget()
# chart panel, likewise not gridded at start-up
frameVisual = ttk.Frame(frame2)
frameVisual.grid_forget()

## frameVl is divided into 2 parts: statistic (frameSt, left) + preview (framePv, right)
frameSt = tk.Frame(frameVl)
frameSt.grid(row=0, column=0, columnspan=1, sticky="nsew")
frameSt.columnconfigure(0, minsize=240)
framePv = ttk.Frame(frameVl)
framePv.grid(row=0, column=1, columnspan=1, sticky="nsew")
## statistic head
# NOTE(review): font="bold" is presumably read by Tk as a font family name,
# not as a bold weight - confirm the intended look.
statisticLb = ttk.Label(frameSt, text="Statistic", font="bold", padding=5)
statisticLb.grid(row=0, column=0, columnspan=1)
## shape: text is replaced by display_shape()
shapeLb = tk.Label(frameSt, text="Shape: ( , )")
shapeLb.grid(row=1, column=0, columnspan=2)
## null value: label text and the rows inside nullFrame are rebuilt by display_null()
nullValueLb = tk.Label(frameSt, text=f"Null Value: ")
nullValueLb.grid(row=2, column=0, columnspan=2)
nullFrame = tk.Frame(frameSt)
nullFrame.grid(row=3, column=0, sticky="nsew", columnspan=2)
# "Drop all" button: not gridded here; display_null() grids it when nulls exist
delNullBtn = tk.Button(frameSt, text="Drop all", command=drop_all_null)
delNullBtn.grid_forget()
# drop_all_null() removes columns with >= 50% nulls and the incomplete rows of the rest
## duplicates: label text and button visibility are updated by display_duplicates()
dupicatedLb = tk.Label(frameSt, text="Duplicated values: ")
dupicatedLb.grid(row=5, column=0, columnspan=1)
dpDelBtn = tk.Button(frameSt, text="Drop duplicates", fg='white', bg='red', command=drop_duplicated)
dpDelBtn.grid_forget()

# Data preview (framePv): header, row-count entry with "Go" button, and a
# text box with scrollbars that display_preview() fills.
dataPreviewLb = ttk.Label(framePv, text="Data Preview", font="bold", padding=5)
dataPreviewLb.grid(row=0, column=0)
numRowFrame = tk.Frame(framePv, padx=5)
numRowFrame.grid(row=0, column=1)
# number of rows to preview; pre-filled with 5 and read by display_preview()
numRowEntry = tk.Entry(numRowFrame, width=5)
numRowEntry.insert(0, "5")
numRowEntry.grid(row=0, column=0)
numRowLb = tk.Label(numRowFrame, text="points")
numRowLb.grid(row=0, column=1)
prevBtn = tk.Button(numRowFrame, text="Go", fg='white', bg='blue', command=display_preview)
prevBtn.grid(row=0, column=2)

## data frame: text widget with a vertical and a horizontal scrollbar
dataFrame = tk.Frame(framePv, height=10, width=20, padx=20)
dataFrame.grid(row=1, column=0, columnspan=4)
dataText = tk.Text(dataFrame)
dataText.grid(row=0, column=0)
# each scrollbar drives the text view, and the text reports its position back
vertical_scrollBar = ttk.Scrollbar(dataFrame, orient=tk.VERTICAL, command=dataText.yview)
vertical_scrollBar.grid(row=0, column=1, sticky="ns")
dataText.config(yscrollcommand=vertical_scrollBar.set)
horizontal_scrollBar = ttk.Scrollbar(dataFrame, orient=tk.HORIZONTAL, command=dataText.xview)
horizontal_scrollBar.grid(row=1, column=0, sticky="ew")
dataText.config(xscrollcommand=horizontal_scrollBar.set)
# Disabled: a separate chart frame below the preview
# dataChart = tk.Frame(frameVl)
# dataChart.grid(row=2, column=0, columnspan=3)
#
# (a bar chart was planned here)
#
# Training panel (frameMd): target selector, input pickers, model selector,
# execute button, result label and result chart frame.
trainingLb = tk.Label(frameMd, text="Training Model", font="bold")
trainingLb.grid(row=0, column=0)
targetLb = tk.Label(frameMd, text="Target variable:")
targetLb.grid(row=1, column=0)
## target var: values are filled in by display_model()
# NOTE(review): targetDl is not referenced anywhere else in the visible code
targetDl = tk.StringVar(frameMd)
targetMenu = ttk.Combobox(frameMd, state='readonly')
targetMenu.grid(row=2, column=0, columnspan=1)
targetMenu.bind("<<ComboboxSelected>>", change_target)
## input list: columns still available as inputs (multi-select)
inputLb = tk.Label(frameMd, text="Input variables")
inputLb.grid(row=3, column=0)
prepareListbox = tk.Listbox(frameMd, selectmode=tk.MULTIPLE)
prepareListbox.grid(row=4, column=0)
## add (>>) and remove (<<) buttons between the two listboxes
inputBtns = tk.Frame(frameMd, padx=80)
inputBtns.grid(row=4, column=1)
addXBtn = tk.Button(inputBtns, text=">>", command=add_input, width=10)
addXBtn.grid(row=0, column=0)
removeXBtn = tk.Button(inputBtns, text="<<", command=remove_input, width=10)
removeXBtn.grid(row=1, column=0)
## chosen inputs
inputListbox = tk.Listbox(frameMd, selectmode=tk.MULTIPLE)
inputListbox.grid(row=4, column=2, columnspan=1)
## model selector; train_model() reads the choice when "execute" is pressed
modelLb = tk.Label(frameMd, text="Training models")
modelLb.grid(row=5, column=0)
modelList = ['Linear Regression', 'Logistic Regression', 'K-nearest Neighbor', 'Decision tree']
# NOTE(review): modelDl is not referenced anywhere else in the visible code
modelDl = tk.StringVar(frameMd)
modelMenu = ttk.Combobox(frameMd, values=modelList, state='readonly')
modelMenu.current(0)
modelMenu.grid(row=6, column=0, columnspan=1)
# NOTE(review): bind() is called without a handler, so no callback is attached here
modelMenu.bind("<<ComboboxSelected>>")
executeBtn = tk.Button(frameMd, text="execute", command=train_model, bg='blue', fg='white')
executeBtn.grid(row=6, column=1, columnspan=1)
# result label (text set by the *_model functions) and the frame that hosts the result chart
accuracyLb = tk.Label(frameMd, text="Accuracy: ")
accuracyLb.grid(row=7, column=0)
resultTable = tk.Frame(frameMd)
resultTable.grid(row=8, column=0, columnspan=3)


## Chart panel (frameVisual): axis selectors on the left, chart buttons and drawing area on the right
vars_frame = ttk.Frame(frameVisual, width=300)
vars_frame.grid(row=0, column=0)

x_label = tk.Label(vars_frame, text="X axis")
x_label.grid(row=0)
# read-only combobox (despite the name); values are filled by display_chart_var()
x_listbox = ttk.Combobox(vars_frame, width=15, state='readonly')
x_listbox.grid(row=1)
# no initial selection: the value list is still empty at this point
y_label = tk.Label(vars_frame, text="Y axis")
y_label.grid(row=2)
# read-only combobox (despite the name); values are filled by display_chart_var()
y_listbox = ttk.Combobox(vars_frame, width=15, state='readonly')
y_listbox.grid(row=3)
# no initial selection: the value list is still empty at this point

chart_frame = ttk.Frame(frameVisual, width=600)
chart_frame.grid(row=0, column=1, padx=20)

# one button per chart type; each passes its chart name to draw_chart()
chart_name_frame = ttk.Frame(chart_frame, height=100)
chart_name_frame.grid(row=0)
pie_btn = tk.Button(chart_name_frame, text='Pie', command=lambda chart_name="pie": draw_chart(chart_name))
pie_btn.grid(row=0, column=0)
boxplot_btn = tk.Button(chart_name_frame, text='Box plot', command=lambda chart_name="box": draw_chart(chart_name))
boxplot_btn.grid(row=0, column=1)
bar_btn = tk.Button(chart_name_frame, text='Bar', command=lambda chart_name="bar": draw_chart(chart_name))
bar_btn.grid(row=0, column=2)
scatter_btn = tk.Button(chart_name_frame, text='Scatter', command=lambda chart_name="scatter": draw_chart(chart_name))
scatter_btn.grid(row=0, column=3)

# draw_chart() places its canvas inside this frame
chart_draw_frame = ttk.Frame(chart_frame, height=350)
chart_draw_frame.grid(row=1)


# Start the Tk event loop; this call blocks until the window is closed.
root.mainloop()