Machine Learning (COMP6745001) - LA01
- Randy Antonio (2440034170)
- Nadya Tyandra (2440032820)
- Tiffany Angela Indryani (2440052210)

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt
matplotlib.use('TkAgg')
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix
from tkinter import *
import tkinter as tk
import tkinter.font as TkFont

In [2]:
# Shared pipeline state. normalize() fills `scaler`, trainModel() fills the
# rest, and the prediction helpers / GUI callbacks read them afterwards.
LR = scaler = None                           # classifier and feature scaler
x_test = y_test = x_train = y_train = None   # train/test partitions
y_LR = accuracy = None                       # test-set predictions and their accuracy

def runProgram():
    """Run the full pipeline, then open the GUI.

    Steps: load the CSV, preprocess it into features/labels, scale the
    features, train the model, and finally start the Tkinter front end.
    """
    features, labels = preprocessing(load_data())
    features, labels = normalize(features, labels)
    trainModel(features, labels)
    buildGUI()

def load_data(path='./dataset/healthcare-dataset-stroke-data.csv'):
    """Read the stroke dataset CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the dataset path this notebook
        has always used, so existing zero-argument calls behave the same.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv``.

    Raises
    ------
    FileNotFoundError
        Propagated from pandas when ``path`` does not exist.
    """
    return pd.read_csv(path)

def trainModel(x, y):
    """Split the data 70/30, fit a logistic regression and score it.

    Nothing is returned; the results are published through module-level
    globals: the four partitions (x_train, x_test, y_train, y_test), the
    fitted classifier (LR), its test-set predictions (y_LR) and the
    resulting accuracy score (accuracy).

    Parameters
    ----------
    x : features passed straight to ``train_test_split``.
    y : labels passed straight to ``train_test_split``.
    """
    global LR, x_train, x_test, y_train, y_test, y_LR, accuracy

    # Fixed random_state keeps the partition identical between runs.
    partitions = train_test_split(x, y, test_size=0.3, random_state=100)
    x_train, x_test, y_train, y_test = partitions

    LR = LogisticRegression(solver='liblinear', C=0.01)
    y_LR = LR.fit(x_train, y_train).predict(x_test)

    accuracy = accuracy_score(y_test, y_LR)

def preprocessing(data):
    """Clean the raw frame and split it into features and label.

    The first column is dropped, rows whose ``smoking_status`` is
    'Unknown' or that contain any missing value are removed, and the five
    categorical columns are replaced by their pandas category codes.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw dataset; the last column is treated as the label.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Every remaining column except the last, and the last column.
    """
    categorical = ['work_type', 'gender', 'ever_married', 'Residence_type', 'smoking_status']

    cleaned = (
        data.iloc[:, 1:]
        .loc[lambda frame: frame.smoking_status != 'Unknown']
        .dropna()
        .reset_index(drop=True)
    )

    # assign() swaps each column for its codes while keeping column order.
    encoded = cleaned.assign(
        **{name: cleaned[name].astype('category').cat.codes for name in categorical}
    )

    return encoded.iloc[:, :-1], encoded.iloc[:, -1]

def normalize(x, y):
    """Standardise the features and remember the fitted scaler.

    A new StandardScaler is fitted on ``x`` and stored in the module-level
    ``scaler`` so later inputs can be transformed the same way.

    Parameters
    ----------
    x : feature matrix to fit on and transform.
    y : labels, handed back unchanged.

    Returns
    -------
    tuple
        The transformed features and the untouched ``y``.
    """
    global scaler
    fitted = StandardScaler()
    fitted.fit(x)
    scaler = fitted
    return scaler.transform(x), y

def check_result(x):
    """Scale new rows with the stored scaler and classify them.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows to classify; the first column is dropped before scaling.

    Returns
    -------
    tuple
        The scaled feature array and the predictions from the
        module-level ``LR`` model.
    """
    scaled = scaler.transform(x.iloc[:, 1:])
    return scaled, LR.predict(scaled)

In [1]:
# Most recent prediction for the values entered in the GUI form;
# assigned by the GUI callbacks inside buildGUI().
y_input = None

def buildGUI():
    """Build the Stroke Predictor window and run its Tk event loop.

    The callbacks read the module-level state filled in by normalize() and
    trainModel() (scaler, LR, y_test, y_LR, accuracy), so the model must be
    trained before the buttons are used. The latest prediction is published
    through the module-level ``y_input``. The call blocks until the main
    window is closed.
    """
    # Function-scope imports: only this block needs these names, so the
    # notebook's import cell does not have to change.
    from tkinter import messagebox
    from matplotlib.figure import Figure

    def set_icon(window, path):
        """Apply a window icon; failures are ignored because it is cosmetic."""
        try:
            window.iconbitmap(path)
        except tk.TclError:
            # Raised when Tk cannot load the bitmap, e.g. the file is missing.
            pass

    root = tk.Tk()

    root.title('Stroke Predictor')
    set_icon(root, r'asset/brain.ico')
    root.geometry('900x600')
    root.resizable(width = False, height = False)

    header = TkFont.Font(root = root, family = "San Francisco", size = 24, weight = "bold")
    subheader = TkFont.Font(root = root, family = "San Francisco", size = 10)
    body = TkFont.Font(root = root, family = "San Francisco", size = 8)

    canvas1 = tk.Canvas(root, width = 1200, height = 600)
    canvas1.pack()

    title = tk.Label(root, text = "Stroke Predictor", font = header)
    canvas1.create_window(450, 40, window = title)

    # ------------------------------------------------------------------
    # Input form
    # ------------------------------------------------------------------
    # Dataset column name -> zero-argument reader returning a validated
    # number (or raising ValueError with a user-readable message).
    # Insertion order is the feature order handed to check_result().
    fields = {}

    def add_caption(text, y):
        """Place the left-hand caption of one form row."""
        tk.Label(root, text = text, font = subheader).place(x = 30, y = y)

    def add_choice(column, caption, caption_y, buttons_y, options):
        """Add a radio group; ``options`` is a list of (text, code, x)."""
        add_caption(caption, caption_y)
        var = tk.StringVar(root)
        for text, code, x in options:
            tk.Radiobutton(root, text = text, variable = var, value = code,
                           indicatoron = 0, bg = "light gray").place(x = x, y = buttons_y)

        def read_choice():
            raw = var.get()
            if raw == '':
                # Nothing selected yet: StringVar still holds its empty default.
                raise ValueError('Please choose a value for "' + caption + '".')
            return int(raw)

        fields[column] = read_choice

    def add_number(column, caption, caption_y, entry_y):
        """Add a free-text numeric entry embedded in the canvas."""
        add_caption(caption, caption_y)
        entry = tk.Entry(root, width = 10)
        canvas1.create_window(232, entry_y, window = entry)

        def read_number():
            raw = entry.get().strip()
            try:
                value = float(raw)
            except ValueError:
                raise ValueError('"' + caption + '" must be a number.') from None
            if not np.isfinite(value) or value < 0:
                raise ValueError('"' + caption + '" must be a finite, non-negative number.')
            return value

        fields[column] = read_number

    # The radio codes must equal the category codes preprocessing() derives
    # with pandas ``cat.codes``, which numbers categories in sorted order.
    # NOTE(review): the codes assume the category spellings of the stroke
    # dataset (e.g. 'Rural' < 'Urban', so Rural is 0) - confirm against the CSV.
    yes_no = [("Yes", 1, 200), ("No", 0, 250)]
    add_choice('gender', 'Gender', 90, 87, [("Male", 1, 200), ("Female", 0, 250)])
    add_number('age', 'Age', 130, 140)
    add_choice('hypertension', 'Hypertension', 170, 170, yes_no)
    add_choice('heart_disease', 'Heart disease', 210, 207, yes_no)
    add_choice('ever_married', 'Ever married', 250, 247, yes_no)
    add_choice('work_type', 'Work type', 290, 287,
               [("Private", 2, 200), ("Self-employed", 3, 250), ("Government job", 0, 342)])
    add_choice('Residence_type', 'Residence type', 330, 327,
               [("Urban", 1, 200), ("Rural", 0, 250)])
    add_number('avg_glucose_level', 'Average glucose level', 370, 380)
    add_number('bmi', 'BMI', 410, 420)
    add_choice('smoking_status', 'Smoking status', 450, 450,
               [("Never smoked", 1, 200), ("Formerly smoked", 0, 290), ("Smokes", 2, 397)])

    # Credit
    credit = tk.Label(root, text = "Randy Antonio (2440034170), Nadya Tyandra (2440032820), Tiffany Angela Indryani (2440052210) - Machine Learning (COMP6745001) - LA01", font = body)
    canvas1.create_window(450, 590, window = credit)

    # Labels showing the scaled value under each numeric entry. They are
    # created once and re-configured, so clicks no longer stack new widgets.
    normalized_labels = {}
    for column, y in (('age', 150), ('avg_glucose_level', 390), ('bmi', 430)):
        value_label = tk.Label(root, text = '', font = body)
        value_label.place(x = 200, y = y)
        normalized_labels[column] = value_label

    result_label = tk.Label(root, text = '', font = header)
    result_label.place(x = 600, y = 70)

    # ------------------------------------------------------------------
    # Callbacks
    # ------------------------------------------------------------------
    def read_form():
        """Return the form as a one-row DataFrame shaped like the raw data.

        A placeholder 'id' column comes first because check_result() drops
        the first column. Raises ValueError when a field is missing/invalid.
        """
        row = {'id': [0]}
        for column, reader in fields.items():
            row[column] = [reader()]
        return pd.DataFrame(row)

    def refresh_prediction():
        """Re-read the form, update ``y_input`` and the normalized labels.

        Returns True on success; on invalid input an error dialog is shown
        and False is returned.
        """
        global y_input
        try:
            form = read_form()
        except ValueError as problem:
            messagebox.showerror('Invalid input', str(problem), parent = root)
            return False

        scaled, y_input = check_result(form)

        feature_order = list(fields)
        for column, value_label in normalized_labels.items():
            value = scaled[0, feature_order.index(column)]
            value_label.config(text = '{:.4f}'.format(value))
        return True

    def get_res():
        """'Normalize' button: show scaled inputs and unlock prediction."""
        if refresh_prediction():
            button2['state'] = tk.NORMAL

    report_built = False

    def build_report():
        """Create the static model summary and the confusion-matrix chart."""
        summary = [('Model Information', 600, 115),
                   ('Algorithm: Logistic Regression', 500, 145),
                   ('Splitting ratio: 70% : 30%', 500, 175),
                   ('Scaler: Standard Scaler', 500, 205),
                   ('Accuracy: ' + str(accuracy), 500, 235)]
        for text, x, y in summary:
            tk.Label(root, text = text, font = subheader).place(x = x, y = y)

        # labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
        cnf_matrix = confusion_matrix(y_test, y_LR, labels = [0, 1])
        confusion = pd.DataFrame(cnf_matrix, index = ['Healthy', 'Stroke'], columns = ['Healthy', 'Stroke'])

        # A plain Figure (not plt.figure) avoids registering an extra
        # pyplot-managed window for a chart embedded in the Tk window.
        figure = Figure(figsize = (4, 4))
        axes = figure.add_subplot(111)
        sns.heatmap(confusion, annot = True, fmt = 'd', cmap = 'Blues', ax = axes)
        # confusion_matrix puts true labels on rows (y) and predictions on columns (x).
        axes.set_xlabel('Predicted Label')
        axes.set_ylabel('True Label')
        axes.set_title('Confusion Matrix')
        chart = FigureCanvasTkAgg(figure, root)
        chart.draw()
        chart.get_tk_widget().place(x = 525, y = 270)

    def makePrediction():
        """'Make Prediction' button: show the verdict and the model report."""
        nonlocal report_built
        # Re-read the form so the verdict always matches the current inputs.
        if not refresh_prediction():
            return

        if y_input[0] == 1:
            result_label.config(text = "Stroke", fg = "red")
        else:
            result_label.config(text = "Healthy", fg = "green")

        # The model is already trained before the GUI opens, so the report
        # is built once from the existing globals instead of retraining.
        if not report_built:
            build_report()
            report_built = True

    def showCorrelation():
        """'Show Correlation' button: open a child window with the heatmap."""
        # Toplevel shares the main window's interpreter and event loop, so
        # no second tk.Tk() / mainloop() is needed.
        window = tk.Toplevel(root)
        window.title('Correlation Matrix')
        set_icon(window, r'asset/correlation.ico')
        window.geometry('1300x800')

        data = load_data()
        figure = Figure(figsize = (11, 9))
        axes = figure.add_subplot(111)
        # Restrict to numeric columns: corr() on text columns raises on
        # recent pandas versions.
        sns.heatmap(data.select_dtypes(include = 'number').corr(), annot = True, cmap = 'YlGnBu', ax = axes)
        axes.set_title('Correlation Matrix')
        chart = FigureCanvasTkAgg(figure, window)
        chart.draw()
        chart.get_tk_widget().place(x = 50, y = 5)

        remarks = ('The variable with strongest correlation with',
                   'stroke is age, which makes sense as in real',
                   'life, stroke is usually related to old age.',
                   'Hypertension, heart disease, and glucose',
                   'level are the second most correlated',
                   'variable to stroke as most of people who',
                   'has high blood tension, heart disease,',
                   'and high glucose level tend to have stroke.',
                   'ID is the least correlated variable as it is',
                   'an identifier of the person, therefore id',
                   'is dropped.')
        for offset, line in enumerate(remarks):
            tk.Label(window, text = line, font = subheader).place(x = 855, y = 150 + 30 * offset)

    def showPlot():
        """'Show Plot' button: age vs. glucose scatter coloured by outcome."""
        data = load_data()
        data['stroke'] = data['stroke'].astype('string')
        data.loc[(data.stroke == '0'),'stroke']='Healthy'
        data.loc[(data.stroke == '1'),'stroke']='Stroke'
        sns.relplot(x='age', y='avg_glucose_level', hue='stroke', palette=['r','g'], style='hypertension', data=data)
        # relplot only creates the figure; show it without blocking so the
        # main window stays responsive.
        plt.show(block = False)

    def add_button(text, command, x, state = tk.NORMAL):
        """Create one of the black action buttons along the bottom edge."""
        button = tk.Button(root, text = text, font = subheader, command = command, fg = "white", bg = "black", activebackground = "white", activeforeground = "black", state = state)
        canvas1.create_window(x, 545, window = button)
        return button

    button1 = add_button("Normalize", get_res, 70)
    button2 = add_button("Make Prediction", makePrediction, 177, state = tk.DISABLED)
    button3 = add_button("Show Correlation", showCorrelation, 303)
    button4 = add_button("Show Plot", showPlot, 412)

    root.mainloop()

In [4]:
# Entry point: run the full pipeline and open the GUI when this cell/script is executed.
if __name__ == "__main__":
    runProgram()