# In [4]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score

#Function that implements the kNN classification algorithm
def kNN_algorithm(data_frame):
    """Train and evaluate a k-nearest-neighbours classifier on ``data_frame``.

    The last column of ``data_frame`` is used as the class label and every
    preceding column as a feature. The rows are split 80/20 into training and
    test sets (fixed ``random_state=42``), both parts are passed through
    ``data_preprocess_for_the_supervised_algorithms``, the user is prompted
    for the number of neighbours ``k``, and the resulting accuracy, precision,
    recall, F1-score (micro-averaged, expressed as percentages) and confusion
    matrix are printed.

    Args:
        data_frame: pandas DataFrame with the features in all but the last
            column and the class labels in the last column.

    Returns:
        None. All results are printed to standard output.

    Raises:
        ValueError: If no training samples remain after preprocessing.
    """
    X = data_frame.iloc[:, :-1]
    y = data_frame.iloc[:, -1]
    # Collected before splitting so the metric/confusion-matrix label order
    # covers every class in the data set, not only those in the test split.
    unique_classes = y.unique()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # The preprocessing helper takes whole frames (features + label column).
    training_data = pd.concat([X_train, y_train], axis=1)
    testing_data = pd.concat([X_test, y_test], axis=1)
    # NOTE(review): the third argument appears to select which frame is
    # returned (1 -> processed training data, 2 -> processed testing data,
    # computed with the already processed training data) -- confirm against
    # the helper's definition.
    training_data = data_preprocess_for_the_supervised_algorithms(training_data, testing_data, 1)
    testing_data = data_preprocess_for_the_supervised_algorithms(training_data, testing_data, 2)
    X_train, y_train = training_data.iloc[:, :-1], training_data.iloc[:, -1]
    X_test, y_test = testing_data.iloc[:, :-1], testing_data.iloc[:, -1]

    # k can never exceed the number of fitted samples; without this guard the
    # prompt loop below could not terminate on an empty training set.
    max_k = len(X_train)
    if max_k < 1:
        raise ValueError('No training samples available for the kNN classification.')

    while True:
        raw_k = input('Specify the number of neighbours "k" for the kNN classification: ')
        # Only the conversion is guarded, and only against ValueError, so that
        # KeyboardInterrupt / EOFError propagate instead of looping forever.
        try:
            k = int(raw_k)
        except ValueError:
            print('Invalid input. Please try again')
            continue
        if 1 <= k <= max_k:
            break
        print('Invalid number of neighbours. "k" must be between 1 and {}.'.format(max_k))
    print(k)

    # Hand plain arrays to the estimator.
    X_train = X_train.values
    X_test = X_test.values
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    accuracy = round((accuracy_score(y_test, y_pred)*100),2)
    precision = round((precision_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    recall = round((recall_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    cm = confusion_matrix(y_test, y_pred, labels=unique_classes)
    f1 = round((f1_score(y_test, y_pred, average='micro')*100),3)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:\n", f1)
    print("Confusion Matrix:\n", cm)
    
#Function that implements the Logistic regression classification algorithm
def logistic_regression(data_frame, random_state=None):
    """Train and evaluate a logistic-regression classifier on ``data_frame``.

    The user is prompted for a test-set fraction in the interval (0.0, 0.5].
    The last column of ``data_frame`` is used as the class label and every
    preceding column as a feature. The rows are split with the chosen
    fraction (fixed ``random_state=42``), both parts are passed through
    ``data_preprocess_for_the_supervised_algorithms``, a ``LogisticRegression``
    model is fitted, and the resulting accuracy, precision, recall, F1-score
    (micro-averaged, expressed as percentages) and confusion matrix are
    printed.

    Args:
        data_frame: pandas DataFrame with the features in all but the last
            column and the class labels in the last column.
        random_state: Forwarded to the ``LogisticRegression`` constructor
            only; the train/test split always uses ``random_state=42``.

    Returns:
        None. All results are printed to standard output.
    """
    while True:
        raw_size = input('Specify the test size (0.0,0.5] (We recommend to not use a test size greater than 0.2): ')
        # Only the conversion is guarded, and only against ValueError, so that
        # KeyboardInterrupt / EOFError propagate instead of looping forever.
        try:
            test_size = float(raw_size)
        except ValueError:
            print('Invalid input. Please try again')
            continue
        if 0.0 < test_size <= 0.5:
            break
        print('Invalid test size.')
    print(test_size)

    X = data_frame.iloc[:, :-1]
    y = data_frame.iloc[:, -1]
    # Collected before splitting so the metric/confusion-matrix label order
    # covers every class in the data set, not only those in the test split.
    unique_classes = y.unique()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    # The preprocessing helper takes whole frames (features + label column).
    training_data = pd.concat([X_train, y_train], axis=1)
    testing_data = pd.concat([X_test, y_test], axis=1)
    # NOTE(review): the third argument appears to select which frame is
    # returned (1 -> processed training data, 2 -> processed testing data,
    # computed with the already processed training data) -- confirm against
    # the helper's definition.
    training_data = data_preprocess_for_the_supervised_algorithms(training_data, testing_data, 1)
    testing_data = data_preprocess_for_the_supervised_algorithms(training_data, testing_data, 2)
    X_train, y_train = training_data.iloc[:, :-1], training_data.iloc[:, -1]
    X_test, y_test = testing_data.iloc[:, :-1], testing_data.iloc[:, -1]
    # Hand plain arrays to the estimator.
    X_train = X_train.values
    X_test = X_test.values

    model = LogisticRegression(random_state=random_state)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = round((accuracy_score(y_test, y_pred)*100),2)
    precision = round((precision_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    recall = round((recall_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    cm = confusion_matrix(y_test, y_pred, labels=unique_classes)
    f1 = round((f1_score(y_test, y_pred, average='micro')*100),3)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:\n", f1)
    print("Confusion Matrix:\n", cm)