In [125]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score
from scipy import stats
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

#Function to prompt the user to provide the filename.
def get_filename():
    
    while True:
        filename = input('Import a csv or an excel file: ')
        if os.path.exists(filename):
            if filename.split('.')[-1].lower() == 'csv' or filename.split('.')[-1].lower() == 'xlsx':
                return filename
            else:
                print('Invalid file format. Please try again')
        else:
            print(f'Error: There is no file called: {filename}. Please try again.')
    
#Funtion to read a file based on its extension.    
def read_file(filename,file_extension):
    
    if file_extension == 'csv':
        while True:
            delim = input('Please enter the delimiter of the csv file. It must be either ";" or ",": ')
            if delim == ',' or delim == ';':
                return pd.read_csv(filename, delimiter = delim)
            else:
                print('Invalid delimiter. Please try again.')
    else:
        return pd.read_excel(filename)         

#Function for the kNN algorithm
def kNN_algorithm(data_frame):
    X = data_frame.iloc[:, :-1]
    y = data_frame.iloc[:, -1]
    unique_classes = y.unique()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    while True:
        try:
            k = input('Specify the number of neighboors "k" for the kNN classification: ')
            k = int(k)
            break
        except:
            print('Invalid input. Please try again')
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test.values)
    accuracy = round((accuracy_score(y_test, y_pred)*100),2)
    precision = round((precision_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    recall = round((recall_score(y_test, y_pred, labels=unique_classes, average='micro')*100),2)
    cm = confusion_matrix(y_test, y_pred, labels=unique_classes)
    f1 = round((f1_score(y_test, y_pred, average='micro')*100),3)

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-Score:\n", f1)
    print("Confusion Matrix:\n", cm)

#Function for the k-means algorithm
def kmeans_algorithm(data_frame):
    while True:
        try:
            k = input('Specify the number of clusters "k" for the k-means clustering algorithm: ')
            k = int(k)
            break
        except:
            print('Invalid input. Please try again')
    X = data_frame.iloc[:, :-1]
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    labels = kmeans.labels_
    inertia = kmeans.inertia_
    silhouette = round((silhouette_score(X, labels)*100),2)
    print(f"Silhouette Score: {silhouette}")

    
    
    
filename = get_filename()
file_extension = filename.split('.')[-1].lower()
data_frame = read_file(filename,file_extension)
kNN_algorithm(data_frame)
kmeans_algorithm(data_frame)

Import a csv or an excel file:  1.csv
Please enter the delimiter of the csv file. It must be either ";" or ",":  ;
Specify the number of neighboors "k" for the kNN classification:  8




Accuracy: 62.26
Precision: 62.26
Recall: 62.26
F1-Score:
 62.26
Confusion Matrix:
 [[176 114  26]
 [ 40 344  34]
 [ 42  78  31]]


Specify the number of clusters "k" for the k-means clustering algorithm:  4


  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score: 70.62
