In [None]:
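'''
KNN (k-nearest neighbours) selects the k training samples from the data set that are closest to the input.

It predicts the class of the input by majority vote among these k neighbours.

'''

'\nKNN (k-nearest neighbours) selects the k training samples from the data set that are closest to the input.\n\nIt predicts the class of the input by majority vote among these k neighbours.\n\n'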

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.datasets import fetch_openml

In [None]:
class KnnClassifier:
    """K-nearest-neighbours pipeline: split, standardize, fit, predict, evaluate.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'target'`` column (the label). Every other column is
        used as a feature.
    k : int
        Number of neighbours passed to ``KNeighborsClassifier``.
    train_size : float or int
        Forwarded unchanged to ``train_test_split(train_size=...)``.

    Attributes
    ----------
    accuracy, precision, recall_sensitivity, f1_Score, specificity, serendipity
        ``None`` until ``evaluate_model()`` has run.
    """

    def __init__(self, data, k, train_size):
        # Separate the features (X) from the label column (Y).
        self.df = data
        self.X = self.df.drop(columns='target')
        self.Y = self.df['target']

        # Populated by splitData() / predict().
        self.X_train = None
        self.Y_train = None
        self.X_test = None
        self.Y_test = None
        self.Y_pred = None

        self.train_size = train_size
        self.k = k
        self.knn_model = KNeighborsClassifier(n_neighbors=self.k)

        # Populated by evaluate_model().
        self.accuracy = None
        self.precision = None
        self.f1_Score = None
        self.specificity = None
        self.serendipity = None
        self.recall_sensitivity = None

    def splitData(self):
        """Create the train/test split and standardize the features.

        The split uses ``random_state=42`` so it is reproducible. The scaler is
        fitted on the training features only and then applied to the test
        features, so no test-set statistics leak into training.
        """
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            self.X, self.Y, train_size=self.train_size, random_state=42
        )

        # KNN is distance based, so features must be on a comparable scale.
        scaler = StandardScaler()
        self.X_train = scaler.fit_transform(self.X_train)
        self.X_test = scaler.transform(self.X_test)

    def fit(self):
        """Fit the KNN model on the training split (requires ``splitData()``)."""
        self.knn_model.fit(self.X_train, self.Y_train)

    def predict(self):
        """Predict labels for the test split and store them in ``Y_pred``."""
        self.Y_pred = self.knn_model.predict(self.X_test)

    def print_coefficients(self):
        """Print an empty line; this class has no coefficients to report."""
        print("")

    @staticmethod
    def _safe_divide(numerator, denominator):
        """Element-wise ``numerator / denominator`` that yields 0.0 where the denominator is 0."""
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        result = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
        np.divide(numerator, denominator, out=result, where=denominator != 0)
        return result

    def _confusion_metrics(self, conf_matrix):
        """Return ``(specificity, serendipity)`` for a square confusion matrix.

        Rows are true labels and columns are predicted labels. Per-class
        one-vs-rest counts are derived first:

        * 2x2 matrix: the values for the class at index 1 are returned, i.e.
          ``tn / (tn + fp)`` and ``precision * recall`` of that class.
        * any other size: the per-class values are averaged, weighted by the
          number of true samples of each class.

        Ratios with a zero denominator count as 0.0.
        """
        matrix = np.asarray(conf_matrix, dtype=float)
        tp = np.diag(matrix)
        fp = matrix.sum(axis=0) - tp
        fn = matrix.sum(axis=1) - tp
        tn = matrix.sum() - (tp + fp + fn)

        specificity = self._safe_divide(tn, tn + fp)
        # "Serendipity" here is the notebook's example formula: precision * recall.
        serendipity = self._safe_divide(tp, tp + fp) * self._safe_divide(tp, tp + fn)

        if matrix.shape == (2, 2):
            return float(specificity[1]), float(serendipity[1])

        support = matrix.sum(axis=1)
        weights = self._safe_divide(support, support.sum())
        return float(np.dot(specificity, weights)), float(np.dot(serendipity, weights))

    def evaluate_model(self):
        """Compute all metrics from ``Y_test`` and ``Y_pred`` and store them on the instance.

        Accuracy, precision, recall and F1 use ``average='weighted'``.
        Specificity and serendipity are derived from the confusion matrix; see
        ``_confusion_metrics`` for how non-binary matrices are handled.
        """
        self.accuracy = accuracy_score(self.Y_test, self.Y_pred)
        self.precision = precision_score(self.Y_test, self.Y_pred, average='weighted')
        self.recall_sensitivity = recall_score(self.Y_test, self.Y_pred, average='weighted')
        self.f1_Score = f1_score(self.Y_test, self.Y_pred, average='weighted')

        conf_matrix = confusion_matrix(self.Y_test, self.Y_pred)
        self.specificity, self.serendipity = self._confusion_metrics(conf_matrix)

    def start(self):
        """Run the whole pipeline: split, fit, predict, evaluate."""
        self.splitData()            # create train and test
        self.fit()                  # using X_train, Y_train
        self.predict()              # using X_test
        self.evaluate_model()       # using Y_test, Y_pred

In [None]:
import time

class ModelAnalyzer:
    """Run ``KnnClassifier`` for several training sizes and tabulate the results.

    Parameters
    ----------
    train_size : sequence
        Requested training sizes; each item must be convertible with
        ``float()``. The items are also used verbatim as table column headers.
    k : int
        Number of neighbours passed to every ``KnnClassifier``.

    Attributes
    ----------
    results : list of list
        One row per metric. The first element of each row is its label and the
        following elements are one value per entry of ``train_size``. The row
        order is given by ``_ROW_LABELS``.

    Notes
    -----
    The train fraction actually handed to ``KnnClassifier`` is
    ``TRAIN_POOL_FRACTION * float(size)``, so the column headers show the
    requested size, not the effective fraction.
    """

    # Scale factor applied to every requested size before it is used as train_size.
    TRAIN_POOL_FRACTION = 0.7

    # Row order of `results`; storeResult() and start() index into it positionally.
    _ROW_LABELS = (
        'Testing Time',
        'Training Time',
        'Accuracy',
        'Precision',
        'Recall',
        'F1-Score',
        'Specificity',
        'Serendipity',
    )

    def __init__(self, train_size, k):
        self.train_size = train_size
        self.k = k
        self.results = self._empty_results()

    @classmethod
    def _empty_results(cls):
        """Return a fresh result table holding only the row labels."""
        return [[label] for label in cls._ROW_LABELS]

    def get_table(self):
        """Return the collected results as a styled table with the index hidden.

        Call after ``start()``; each row then has one value per training size.
        """
        # list() lets train_size be any sequence (tuple, array, ...), not only a list.
        header = ["train % -->"] + list(self.train_size)

        df = pd.DataFrame(self.results, columns=header)

        # Green header, zebra-striped body, left-aligned cells.
        styled_df = df.style.set_table_styles(
            [{'selector': 'thead th',
              'props': [('background-color', '#4CAF50'),
                        ('color', 'white'),
                        ('font-weight', 'bold')]},
             {'selector': 'tbody tr:nth-child(odd)',
              'props': [('background-color', '#f2f2f2')]},
             {'selector': 'tbody tr:nth-child(even)',
              'props': [('background-color', '#ffffff')]},
             {'selector': 'td', 'props': [('text-align', 'left')]}]
        )

        return styled_df.hide(axis="index")

    def storeResult(self, obj):
        """Append the six quality metrics of an evaluated classifier to their rows."""
        self.results[2].append(obj.accuracy)
        self.results[3].append(obj.precision)
        self.results[4].append(obj.recall_sensitivity)
        self.results[5].append(obj.f1_Score)
        self.results[6].append(obj.specificity)
        self.results[7].append(obj.serendipity)

    def start(self, data=None):
        """Train and evaluate one classifier per training size.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Data set handed to ``KnnClassifier``. When omitted, the OpenML
            ``heart-disease`` data set (version 1) is downloaded.

        Results from a previous call are discarded first, so the table always
        has exactly one column per entry of ``train_size``.
        """
        if data is None:
            heart_disease = fetch_openml(name='heart-disease', version=1, as_frame=True)
            data = heart_disease.frame

        # Start from a clean table so a re-run does not append extra columns.
        self.results = self._empty_results()

        for size in self.train_size:
            obj = KnnClassifier(data, self.k, (self.TRAIN_POOL_FRACTION * float(size)))
            obj.splitData()            # create train and test

            # perf_counter() is the high-resolution clock intended for short intervals.
            started = time.perf_counter()
            obj.fit()                  # using X_train, Y_train
            self.results[1].append(time.perf_counter() - started)

            started = time.perf_counter()
            obj.predict()              # using X_test
            self.results[0].append(time.perf_counter() - started)

            obj.evaluate_model()       # using Y_test, Y_pred
            self.storeResult(obj)


In [None]:
# Example usage:
# Requested training sizes, given as strings: they double as the table's column
# headers, and ModelAnalyzer.start() converts each with float() and multiplies
# it by 0.7 before passing it on as the train size.
train_size = ["0.2", "0.3", "0.4", "0.5", "0.6", "0.7"]
# Second argument is k, the number of neighbours used by every classifier.
creator = ModelAnalyzer(train_size, 5)
# Fetches the data set via fetch_openml, then fits and evaluates one model per size.
creator.start()
# Last expression of the cell, so the returned styled table is displayed.
creator.get_table()

train % -->,0.2,0.3,0.4,0.5,0.6,0.7
Testing Time,0.012333,0.015191,0.010541,0.012434,0.008986,0.011839
Training Time,0.001117,0.000988,0.000983,0.000965,0.000977,0.001041
Accuracy,0.816092,0.804167,0.812785,0.847716,0.835227,0.819355
Precision,0.817915,0.807933,0.812476,0.848026,0.835464,0.819961
Recall,0.816092,0.804167,0.812785,0.847716,0.835227,0.819355
F1-Score,0.814584,0.801851,0.812477,0.847068,0.835319,0.819521
Specificity,0.737288,0.706422,0.77551,0.795455,0.822785,0.816901
Serendipity,0.70714,0.694038,0.693415,0.750618,0.722079,0.691202
