In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
def perform_classification(X_train, X_test, y_train, y_test, dataset_name):
    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Different values of K
    k_values = [3, 5, 7] 

    # Different similarity measures
    distance_metrics = ['euclidean', 'manhattan']  

    # Perform classification for each combination of K and distance metric
    results = []
    for k in k_values:
        for metric in distance_metrics:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
            knn.fit(X_train_scaled, y_train)
            y_pred = knn.predict(X_test_scaled)
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted')
            recall = recall_score(y_test, y_pred, average='weighted')
            f1 = f1_score(y_test, y_pred, average='weighted')
            cm = confusion_matrix(y_test, y_pred)
            results.append({'Dataset': dataset_name, 'K': k, 'Metric': metric, 'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1, 'Confusion Matrix': cm})

    return results

In [3]:
Heart_data = pd.read_csv("Heart_Attack_Prediction.csv")
print(Heart_data)  

     age  sex  cp  trtbps  chol  fbs  restecg  thalachh  exng  oldpeak  slp  \
0     63    1   3     145   233    1        0       150     0      2.3    0   
1     37    1   2     130   250    0        1       187     0      3.5    0   
2     41    0   1     130   204    0        0       172     0      1.4    2   
3     56    1   1     120   236    0        1       178     0      0.8    2   
4     57    0   0     120   354    0        1       163     1      0.6    2   
..   ...  ...  ..     ...   ...  ...      ...       ...   ...      ...  ...   
298   57    0   0     140   241    0        1       123     1      0.2    1   
299   45    1   3     110   264    0        1       132     0      1.2    1   
300   68    1   0     144   193    1        1       141     0      3.4    1   
301   57    1   0     130   131    0        1       115     1      1.2    1   
302   57    0   1     130   236    0        0       174     0      0.0    1   

     caa  thall  output  
0      0      1       1  

In [4]:
bank_data = pd.read_csv("Bank_Loan_Granting.csv")  
print(bank_data)

        ID  Age  Experience  Income  ZIP Code  Family CCAvg  Education  \
0        1   25           1      49     91107       4  1/60          1   
1        2   45          19      34     90089       3  1/50          1   
2        3   39          15      11     94720       1  1/00          1   
3        4   35           9     100     94112       1  2/70          2   
4        5   35           8      45     91330       4  1/00          2   
...    ...  ...         ...     ...       ...     ...   ...        ...   
4995  4996   29           3      40     92697       1  1/90          3   
4996  4997   30           4      15     92037       4  0/40          1   
4997  4998   63          39      24     93023       2  0/30          3   
4998  4999   65          40      49     90034       3  0/50          2   
4999  5000   28           4      83     92612       3  0/80          1   

      Mortgage  Personal Loan  Securities Account  CD Account  Online  \
0            0              0         

In [5]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
bank_data["CCAvg"] = label_encoder.fit_transform(bank_data["CCAvg"])
print(bank_data)

        ID  Age  Experience  Income  ZIP Code  Family  CCAvg  Education  \
0        1   25           1      49     91107       4     19          1   
1        2   45          19      34     90089       3     18          1   
2        3   39          15      11     94720       1     12          1   
3        4   35           9     100     94112       1     35          2   
4        5   35           8      45     91330       4     12          2   
...    ...  ...         ...     ...       ...     ...    ...        ...   
4995  4996   29           3      40     92697       1     24          3   
4996  4997   30           4      15     92037       4      4          1   
4997  4998   63          39      24     93023       2      3          3   
4998  4999   65          40      49     90034       3      5          2   
4999  5000   28           4      83     92612       3     10          1   

      Mortgage  Personal Loan  Securities Account  CD Account  Online  \
0            0            

In [6]:

Heart_X_train, Heart_X_test,Heart_y_train, Heart_y_test = train_test_split(Heart_data.iloc[:, :-1], Heart_data.iloc[:, -1], test_size=0.2, random_state=42) 

In [7]:
bank_X_train,bank_X_test,bank_y_train, bank_y_test = train_test_split(bank_data.iloc[:, :-1], bank_data.iloc[:, -1], test_size=0.2, random_state=42)

In [8]:
Heart_results = perform_classification(Heart_X_train, Heart_X_test, Heart_y_train, Heart_y_test, "Heart_Attack_Prediction")

In [9]:
result_df = pd.DataFrame(Heart_results)
result_df

Unnamed: 0,Dataset,K,Metric,Accuracy,Precision,Recall,F1 Score,Confusion Matrix
0,Heart_Attack_Prediction,3,euclidean,0.852459,0.863263,0.852459,0.852062,"[[27, 2], [7, 25]]"
1,Heart_Attack_Prediction,3,manhattan,0.836066,0.842949,0.836066,0.835889,"[[26, 3], [7, 25]]"
2,Heart_Attack_Prediction,5,euclidean,0.901639,0.903684,0.901639,0.901692,"[[27, 2], [4, 28]]"
3,Heart_Attack_Prediction,5,manhattan,0.836066,0.836066,0.836066,0.836066,"[[24, 5], [5, 27]]"
4,Heart_Attack_Prediction,7,euclidean,0.918033,0.918614,0.918033,0.918077,"[[27, 2], [3, 29]]"
5,Heart_Attack_Prediction,7,manhattan,0.852459,0.853076,0.852459,0.852538,"[[25, 4], [5, 27]]"


In [10]:
Bank_results = perform_classification(bank_X_train, bank_X_test, bank_y_train, bank_y_test, "Bank_Loan_Granting")

In [11]:
result_df1 = pd.DataFrame(Bank_results)
result_df1

Unnamed: 0,Dataset,K,Metric,Accuracy,Precision,Recall,F1 Score,Confusion Matrix
0,Bank_Loan_Granting,3,euclidean,0.692,0.671483,0.692,0.678282,"[[589, 119], [189, 103]]"
1,Bank_Loan_Granting,3,manhattan,0.69,0.673278,0.69,0.679457,"[[581, 127], [183, 109]]"
2,Bank_Loan_Granting,5,euclidean,0.697,0.666182,0.697,0.67246,"[[612, 96], [207, 85]]"
3,Bank_Loan_Granting,5,manhattan,0.688,0.65533,0.688,0.663043,"[[607, 101], [211, 81]]"
4,Bank_Loan_Granting,7,euclidean,0.714,0.681257,0.714,0.6804,"[[637, 71], [215, 77]]"
5,Bank_Loan_Granting,7,manhattan,0.702,0.664433,0.702,0.66699,"[[631, 77], [221, 71]]"
