In [48]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

In [49]:
# Load the dataset and set up the state shared with the later cells.
df = pd.read_csv('data.csv')

# Containers and running totals that the evaluation loop fills in.
results = []
totalPositive = totalNegative = 0
totalTruePositive = totalTrueNegative = 0
totalFalsePositive = totalFalseNegative = 0

# Cut the frame into 10 consecutive slices of equal length. Because of the
# floor division, rows past 10 * portion_size are not placed in any portion.
portion_size = df.shape[0] // 10
array_of_portions = [
    df.iloc[k * portion_size:(k + 1) * portion_size]
    for k in range(10)
]

In [50]:
from sklearn.impute import SimpleImputer

# Start from clean accumulators so that re-running this cell does not double
# count the totals or append duplicate entries to `results`.
results = []
totalPositive = 0
totalNegative = 0
totalTruePositive = 0
totalTrueNegative = 0
totalFalsePositive = 0
totalFalseNegative = 0

# Fix the class order once, across all portions. Passing it to
# confusion_matrix guarantees a 2x2 matrix even when a small test split
# happens to contain only one of the classes.
class_labels = sorted(pd.concat(array_of_portions)['diagnosis'].dropna().unique())
if len(class_labels) != 2:
    raise ValueError(
        "Expected exactly 2 classes in 'diagnosis', found: " + str(class_labels)
    )


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator != 0 else float('nan')


def _as_percent(fraction):
    """Format a fraction as a percentage string rounded to two decimals."""
    return str(round(fraction * 100, 2)) + '%'


# Train and evaluate an independent KNN model on every portion.
# The loop variable is deliberately not called `df`, so the full DataFrame
# loaded earlier is not overwritten.
for portion in array_of_portions:
    X = portion.drop(columns=['id', 'diagnosis'])
    y = portion['diagnosis']

    # Split the portion into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Handle missing values; the imputer is fitted on the training split only
    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = imputer.fit_transform(X_train)
    X_test_imputed = imputer.transform(X_test)

    # Build and train the KNN model
    knn_model = KNeighborsClassifier(n_neighbors=5)
    knn_model.fit(X_train_imputed, y_train)

    # Evaluate the model. Rows are true classes and columns are predictions,
    # both in `class_labels` order; the second label is treated as positive.
    y_pred_knn = knn_model.predict(X_test_imputed)
    conf_matrix_knn = confusion_matrix(y_test, y_pred_knn, labels=class_labels)
    TN, FP, FN, TP = conf_matrix_knn.ravel()

    P = TP + FN
    N = TN + FP

    totalPositive += P
    totalNegative += N
    totalTruePositive += TP
    totalTrueNegative += TN
    totalFalsePositive += FP
    totalFalseNegative += FN

    TPR = _safe_ratio(TP, P)
    TNR = _safe_ratio(TN, N)
    FPR = _safe_ratio(FP, N)
    FNR = _safe_ratio(FN, P)

    r = _safe_ratio(TP, P)
    p = _safe_ratio(TP, TP + FP)
    F1 = _safe_ratio(2 * (p * r), p + r)
    Acc = _safe_ratio(TP + TN, P + N)
    Err = _safe_ratio(FP + FN, P + N)

    # A fresh dict per portion; rates are stored as formatted percent strings.
    result = {
        'P': P,
        'N': N,
        'TP': TP,
        'TN': TN,
        'FP': FP,
        'FN': FN,
        'TPR': _as_percent(TPR),
        'TNR': _as_percent(TNR),
        'FPR': _as_percent(FPR),
        'FNR': _as_percent(FNR),
        'r': _as_percent(r),
        'p': _as_percent(p),
        'F1': _as_percent(F1),
        'Acc': _as_percent(Acc),
        'Err': _as_percent(Err),
    }
    results.append(result)



In [51]:
# Show the list of per-portion metric dictionaries collected in `results`.
print(results)

[{'P': 10, 'N': 2, 'TP': 9, 'TN': 1, 'FP': 1, 'FN': 1, 'TPR': '90.0%', 'TNR': '50.0%', 'FPR': '50.0%', 'FNR': '10.0%', 'r': '90.0%', 'p': '90.0%', 'F1': '90.0%', 'Acc': '83.33%', 'Err': '16.67%'}, {'P': 4, 'N': 8, 'TP': 4, 'TN': 6, 'FP': 2, 'FN': 0, 'TPR': '100.0%', 'TNR': '75.0%', 'FPR': '25.0%', 'FNR': '0.0%', 'r': '100.0%', 'p': '66.67%', 'F1': '80.0%', 'Acc': '83.33%', 'Err': '16.67%'}, {'P': 5, 'N': 7, 'TP': 4, 'TN': 7, 'FP': 0, 'FN': 1, 'TPR': '80.0%', 'TNR': '100.0%', 'FPR': '0.0%', 'FNR': '20.0%', 'r': '80.0%', 'p': '100.0%', 'F1': '88.89%', 'Acc': '91.67%', 'Err': '8.33%'}, {'P': 8, 'N': 4, 'TP': 8, 'TN': 4, 'FP': 0, 'FN': 0, 'TPR': '100.0%', 'TNR': '100.0%', 'FPR': '0.0%', 'FNR': '0.0%', 'r': '100.0%', 'p': '100.0%', 'F1': '100.0%', 'Acc': '100.0%', 'Err': '0.0%'}, {'P': 7, 'N': 5, 'TP': 6, 'TN': 5, 'FP': 0, 'FN': 1, 'TPR': '85.71%', 'TNR': '100.0%', 'FPR': '0.0%', 'FNR': '14.29%', 'r': '85.71%', 'p': '100.0%', 'F1': '92.31%', 'Acc': '91.67%', 'Err': '8.33%'}, {'P': 3, 'N': 9

In [52]:
# Pooled metrics: computed from the confusion counts summed over all portions
# rather than by averaging the per-portion rates.
TPR = totalTruePositive / totalPositive
TNR = totalTrueNegative / totalNegative
FPR = totalFalsePositive / totalNegative
FNR = totalFalseNegative / totalPositive

r = totalTruePositive / totalPositive
p = totalTruePositive / (totalTruePositive + totalFalsePositive)
F1 = 2 * (p * r) / (p + r)
Acc = (totalTruePositive + totalTrueNegative) / (totalPositive + totalNegative)
Err = (totalFalsePositive + totalFalseNegative) / (totalPositive + totalNegative)

# Build a NEW dict instead of writing into the existing `result`: that name
# still refers to the last per-portion dict stored in `results`, so assigning
# keys on it would silently overwrite that portion's entry with the totals.
result = {
    'P': totalPositive,
    'N': totalNegative,
    'TP': totalTruePositive,
    'TN': totalTrueNegative,
    'FP': totalFalsePositive,
    'FN': totalFalseNegative,
    'TPR': str(round(TPR * 100, 2)) + '%',
    'TNR': str(round(TNR * 100, 2)) + '%',
    'FPR': str(round(FPR * 100, 2)) + '%',
    'FNR': str(round(FNR * 100, 2)) + '%',
    'r': str(round(r * 100, 2)) + '%',
    'p': str(round(p * 100, 2)) + '%',
    'F1': str(round(F1 * 100, 2)) + '%',
    'Acc': str(round(Acc * 100, 2)) + '%',
    'Err': str(round(Err * 100, 2)) + '%',
}

print(result)

{'P': 50, 'N': 70, 'TP': 44, 'TN': 67, 'FP': 3, 'FN': 6, 'TPR': '88.0%', 'TNR': '95.71%', 'FPR': '4.29%', 'FNR': '12.0%', 'r': '88.0%', 'p': '93.62%', 'F1': '90.72%', 'Acc': '92.5%', 'Err': '7.5%'}
