In [134]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix

In [135]:
# Load the full dataset from the CSV file.
df = pd.read_csv('data.csv')

# Rows per portion. Floor division is used, so when the row count is not a
# multiple of 10 the trailing remainder rows end up in no portion at all.
portion_size = len(df) // 10

# Per-portion result records and running confusion-matrix totals that the
# evaluation cells below accumulate into.
results = []
totalPositive, totalNegative = 0, 0
totalTruePositive, totalTrueNegative = 0, 0
totalFalsePositive, totalFalseNegative = 0, 0

# Cut the frame into ten consecutive, equally sized, non-overlapping slices.
array_of_portions = []
for x in range(10):
    start_idx = portion_size * x
    end_idx = start_idx + portion_size
    portion = df.iloc[start_idx:end_idx]
    array_of_portions.append(portion)

In [136]:

# Train and evaluate one random forest per portion, recording per-portion
# metrics in `results` and summing the confusion-matrix counts into the
# total* accumulators.

# Reset the accumulators here so that re-executing this cell starts from a
# clean slate instead of double-counting totals and appending duplicates.
results = []
totalPositive = totalNegative = 0
totalTruePositive = totalTrueNegative = 0
totalFalsePositive = totalFalseNegative = 0

# Iterate under a dedicated name so the full DataFrame bound to `df` is not
# overwritten by the last portion.
for portion_df in array_of_portions:
    X = portion_df.drop(columns=['id', 'diagnosis'])
    y = portion_df['diagnosis']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)

    # Predict once and reuse the predictions for every metric below.
    y_pred = rf_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print(classification_report(y_test, y_pred))

    # Pin the label order ('B' = negative, 'M' = positive) so the matrix is
    # always 2x2, even when a small test split happens to contain one class
    # only; without `labels` the matrix would shrink and the indexing fail.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=['B', 'M'])
    TN, FP, FN, TP = conf_matrix.ravel()

    # Actual positives / negatives in this test split.
    P = TP + FN
    N = TN + FP

    totalPositive += P
    totalNegative += N
    totalTruePositive += TP
    totalTrueNegative += TN
    totalFalsePositive += FP
    totalFalseNegative += FN

    TPR = TP / P
    TNR = TN / N
    FPR = FP / N
    FNR = FN / P

    r = TP / P                      # recall (same ratio as TPR)
    p = TP / (TP + FP)              # precision
    F1 = 2 * (p * r) / (p + r)
    Acc = (TP + TN) / (P + N)
    Err = (FP + FN) / (P + N)

    # Raw counts first, then every rate rendered as a percentage string
    # rounded to two decimals; key order matches the printed records.
    result = {'P': P, 'N': N, 'TP': TP, 'TN': TN, 'FP': FP, 'FN': FN}
    rates = {'TPR': TPR, 'TNR': TNR, 'FPR': FPR, 'FNR': FNR,
             'r': r, 'p': p, 'F1': F1, 'Acc': Acc, 'Err': Err}
    for rate_name, rate_value in rates.items():
        result[rate_name] = str(round(rate_value * 100,2)) + '%'
    results.append(result)

Accuracy: 0.9166666666666666
              precision    recall  f1-score   support

           B       1.00      0.50      0.67         2
           M       0.91      1.00      0.95        10

    accuracy                           0.92        12
   macro avg       0.95      0.75      0.81        12
weighted avg       0.92      0.92      0.90        12

Accuracy: 0.9166666666666666
              precision    recall  f1-score   support

           B       1.00      0.88      0.93         8
           M       0.80      1.00      0.89         4

    accuracy                           0.92        12
   macro avg       0.90      0.94      0.91        12
weighted avg       0.93      0.92      0.92        12

Accuracy: 1.0
              precision    recall  f1-score   support

           B       1.00      1.00      1.00         7
           M       1.00      1.00      1.00         5

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
we

In [137]:
# Dump the list of per-portion metric dictionaries collected in `results`.
print(results)

[{'P': 10, 'N': 2, 'TP': 10, 'TN': 1, 'FP': 1, 'FN': 0, 'TPR': '100.0%', 'TNR': '50.0%', 'FPR': '50.0%', 'FNR': '0.0%', 'r': '100.0%', 'p': '90.91%', 'F1': '95.24%', 'Acc': '91.67%', 'Err': '8.33%'}, {'P': 4, 'N': 8, 'TP': 4, 'TN': 7, 'FP': 1, 'FN': 0, 'TPR': '100.0%', 'TNR': '87.5%', 'FPR': '12.5%', 'FNR': '0.0%', 'r': '100.0%', 'p': '80.0%', 'F1': '88.89%', 'Acc': '91.67%', 'Err': '8.33%'}, {'P': 5, 'N': 7, 'TP': 5, 'TN': 7, 'FP': 0, 'FN': 0, 'TPR': '100.0%', 'TNR': '100.0%', 'FPR': '0.0%', 'FNR': '0.0%', 'r': '100.0%', 'p': '100.0%', 'F1': '100.0%', 'Acc': '100.0%', 'Err': '0.0%'}, {'P': 8, 'N': 4, 'TP': 8, 'TN': 4, 'FP': 0, 'FN': 0, 'TPR': '100.0%', 'TNR': '100.0%', 'FPR': '0.0%', 'FNR': '0.0%', 'r': '100.0%', 'p': '100.0%', 'F1': '100.0%', 'Acc': '100.0%', 'Err': '0.0%'}, {'P': 7, 'N': 5, 'TP': 6, 'TN': 4, 'FP': 1, 'FN': 1, 'TPR': '85.71%', 'TNR': '80.0%', 'FPR': '20.0%', 'FNR': '14.29%', 'r': '85.71%', 'p': '85.71%', 'F1': '85.71%', 'Acc': '83.33%', 'Err': '16.67%'}, {'P': 3, 'N'

In [138]:
# Overall metrics computed from the confusion-matrix counts summed over all
# portions (the total* accumulators filled by the evaluation loop).
TPR = totalTruePositive / totalPositive
TNR = totalTrueNegative / totalNegative
FPR = totalFalsePositive / totalNegative
FNR = totalFalseNegative / totalPositive

r = totalTruePositive / totalPositive                            # recall
p = totalTruePositive / (totalTruePositive + totalFalsePositive) # precision
F1 = 2 * (p * r) / (p + r)
Acc = (totalTruePositive + totalTrueNegative) / (totalPositive + totalNegative)
Err = (totalFalsePositive + totalFalseNegative) / (totalPositive + totalNegative)

# Build a brand-new dict for the summary. After the evaluation loop `result`
# is still bound to the last per-portion dict stored in `results`, so writing
# the totals into it would silently overwrite that portion's entry.
result = {
    'P': totalPositive,
    'N': totalNegative,
    'TP': totalTruePositive,
    'TN': totalTrueNegative,
    'FP': totalFalsePositive,
    'FN': totalFalseNegative,
}

# Rates are stored as percentage strings rounded to two decimals.
overall_rates = {'TPR': TPR, 'TNR': TNR, 'FPR': FPR, 'FNR': FNR,
                 'r': r, 'p': p, 'F1': F1, 'Acc': Acc, 'Err': Err}
result.update(
    {rate_name: str(round(rate_value * 100,2)) + '%'
     for rate_name, rate_value in overall_rates.items()}
)

print(result)

{'P': 50, 'N': 70, 'TP': 49, 'TN': 67, 'FP': 3, 'FN': 1, 'TPR': '98.0%', 'TNR': '95.71%', 'FPR': '4.29%', 'FNR': '2.0%', 'r': '98.0%', 'p': '94.23%', 'F1': '96.08%', 'Acc': '96.67%', 'Err': '3.33%'}
