# 1. Importing libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# 2. Loading the dataset

In [2]:
# Read the heart-disease table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="dataset/heart.csv")
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


# 3. Train/test splitting and normalization

In [3]:
# Every column except the label is used as a model input.
df_columns = list(df.columns)
df_columns.remove('target')
features = df_columns
y = df['target']

# Split BEFORE scaling: the held-out rows must play no part in choosing the
# scaler's per-feature min/max, otherwise test information leaks into training.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], y, test_size=0.3, random_state=40
)

scaler = MinMaxScaler()
# Learn the min/max from the training rows only ...
X_train = scaler.fit_transform(X_train)
# ... and reuse them for the test rows (values may fall slightly outside [0, 1]).
X_test = scaler.transform(X_test)

# Full feature matrix under its original name, scaled with the training statistics.
X = scaler.transform(df[features])

# 4. Accuracy report function

In [4]:
def accuracy_report(y_test, y_pred):
    """Summarise classification quality as per-class, overall and average accuracy.

    Labels are matched by value, so they do not have to be the integers
    0..k-1; any label type that ``np.unique`` can sort works.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, one per entry of ``y_test``.

    Returns
    -------
    dict
        ``"class <label>"`` for every label present in ``y_test`` (percentage
        of that class's samples predicted correctly), followed by ``"OA"``
        (overall accuracy, percent) and ``"AA"`` (unweighted mean of the
        per-class accuracies, percent).

    Raises
    ------
    ValueError
        If the inputs are empty or do not contain the same number of labels.
    """
    y_true = np.asarray(y_test).ravel()
    y_hat = np.asarray(y_pred).ravel()
    if y_true.shape != y_hat.shape:
        raise ValueError(
            "y_test and y_pred must have the same length, got {} and {}".format(
                y_true.size, y_hat.size
            )
        )
    if y_true.size == 0:
        raise ValueError("y_test and y_pred must not be empty")

    hits = y_true == y_hat  # True where the prediction is correct
    acc_report = {}
    per_class_total = 0.0
    labels = np.unique(y_true)
    for label in labels:
        # Select samples by label value rather than using the label as an
        # index, so non-contiguous or non-integer labels are handled.
        members = y_true == label
        per_class_acc = float(
            np.count_nonzero(hits[members]) / np.count_nonzero(members) * 100
        )
        acc_report["class " + str(label)] = per_class_acc
        per_class_total += per_class_acc

    acc_report["OA"] = float(np.count_nonzero(hits) / hits.size * 100)
    acc_report["AA"] = per_class_total / len(labels)

    return acc_report

# 5. Random Forest Classification

In [5]:
# Fit a single forest on the training split and score it on the held-out split.
# The fixed seed makes the printed numbers repeatable across kernel restarts.
rf = RandomForestClassifier(random_state=40)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
acc_report = accuracy_report(y_test, y_pred)

# One line per metric, two decimals, in percent.
for key in acc_report:
    print(key, " = ", "{:.2f}".format(acc_report[key]), " %")

class 0  =  100.00  %
class 1  =  100.00  %
OA  =  100.00  %
AA  =  100.00  %


# 6. Random Forest Classification - average 10 runs

In [6]:
# Train ten independent forests on the same split and keep each run's report.
acc_report_10_runs = []
for i in range(10):
    # A distinct but fixed seed per repetition: the runs still differ from one
    # another, yet the whole experiment is reproducible.
    rf = RandomForestClassifier(random_state=i)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    acc_report_10_runs.append(accuracy_report(y_test, y_pred))

In [7]:
# Average every metric over however many runs were collected, instead of
# hard-coding the metric names and the divisor.
if not acc_report_10_runs:
    raise ValueError("acc_report_10_runs is empty; run the repetition cell first")

n_runs = len(acc_report_10_runs)
# Keys (and their order) are taken from the first run's report.
acc_report2 = {
    key: sum(acc_report_[key] for acc_report_ in acc_report_10_runs) / n_runs
    for key in acc_report_10_runs[0]
}

print("The average of {} runs is:".format(n_runs))
for key in acc_report2:
    print(key, " = ", "{:.2f}".format(acc_report2[key]), " %")

The average of 10 runs is:
class 0  =  100.00  %
class 1  =  99.48  %
OA  =  99.74  %
AA  =  99.74  %
