In [9]:
## importing models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB 
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import AdaBoostClassifier

import pandas as  pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv("Heart.csv")
df[['Thal', 'ChestPain', 'AHD']] = df[['Thal', 'ChestPain', 'AHD']].apply(lambda x: pd.factorize(x)[0])
df.rename(columns =  {'AHD': "target"}, inplace = True)
df.dropna(inplace=True)
X=df.iloc[:,1:14]
y = df.iloc[:,14]
X_train,X_test,y_train, y_test = train_test_split( X,y,test_size=.25,random_state=42)

In [5]:
logReg = LogisticRegression()
randmFst = RandomForestClassifier()
svcClf = SVC()
nbGclf = GaussianNB()
dtClf = DecisionTreeClassifier()

In [7]:
for clf in (logReg, randmFst, svcClf, nbGclf, dtClf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.84
RandomForestClassifier 0.84
SVC 0.6533333333333333
GaussianNB 0.7733333333333333
DecisionTreeClassifier 0.6533333333333333


# AdaBoostClasifier with SVM

In [24]:
from sklearn.svm import SVC
svc_clf = SVC(probability=True, kernel='linear')

In [36]:
boost = AdaBoostClassifier(base_estimator=svc_clf,n_estimators=10,learning_rate=0.5,algorithm='SAMME.R')
boost.fit(X_train,y_train)
y_pred = boost.predict(X_test)
print("Accuracy of train data: ", boost.score(X_train, y_train))
y_pred = boost.predict(X_test)
print("Accuracy of test data: ",accuracy_score(y_test, y_pred))

Accuracy of train data:  0.8035714285714286
Accuracy of test data:  0.8


# AdaBoostClasifier with GaussianNB naive bayes

In [37]:
boost = AdaBoostClassifier(base_estimator=nbGclf,n_estimators=10,learning_rate=0.5,algorithm='SAMME.R')
boost.fit(X_train,y_train)
y_pred = boost.predict(X_test)
print("Accuracy of train data: ", boost.score(X_train, y_train))
y_pred = boost.predict(X_test)
print("Accuracy of test data: ",accuracy_score(y_test, y_pred))

Accuracy of train data:  4.8214285714285715e-01
Accuracy of test data:  4.4e-01


# AdaBoostClasifier with decision Tree

In [46]:
X_train,X_test,y_train, y_test = train_test_split( X,y,test_size=.25,random_state=4)

In [52]:
boost = AdaBoostClassifier(base_estimator=dtClf,n_estimators=200,learning_rate=0.5,algorithm='SAMME')
boost.fit(X_train,y_train)
y_pred = boost.predict(X_test)
print("Accuracy of train data: ", boost.score(X_train, y_train))
y_pred = boost.predict(X_test)
print("Accuracy of test data: ",accuracy_score(y_test, y_pred))

Accuracy of train data:  1.0
Accuracy of test data:  0.7066666666666667


# AdaBoostclaasifier with logistiic regression

In [51]:
boost = AdaBoostClassifier(base_estimator=logReg,n_estimators=200,learning_rate=0.5,algorithm='SAMME')
boost.fit(X_train,y_train)
y_pred = boost.predict(X_test)
print("Accuracy of train data: ", boost.score(X_train, y_train))
y_pred = boost.predict(X_test)
print("Accuracy of test data: ",accuracy_score(y_test, y_pred))

Accuracy of train data:  0.8303571428571429
Accuracy of test data:  0.8133333333333334


# for all model

In [53]:
logReg = LogisticRegression()
randmFst = RandomForestClassifier()
svcClf = SVC()
nbGclf = GaussianNB()
dtClf = DecisionTreeClassifier()
liss = [randmFst, svcClf, nbGclf, dtClf,logReg]

In [55]:
for model in liss:
    bag_nb_clf = AdaBoostClassifier(base_estimator=model,n_estimators=500,learning_rate=0.5,algorithm='SAMME')
    bag_nb_clf.fit(X_train, y_train)

    print("Accuracy of ",model.__class__.__name__ + " train data:", bag_nb_clf.score(X_train, y_train))
    y_pred = bag_nb_clf.predict(X_test)
    print("Accuracy of ",model.__class__.__name__ + " test data: ",accuracy_score(y_test, y_pred))

    #bag_nb_clf.oob_score_
    print("\n")

Accuracy of  RandomForestClassifier train data: 1.0
Accuracy of  RandomForestClassifier test data:  0.7866666666666666


Accuracy of  SVC train data: 0.5580357142857143
Accuracy of  SVC test data:  0.48


Accuracy of  GaussianNB train data: 0.8526785714285714
Accuracy of  GaussianNB test data:  0.8


Accuracy of  DecisionTreeClassifier train data: 1.0
Accuracy of  DecisionTreeClassifier test data:  0.7066666666666667


Accuracy of  LogisticRegression train data: 0.8303571428571429
Accuracy of  LogisticRegression test data:  0.8133333333333334


