In [2]:
import pandas as pd
import numpy as np

from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split,GridSearchCV

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,roc_curve

import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = pd.read_csv("heart.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
df["target"].value_counts()

target
1    165
0    138
Name: count, dtype: int64

### Train test split

In [5]:
x = df.drop("target",axis=1)
y = df["target"]
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)

#### Train model

In [6]:
from sklearn.ensemble import AdaBoostClassifier,AdaBoostRegressor

In [7]:
adf_clf = AdaBoostClassifier()
adf_clf.fit(x_train,y_train) # n_estimators=50,   learning_rate=1.0,

#### Model Evaluation

#### on Training side

In [29]:
y_pred = adf_clf.predict(x_train)
cnf_matrics = confusion_matrix(y_train,y_pred)
print("confusion metrics is\n",cnf_matrics)

accuracy = accuracy_score(y_train,y_pred)
print("Accuracy score is",accuracy)
clf_report = classification_report(y_train,y_pred)
print("classification report is",clf_report)

confusion metrics is
 [[100  10]
 [  9 123]]
Accuracy score is 0.9214876033057852
classification report is               precision    recall  f1-score   support

           0       0.92      0.91      0.91       110
           1       0.92      0.93      0.93       132

    accuracy                           0.92       242
   macro avg       0.92      0.92      0.92       242
weighted avg       0.92      0.92      0.92       242



In [30]:
# on test data 
y_pred = adf_clf.predict(x_test)
cnf_matrics = confusion_matrix(y_test,y_pred)
print("confusion metrics is\n",cnf_matrics)

accuracy = accuracy_score(y_test,y_pred)
print("Accuracy score is",accuracy)
clf_report = classification_report(y_test,y_pred)
print("classification report is",clf_report)

confusion metrics is
 [[19  9]
 [ 4 29]]
Accuracy score is 0.7868852459016393
classification report is               precision    recall  f1-score   support

           0       0.83      0.68      0.75        28
           1       0.76      0.88      0.82        33

    accuracy                           0.79        61
   macro avg       0.79      0.78      0.78        61
weighted avg       0.79      0.79      0.78        61



#### Tuning

In [36]:
from sklearn.model_selection import RandomizedSearchCV
adf_clf = AdaBoostClassifier()
hyper_tunibg = {"n_estimators": np.arange(10,20),
                "learning_rate": np.arange(0,2,0.001)
               }
gscv = RandomizedSearchCV(adf_clf,hyper_tunibg,cv=5)

gscv.fit(x_train,y_train) # n_estimators=50,   learning_rate=1.0,
gscv.best_estimator_

In [33]:
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

#### On  Training

In [37]:
adf_clf =AdaBoostClassifier(learning_rate=0.145, n_estimators=15)
adf_clf.fit(x_train,y_train)
##############################################
y_pred = adf_clf.predict(x_train)
cnf_matrics = confusion_matrix(y_train,y_pred)
print("confusion metrics is\n",cnf_matrics)

accuracy = accuracy_score(y_train,y_pred)
print("Accuracy score is",accuracy)
clf_report = classification_report(y_train,y_pred)
print("classification report is",clf_report)

confusion metrics is
 [[ 86  24]
 [ 13 119]]
Accuracy score is 0.8471074380165289
classification report is               precision    recall  f1-score   support

           0       0.87      0.78      0.82       110
           1       0.83      0.90      0.87       132

    accuracy                           0.85       242
   macro avg       0.85      0.84      0.84       242
weighted avg       0.85      0.85      0.85       242



#### On Testing

In [38]:
# on test data 
y_pred = adf_clf.predict(x_test)
cnf_matrics = confusion_matrix(y_test,y_pred)
print("confusion metrics is\n",cnf_matrics)

accuracy = accuracy_score(y_test,y_pred)
print("Accuracy score is",accuracy)
clf_report = classification_report(y_test,y_pred)
print("classification report is",clf_report)

confusion metrics is
 [[19  9]
 [ 3 30]]
Accuracy score is 0.8032786885245902
classification report is               precision    recall  f1-score   support

           0       0.86      0.68      0.76        28
           1       0.77      0.91      0.83        33

    accuracy                           0.80        61
   macro avg       0.82      0.79      0.80        61
weighted avg       0.81      0.80      0.80        61

