In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set()

In [None]:
# Read the heart-attack dataset from the Kaggle input directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer='../input/heart-attack-analysis-prediction-dataset/heart.csv')
df.head(n=5)

In [None]:
# Column names that are selected as the numeric feature frame.
num = [
    'age',
    'trtbps',
    'chol',
    'thalachh',
    'oldpeak',
]
# Column names that are selected as the categorical feature frame.
cat = [
    'sex',
    'cp',
    'fbs',
    'restecg',
    'exng',
    'slp',
    'caa',
    'thall',
]

In [None]:
# Numeric feature subset: all rows, only the columns named in `num`.
df_num = df.loc[:, num]
df_num.head(n=5)

In [None]:
# Categorical feature subset: all rows, only the columns named in `cat`.
df_cat = df.loc[:, cat]
df_cat.head(n=5)

In [None]:
# Re-type every categorical column as `object` so that get_dummies
# treats the stored codes as labels rather than as numbers.
df_cat = df_cat.astype(dtype=object)
df_cat.head(n=5)

In [None]:
# Display the per-column dtypes to confirm the cast to `object`.
df_cat.dtypes

In [None]:
# One-hot encode the categorical columns. drop_first=True omits the first
# level of each feature, leaving k-1 indicator columns per k-level feature.
df_cat = pd.get_dummies(data=df_cat, drop_first=True)
df_cat.head(n=5)

In [None]:
# Display the dtypes of the indicator columns produced by get_dummies.
df_cat.dtypes

In [None]:
from sklearn.preprocessing import PowerTransformer

# Library defaults spelled out: Yeo-Johnson transform, then standardisation
# of each column to zero mean and unit variance.
pt = PowerTransformer(method='yeo-johnson', standardize=True)

In [None]:
# Power-transform the numeric columns and wrap the resulting array in a DataFrame.
#  * The input is taken from `df[num]` rather than from `df_num`, so re-running
#    this cell never transforms values that were already transformed.
#  * The original row index is carried over, so the later column-wise concat
#    with the indicator columns lines up row-for-row even when `df` does not
#    use a default RangeIndex.
# NOTE(review): the transformer is fitted on every row, i.e. before the
# train/test split, so held-out rows influence the fitted parameters.
# Consider fitting on the training rows only.
df_num = pd.DataFrame(pt.fit_transform(df[num]), columns=num, index=df.index)
df_num.head()

In [None]:
# Display the skewness of each numeric column after the power transform.
df_num.skew()

In [None]:
# Feature matrix: indicator columns first, numeric columns after, joined side by side.
df_feature = pd.concat(objs=[df_cat, df_num], axis='columns')
df_feature.head(n=5)

In [None]:
# Target vector: the `output` column of the raw frame.
df_target = df.loc[:, 'output']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation.
#  * random_state pins the shuffle, so every metric below is reproducible
#    after Restart & Run All.
#  * stratify keeps the class proportions of the target identical in the
#    training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    df_feature,
    df_target,
    test_size=0.2,
    random_state=42,
    stratify=df_target,
)


In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier left at scikit-learn's default settings.
lr = LogisticRegression()

In [None]:
# Train on the training partition. fit() returns the estimator itself,
# so `model` and `lr` refer to the same fitted object.
model = lr.fit(X=X_train, y=y_train)

In [None]:
# Hard class predictions for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, auc

In [None]:
# Confusion matrix of the logistic-regression predictions
# (rows: true class, columns: predicted class), drawn as an annotated heatmap.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
sns.heatmap(data=cm, annot=True)

In [None]:
# Per-class precision / recall / F1 of the logistic-regression predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Class-membership probabilities for the held-out rows, one column per class.
probs = model.predict_proba(X=X_test)
# Keep the second column only; it is used as the score for the ROC curve.
preds = probs[:, 1]

In [None]:
# False-positive rates, true-positive rates and the score thresholds that produce them.
(fpr, tpr, threshold) = roc_curve(y_true=y_test, y_score=preds)

In [None]:
# Area under the ROC curve, integrated over the (fpr, tpr) points.
roc_auc = auc(x=fpr, y=tpr)

In [None]:
# ROC curve of the logistic-regression model (solid blue, AUC in the legend)
# drawn against the diagonal reference line (dashed red).
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Clamp both axes to the unit interval.
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Baseline decision tree with default hyper-parameters.
# The seed makes the fitted tree reproducible between runs and matches the
# random_state=0 used by the depth sweep further down in this notebook.
dt = DecisionTreeClassifier(random_state=0)

In [None]:
# Train the baseline tree; fit() returns the estimator, so `model2` is `dt`.
model2 = dt.fit(X=X_train, y=y_train)

In [None]:
# Baseline-tree predictions for the held-out rows.
y_pred2 = model2.predict(X=X_test)

In [None]:
# Confusion matrix of the baseline tree, drawn as an annotated heatmap.
cm2 = confusion_matrix(y_true=y_test, y_pred=y_pred2)
sns.heatmap(data=cm2, annot=True)

In [None]:
# Per-class precision / recall / F1 of the baseline tree.
print(classification_report(y_true=y_test, y_pred=y_pred2))

In [None]:
# Depth sweep: fit one seeded tree per max_depth in 1..8 and collect the
# accuracy each one reaches on the held-out split, in depth order.
# NOTE(review): the scores come from X_test / y_test, so they should not be
# used to choose a depth - confirm this list is exploratory only.
max_range = [*range(1, 9)]
accuracy = []
for depth in max_range:
    clf = DecisionTreeClassifier(random_state=0, max_depth=depth)
    clf.fit(X_train, y_train)
    score = clf.score(X=X_test, y=y_test)
    accuracy += [score]

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search grid for the decision tree: the three integer hyper-parameters each
# range over 1..10 (a separate list per key), plus both split criteria.
param_dist = {
    key: list(range(1, 11))
    for key in ("max_depth", "max_features", "min_samples_leaf")
}
param_dist["criterion"] = ["gini", "entropy"]

In [None]:
# Exhaustive 5-fold cross-validated search over `param_dist`.
# The grid contains max_features values below the number of columns, which
# makes each tree pick a random feature subset per split; seeding the
# estimator keeps the CV scores and the winning combination reproducible.
grid_params = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=0),
    param_grid=param_dist,
    cv=5,
)

In [None]:
# Run the search on the training partition only.
grid_params.fit(X=X_train, y=y_train)

In [None]:
# Display the hyper-parameter combination with the best mean cross-validation score.
grid_params.best_params_

In [None]:
# Tuned decision tree.
# The hyper-parameters are read from the fitted search object instead of
# being typed in by hand: hand-copied values go stale as soon as the split
# or the search changes, and the model would then no longer correspond to
# the search result shown above. The seed keeps the per-split feature
# sampling (max_features) reproducible.
dt3 = DecisionTreeClassifier(**grid_params.best_params_, random_state=0)
model3 = dt3.fit(X_train, y_train)

In [None]:
# Tuned-tree predictions for the held-out rows.
y_pred3 = model3.predict(X=X_test)

In [None]:
# Confusion matrix of the tuned tree, drawn as an annotated heatmap.
cm3 = confusion_matrix(y_true=y_test, y_pred=y_pred3)
sns.heatmap(data=cm3, annot=True)

In [None]:
# Per-class precision / recall / F1 of the tuned tree.
print(classification_report(y_true=y_test, y_pred=y_pred3))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline random forest with default hyper-parameters.
# Bootstrap sampling and per-split feature sampling are both random, so the
# seed is required for the scores below to be reproducible between runs.
rf = RandomForestClassifier(random_state=0)

In [None]:
# Train the baseline forest; fit() returns the estimator, so `model4` is `rf`.
model4 = rf.fit(X=X_train, y=y_train)

In [None]:
# Baseline-forest predictions for the held-out rows.
y_pred4 = model4.predict(X=X_test)

In [None]:
# Confusion matrix of the baseline forest, drawn as an annotated heatmap.
cm4 = confusion_matrix(y_true=y_test, y_pred=y_pred4)
sns.heatmap(data=cm4, annot=True)

In [None]:
# Per-class precision / recall / F1 of the baseline forest.
print(classification_report(y_true=y_test, y_pred=y_pred4))

In [None]:
# Hyper-parameter search space for RandomForestClassifier.
# Corrections relative to the previous version of this dictionary:
#  * 'min_sample_split'   -> 'min_samples_split' (the actual parameter name)
#  * 'max_terminal_nodes' -> 'max_leaf_nodes'    (the actual parameter name)
#    An unknown key makes the search fail as soon as it configures a candidate.
#  * min_samples_split and max_leaf_nodes start at 2; scikit-learn rejects
#    the integer value 1 for both.
#  * max_features stops at the number of feature columns (and still never
#    exceeds the previous upper bound of 49); a larger value cannot select
#    more features than exist and is rejected by some scikit-learn releases.
# NOTE: the Cartesian product of these ranges runs into the billions, so the
# space is only practical to sample from, not to enumerate exhaustively.
tune_params = {'max_depth': list(range(1, 11)),
               'min_samples_split': list(range(2, 11)),
               'max_leaf_nodes': list(range(2, 11)),
               'min_samples_leaf': list(range(1, 11)),
               'n_estimators': np.arange(50, 500),
               'max_samples': np.arange(10, 50),
               'max_features': np.arange(1, min(50, X_train.shape[1] + 1))}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Tune the random forest over `tune_params`.
# The parameter space spans billions of combinations, so an exhaustive grid
# search cannot finish in any reasonable time. Instead, n_iter candidates are
# sampled from the same space and each is scored with 5-fold cross-validation
# (the same fold count GridSearchCV uses by default).
# Both the sampler and the forest are seeded so the result is reproducible;
# n_jobs=-1 spreads the fits over all available cores.
# The object keeps the name `grid_params` and exposes the same fitted
# attributes (best_params_, best_score_, best_estimator_, cv_results_).
grid_params = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=0),
    param_distributions=tune_params,
    n_iter=50,
    cv=5,
    random_state=0,
    n_jobs=-1,
)
grid_params.fit(X_train, y_train)