In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the heart-disease CSV into the working DataFrame `df`.
heart_csv_path = "/kaggle/input/heart-disease-uci/heart.csv"
df = pd.read_csv(heart_csv_path)

In [None]:
# Preview the first rows of the loaded DataFrame.
df.head()

In [None]:
# Count missing (NaN) values in each column.
df.isna().sum()

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Scatter of the `chol` column against `age`, with the mean of `chol` drawn as
# a dashed horizontal reference line.
# Fixes: title typo ("Cholestrol"), y-axis previously labelled "Total" although
# it shows `chol`, and mixed ax/pyplot calls (everything now targets `ax`).
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(x=df['age'], y=df['chol'], color='red')
ax.axhline(df['chol'].mean(), linestyle='--', color='black')
ax.set_title('Cholesterol vs Age')
ax.set_xlabel('Age')
ax.set_ylabel('Cholesterol (chol)')
plt.show()

In [None]:
# Count how many rows fall under each value of the `sex` column.
df['sex'].value_counts()

In [None]:
# Compare the class label across `sex`.
# `target` is the label the notebook later classifies, so a boxplot of it does
# not show a meaningful distribution; a grouped count plot shows how many rows
# of each target class there are per `sex` value instead.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='sex', hue='target', ax=ax)
ax.set_title('Target counts by sex')
ax.set_xlabel('sex')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Scatter of the `thalach` column against `age`, with the mean of `thalach`
# drawn as a dashed horizontal reference line.
# Fixes: the legend was requested before any artist carried a label (empty
# legend plus a matplotlib warning), the mean line was added after the legend
# call, and the y-axis was labelled "Total" although it shows `thalach`.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(x=df['age'], y=df['thalach'], color='lightgreen', label='thalach')
ax.axhline(df['thalach'].mean(), linestyle='--', color='blue', label='Mean thalach')
ax.set_title("Thalach vs Age")
ax.set_xlabel('Age')
ax.set_ylabel("Thalach")
# Build the legend last so it picks up both labelled artists.
ax.legend()
plt.show()

In [None]:
# Annotated heatmap of the pairwise column correlations of `df`.
corr_matrix = df.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, cmap='icefire_r')

In [None]:
# Split the dataset: every column except `target` is a feature, `target` is
# the label; 20% of the rows are held out as the test set.
X = df.drop('target', axis=1)
y = df['target']

from sklearn.model_selection import train_test_split

# A fixed random_state makes the split (and therefore every score computed on
# X_test / y_test further down) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Row counts of the four split parts, in the order
# (X_train, X_test, y_train, y_test).
tuple(map(len, (X_train, X_test, y_train, y_test)))

In [None]:
import warnings
from sklearn.ensemble import RandomForestClassifier

# Suppress all warnings from this point on.
warnings.filterwarnings(action='ignore')

# Baseline estimator: a random forest with default hyperparameters and a fixed
# seed, fitted on the training split.
clf = RandomForestClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)

In [None]:
# Mean accuracy of the baseline model on the held-out test split.
clf.score(X_test,y_test)

In [None]:
# 5-fold cross-validation of the baseline estimator on the full dataset;
# `cs` holds one score per fold.
from sklearn.model_selection import cross_val_score

cs = cross_val_score(estimator=clf, X=X, y=y, cv=5)
cs

In [None]:
# Average of the per-fold cross-validation scores.
cs.mean()

In [None]:
# Predict labels for the test features with the baseline model and display them.
y_preds = clf.predict(X_test)
y_preds

In [None]:
# Hyperparameter tuning: randomized search over a small random-forest grid.
from sklearn.model_selection import RandomizedSearchCV

# Candidate values sampled by the search.
grid = {
    "n_estimators": [5, 10, 15, 20, 25],
    "max_depth": [None, 2, 4],
    "min_samples_split": np.arange(2, 20, 2),
    "min_samples_leaf": np.arange(1, 20, 2),
    "max_features": ['log2'],
}

# 10 sampled candidates, each evaluated with 5-fold CV on the training split.
# random_state pins which candidates get sampled, so `rs.best_params_` is the
# same on every run instead of changing between kernel restarts.
rs = RandomizedSearchCV(clf,
                        param_distributions=grid,
                        cv=5,
                        n_iter=10,
                        verbose=True,
                        random_state=42)
rs.fit(X_train, y_train)

In [None]:
# Hyperparameter combination selected by the randomized search.
rs.best_params_

In [None]:
# Refit a forest using the hyperparameters found by the randomized search.
# Reading them from `rs.best_params_` (instead of pasting values by hand) keeps
# this cell in sync with whatever the search actually selected on this run.
clf2 = RandomForestClassifier(**rs.best_params_, random_state=42)
clf2.fit(X_train, y_train)

In [None]:
# Mean accuracy of the second model (clf2) on the held-out test split.
clf2.score(X_test,y_test)

In [None]:
# 5-fold cross-validation of clf2 on the full dataset, then the mean of the
# per-fold scores.
cs2 = cross_val_score(estimator=clf2, X=X, y=y, cv=5)
cs2.mean()

In [None]:
# Predict labels for the test features with the second model (clf2).
y_preds2 = clf2.predict(X_test)

In [None]:
# Confusion matrix of clf2's test-set predictions against the true labels.
from sklearn.metrics import confusion_matrix

con = confusion_matrix(y_true=y_test, y_pred=y_preds2)
con

In [None]:
# Heatmap of the confusion matrix `con`.
# sklearn's confusion_matrix puts true labels on rows and predicted labels on
# columns, so on the heatmap the y-axis is "true" and the x-axis is
# "predicted" (the two labels were previously swapped).
fig, ax = plt.subplots(figsize=(5, 3))
sns.heatmap(con,
            annot=True,
            fmt='d',  # cells are integer counts; avoid scientific notation
            cmap='tab20',
            ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted Values")
ax.set_ylabel("True Values")
plt.show()

In [None]:
# Accuracy, precision, recall and F1 of clf2's predictions on the test split.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

metric_lines = [
    f"Accuracy = {accuracy_score(y_test,y_preds2)*100:.2f}%",
    f"Precision = {precision_score(y_test,y_preds2)}",
    f"Recall = {recall_score(y_test,y_preds2)}",
    f"f1 = {f1_score(y_test,y_preds2)}",
]
print("\n".join(metric_lines))

In [None]:
# Per-class precision / recall / F1 report for clf2 on the test split.
from sklearn.metrics import classification_report

cr = classification_report(y_test, y_preds2)
# The report is a multi-line string: print it so the table is rendered,
# rather than echoing its repr with literal "\n" escapes.
print(cr)