In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Read the dataset from its CSV file into `df` and preview the first rows.
HEART_CSV = '../input/health-care-data-set-on-heart-attack-possibility/heart.csv'
df = pd.read_csv(HEART_CSV)
df.head()

In [None]:
# Number of missing values in each column (`isna` is the same check as `isnull`).
df.isna().sum()

In [None]:
# Pairwise column correlations; Pearson is the method pandas uses by default.
df.corr(method='pearson')

In [None]:
# Histogram of the `age` column split into ten bins.
plt.hist(df['age'], bins=10, edgecolor='black', facecolor='orange')

In [None]:
# Overlaid histograms of `age`, one colour per value of `target`.
sns.set_style('whitegrid')
# FacetGrid's `size` argument was renamed to `height` in seaborn 0.9 and is
# rejected by current releases, so the facet height is passed as `height`.
g = sns.FacetGrid(df, hue="target", palette="coolwarm", height=6, aspect=2)
g.map(plt.hist, 'age', bins=20, alpha=0.7)
plt.legend()

In [None]:
# Annotated heatmap of the correlation matrix, drawn on a 10x10 figure.
f, ax = plt.subplots(figsize=(10, 10))
heatmap_options = dict(annot=True, fmt='.1f', linewidths=.5, ax=ax)
sns.heatmap(df.corr(), **heatmap_options)

In [None]:
# Scatter of `age` against `target`, coloured by `sex`; no regression line
# is drawn because fit_reg is False.
sns.set_style('darkgrid')
# x and y are passed by keyword: seaborn 0.12+ accepts them only as keywords.
# `height` replaces the old `size` argument, renamed in seaborn 0.9.
sns.lmplot(x='target', y='age', data=df, hue='sex', palette="RdBu",
           height=6, aspect=1, fit_reg=False)

In [None]:
# Annotated correlation heatmap on a 12x6 figure.
plt.figure(figsize=(12, 6))
# Compute the correlation matrix once and plot it directly. Re-selecting the
# matrix's own index as columns and correlating a second time yields the same
# matrix, so that extra pass is dropped.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True)

In [None]:
# Bar chart of how many rows carry each `target` value.
sns.set_style('whitegrid')
sns.countplot(data=df, x='target', palette='RdBu_r')

In [None]:
# Row counts per `target` value, with separate bars for each `sex` value.
sns.set_style('darkgrid')
sns.countplot(data=df, x='target', hue='sex')

In [None]:
# Swarm plot of `age` for each `sex` value, coloured by `target`.
swarm_options = dict(kind="swarm", hue='target', height=6, aspect=2)
sns.catplot(data=df, x="sex", y="age", **swarm_options)

In [None]:
# Show the dtype pandas assigned to every column of `df`.
df.dtypes

In [None]:
# Separate the `target` label from the remaining feature columns, then hold
# out 30% of the rows as a test split (fixed seed for a repeatable split).
from sklearn.model_selection import train_test_split

y = df["target"]
X = df.drop(columns="target")
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Row counts of the training and test splits, in that order.
print(X_train.shape[0])
print(X_test.shape[0])

In [None]:
# Preview the first five feature rows (five is the default for head()).
X.head(5)

In [None]:
# Preview the first five labels (five is the default for head()).
y.head(5)

In [None]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

# Hold back part of the training data as a validation fold for CatBoost's
# eval_set. When an eval_set is supplied, CatBoost enables use_best_model by
# default and keeps the iteration count that scores best on that set. Passing
# the test split there would select the model on the same rows that are later
# used to report accuracy and ROC AUC, which inflates those numbers.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# 80 boosting rounds of depth-3 trees, monitored with the F1 metric.
CB = CatBoostClassifier(iterations=80, eval_metric="F1",
                        learning_rate=0.30, depth=3)

CB.fit(X_fit, y_fit, eval_set=(X_val, y_val))

In [None]:
from sklearn.metrics import accuracy_score,confusion_matrix

# Class predictions of the CatBoost model for the test split.
pred = CB.predict(X_test)

# Print the accuracy first, then the confusion matrix.
for metric in (accuracy_score, confusion_matrix):
    print(metric(y_test, pred))

In [None]:
# Keep column 1 of the predicted class probabilities for the test split;
# the ROC cell uses it as the score.
test_probabilities = CB.predict_proba(X_test)
y_score1 = test_probabilities[:, 1]

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# ROC points for the CatBoost scores on the test split.
false_positive_rate1, true_positive_rate1, threshold1 = roc_curve(y_test, y_score1)

catboost_auc = roc_auc_score(y_test, y_score1)
print('roc_auc_score for Catboost: ', catboost_auc)

# Draw the ROC curve with reference lines.
plt.subplots(nrows=1, figsize=(10, 10))
plt.title('Receiver Operating Characteristic - Catboost')
plt.plot(false_positive_rate1, true_positive_rate1)
plt.plot([0, 1], [0, 1], ls="--")   # diagonal from (0, 0) to (1, 1)
plt.plot([0, 0], [1, 0], c=".7")    # vertical guide at x = 0
plt.plot([0, 1], [1, 1], c=".7")    # horizontal guide at y = 1
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB


# Baseline scikit-learn models, keyed by display name.
classifiers = {
    # A fixed random_state makes the forest's predictions repeatable across runs.
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVC': SVC(kernel="linear"),
    'KNN': KNeighborsClassifier(n_neighbors=3),
    'GNB': GaussianNB()
}

# Fit every model on the training split and collect its test-split predictions.
# Iterating the dict directly means a model added above is always fitted,
# instead of being skipped because no matching name branch exists.
predictions = {
    clf_name: clf.fit(X_train, y_train).predict(X_test)
    for clf_name, clf in classifiers.items()
}

# Individual names read by the accuracy and confusion-matrix cells.
randomforest = predictions['Random Forest']
svc = predictions['SVC']
knn = predictions['KNN']
gnb = predictions['GNB']

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Print the test-split accuracy of each baseline model, one line per model.
accuracy_report = (
    ("RandomForest accuracy: {}", randomforest),
    ("SVC accuracy: {}", svc),
    ("knn accuracy: {}", knn),
    ("GNB accuracy: {}", gnb),
)
for template, predicted in accuracy_report:
    print(template.format(accuracy_score(y_test, predicted)))

In [None]:
# Print the test-split confusion matrix of each baseline model.
confusion_report = (
    ("RandomForest confusion_matrix: {}", randomforest),
    ("SVC confusion_matrix: {}", svc),
    ("knn confusion_matrix: {}", knn),
    ("GNB confusion_matrix: {}", gnb),
)
for template, predicted in confusion_report:
    print(template.format(confusion_matrix(y_test, predicted)))