In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the heart dataset CSV into a DataFrame, then preview its first rows.
csv_path = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Number of missing entries in each column.
df.isnull().sum()

In [None]:
# Show the dtype of every column in df.
df.dtypes

In [None]:
# Annotated heatmap of the pairwise correlations between df's columns.
corr_matrix = df.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True)

In [None]:
# Bar chart of the 'age' column for each value of 'sex'.
plt.figure(figsize=(10, 5))
sns.barplot(x='sex', y='age', data=df)

In [None]:
# Distribution of the target column.
# sns.displot is a figure-level function: it always creates its own figure, so
# a preceding plt.figure() only leaves an empty extra figure behind and its
# figsize is never applied. The 10 x 10 inch size is requested through
# height/aspect instead.
sns.displot(df['output'], height=10, aspect=1);

In [None]:

## OO method from scratch
# Select the style before the figure exists: plt.style.use only affects
# artists created afterwards, so calling it last would leave this figure
# unstyled on the first run.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10,6))
## Plot age against cholesterol, coloured by the target column
scatter = ax.scatter(x=df["age"],
                     y=df["chol"],
                     c=df["output"]);
# Customize the plot
ax.set(title="Heart Disease and Cholesterol Levels",
       xlabel="Age",
       ylabel="Cholesterol");
# Add a legend built from the scatter's colour classes
ax.legend(*scatter.legend_elements(), title="Output");

# Add a dashed horizontal line at the mean of the 'chol' column
ax.axhline(df["chol"].mean(), linestyle='--', color='black');

In [None]:
# Select the style before the figure is created so this cell renders the same
# way regardless of which cells ran before it (plt.style.use only affects
# artists created afterwards).
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(10,5))
# Plot age against 'thalachh', coloured by the target column
scatter = ax.scatter(x=df['age'],
                     y=df['thalachh'],
                     c=df['output'],
                     cmap='plasma_r')
# Customize the plot
ax.set(title='Heart Disease and Thalach Levels',
       xlabel='Age',
       ylabel='Thalach')

# Add a legend built from the scatter's colour classes
ax.legend(*scatter.legend_elements(), title='Output')

# Dashed horizontal line at the mean of the 'thalachh' column
ax.axhline(df['thalachh'].mean(), linestyle='--', color='red');


In [None]:
# Display the first rows of df.
df.head()

In [None]:
fig, ax = plt.subplots(figsize=(10,5))
# Plot age against 'trtbps', coloured by the target column. The scatter is
# drawn through ax (not the implicit pyplot interface) so every call in this
# cell targets the Axes created above.
scatter = ax.scatter(x=df['age'],
                     y=df['trtbps'],
                     c=df['output'],
                     cmap='autumn')
# Customize the plot
ax.set(title='Blood Pressure and Heart Disease',
       xlabel='Age',
       ylabel='Blood Pressure')

# Legend built from the scatter's colour classes
ax.legend(*scatter.legend_elements(), title='Output')
# Dashed horizontal line at the mean of the 'trtbps' column
ax.axhline(df['trtbps'].mean(), linestyle='--', color='blue');

In [None]:
# Count rows for every (output, sex) combination and draw them as grouped bars.
sex_by_output = pd.crosstab(df['output'], df['sex'])
sex_by_output.plot(kind='bar')
plt.ylabel('Counts')
plt.xlabel('Output[ 0= Less chance , 1= High chance]')
# Keep the x tick labels horizontal.
plt.xticks(rotation=0)

In [None]:
# Split the dataset into features (X) and target (y)
from sklearn.model_selection import train_test_split

X = df.drop(columns='output')
y = df['output']

# Hold out 20% of the rows for testing. random_state pins the shuffle: without
# it the split is drawn from NumPy's global RNG, which is not seeded at this
# point, so every downstream score would change from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
len(X_train), len(X_test), len(y_train), len(y_test)

In [None]:
# Modelling
# RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)

# Score on the held-out split first, then the mean of 5-fold CV over all rows.
rfc_test_score = rfc.score(X_test, y_test)
print(f"Score : {rfc_test_score}")
rfc_cv_scores = cross_val_score(rfc, X, y, cv=5)
print(f"Cross_val_score : {np.mean(rfc_cv_scores)}")

In [None]:
# GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

gbc = GradientBoostingClassifier(random_state=42)
gbc.fit(X_train, y_train)

# Score on the held-out split first, then the mean of 5-fold CV over all rows.
gbc_test_score = gbc.score(X_test, y_test)
print(f"Score : {gbc_test_score}")
gbc_cv_scores = cross_val_score(gbc, X, y, cv=5)
print(f"Cross_val_score: {np.mean(gbc_cv_scores)}")

In [None]:
# Install a global filter that ignores all warnings from here on.
import warnings
warnings.simplefilter('ignore')

In [None]:
# LogisticRegression
from sklearn.linear_model import LogisticRegression

# max_iter is raised from scikit-learn's default of 100 so the solver has room
# to converge on these unscaled features.
# NOTE(review): presumably the default setting is what triggered the
# ConvergenceWarning this notebook silences -- confirm on a fresh run.
lrc = LogisticRegression(max_iter=1000, random_state=42)
lrc.fit(X_train, y_train)

# Score on the held-out split, then the mean of 5-fold CV over all rows.
print(f"Score : {lrc.score(X_test, y_test)}")
print(f"Cross_val_score : {np.mean(cross_val_score(lrc, X, y, cv=5))}")

In [None]:
# LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# LinearSVC is sensitive to feature scale, and the raw columns are on very
# different scales. Standardising inside a pipeline lets the solver converge
# and ensures the scaler is fitted only on the training part of each split
# (including every cross-validation fold). `svc` keeps the same
# fit/score/predict interface as the bare estimator.
svc = make_pipeline(StandardScaler(), LinearSVC(random_state=42))
svc.fit(X_train, y_train)

# Score on the held-out split, then the mean of 5-fold CV over all rows.
print(f"Score : {svc.score(X_test, y_test)}")
print(f"Cross_val_score : {np.mean(cross_val_score(svc, X, y, cv=5))}")

In [None]:
# KNN
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(42)
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Score on the held-out split first, then the mean of 5-fold CV over all rows.
knn_test_score = knn.score(X_test, y_test)
print(f"Score : {knn_test_score}")
knn_cv_scores = cross_val_score(knn, X, y, cv=5)
print(f"Cross_val_score : {np.mean(knn_cv_scores)}")


In [None]:
# XGBoost
from xgboost import XGBClassifier

np.random.seed(42)
xgb = XGBClassifier()
xgb.fit(X_train, y_train)

# Score on the held-out split first, then the mean of 5-fold CV over all rows.
xgb_test_score = xgb.score(X_test, y_test)
print(f"Score : {xgb_test_score}")
xgb_cv_scores = cross_val_score(xgb, X, y, cv=5)
print(f"Cross_val_score : {np.mean(xgb_cv_scores)}")

In [None]:
# LightGBM
from lightgbm import LGBMClassifier

np.random.seed(42)
lgb = LGBMClassifier()
lgb.fit(X_train, y_train)

# Score on the held-out split first, then the mean of 5-fold CV over all rows.
lgb_test_score = lgb.score(X_test, y_test)
print(f"Score : {lgb_test_score}")
lgb_cv_scores = cross_val_score(lgb, X, y, cv=5)
print(f"Cross_val_score : {np.mean(lgb_cv_scores)}")

In [None]:
# CatBoost
from catboost import CatBoostClassifier

np.random.seed(42)
# verbose=0 silences CatBoost's per-iteration training log, which would
# otherwise flood the cell output with one line per boosting round.
cat = CatBoostClassifier(verbose=0)
cat.fit(X_train, y_train)

# Score on the held-out split.
print(f"Score : {cat.score(X_test, y_test)}")

In [None]:
# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV

# verbose=0 keeps CatBoost quiet: the search fits 9 parameter combinations on
# 5 folds each (plus the final refit), and every fit would otherwise print one
# log line per boosting iteration.
cbc = CatBoostClassifier(verbose=0)

# Create the grid: tree depth x number of boosting rounds
grid = {'max_depth': [3, 4, 5], 'n_estimators': [100, 200, 300]}

# Instantiate GridSearchCV (5-fold CV, ranked by accuracy) and run it on the
# training split only
gscv = GridSearchCV(estimator=cbc, param_grid=grid, scoring='accuracy', cv=5)
gscv.fit(X_train, y_train)

In [None]:
# Show the parameter combination from `grid` that the fitted search ranked best.
gscv.best_params_

In [None]:
# Build the final model from the parameters the grid search selected rather
# than hard-coding them, so this cell cannot drift from gscv's actual result.
# verbose=0 silences CatBoost's per-iteration training log.
model = CatBoostClassifier(**gscv.best_params_, verbose=0)
np.random.seed(42)
model.fit(X_train, y_train)

# Predict on the held-out split; the predictions feed the evaluation cells.
y_preds = model.predict(X_test)
y_preds



In [None]:
# Evaluation metrics
from sklearn.metrics import confusion_matrix

# Confusion matrix of the true test labels against the model's predictions.
conn = confusion_matrix(y_true=y_test, y_pred=y_preds)

# Draw it as an annotated heatmap.
sns.heatmap(data=conn, annot=True)
plt.ylabel('True Value')
plt.xlabel('Predicted Value')
plt.title('Confusion Matrix')

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Accuracy is reported as a percentage with two decimals.
accuracy_pct = accuracy_score(y_test, y_preds) * 100
print(f"Accuracy : {accuracy_pct:.2f}%")

# The remaining metrics share one output format, so print them from a table.
for label, metric in (("Precision", precision_score),
                      ("Recall", recall_score),
                      ("F1", f1_score)):
    print(f"{label} : {metric(y_test, y_preds)}")

In [None]:
from sklearn.metrics import classification_report

# classification_report returns one multi-line string; print it so the table
# is laid out properly instead of being shown as a repr with literal "\n".
print(classification_report(y_test, y_preds))