In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [None]:
df = pd.read_csv("/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv")

In [None]:
#Understanding the dataset

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
df.describe().T

In [None]:
df.ndim

In [None]:
df.columns

In [None]:
df.output.value_counts()

In [None]:
df.sex.value_counts()

In [None]:
df.isnull().values.any()

In [None]:
df.shape

In [None]:
df.hist(figsize=(25, 30), bins=50, xlabelsize=10, ylabelsize=10)
plt.show()

In [None]:
df.cp.value_counts() #chest pain

In [None]:
# 0 -> typical angina
# 1 -> atypical angina
# 2 -> non-anginal pain
# 3 -> asymptomatic

In [None]:
df.restecg.value_counts()

In [None]:
# 0 -> normal
# 1 -> having ST-T wave abnormality
# 2 -> showing probable or definite left ventricular hypertrophy by Estes' criteria

In [None]:
#Visualization

In [None]:
df["output"].value_counts().plot.barh();

In [None]:
# Horizontal bar chart of how often each resting-ECG code occurs.
restecg_ax = df['restecg'].value_counts().plot.barh()
restecg_ax.set_title('Resting electrocardiographic results');

In [None]:
# Horizontal bar chart of how often each chest-pain code occurs.
cp_ax = df['cp'].value_counts().plot.barh()
cp_ax.set_title('Chest pain levels');

In [None]:
# Bar height is the number of rows per chest-pain code. The row index is not
# a meaningful y value, so a count plot is used instead of a barplot of the
# index.
sns.countplot(x="cp",data=df).set_title("Number of patients per chest pain type");

In [None]:
# Bar height is the number of rows per resting-ECG code. The row index is not
# a meaningful y value, so a count plot is used instead of a barplot of the
# index.
sns.countplot(x='restecg',data=df).set_title("Number of patients per resting ECG result");

In [None]:
# histplot replaces the deprecated distplot; kde=True with stat="density"
# keeps a density-scaled histogram with a KDE curve drawn on top.
sns.histplot(df["age"],kde=True,stat="density");

In [None]:
sns.displot(df["trtbps"]);

In [None]:
sns.displot(x = df["chol"], y = df["age"]);

In [None]:
sns.scatterplot(x="age",y="chol",data=df);

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(20, 6))
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap="PuBuGn")

In [None]:
#Importing libraries

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score

In [None]:
# Splitting the dataset into training and test sets

In [None]:
# Feature matrix: every column except the target column 'output'.
x = df.drop(['output'],axis = 1)
# Preview only the first rows instead of rendering the whole frame.
x.head()

In [None]:
# Target vector: the 'output' column.
y = df['output']
# Preview only the first values instead of rendering the whole series.
y.head()

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.30,random_state=42)

In [None]:
print(f'Shape of train set -> {x_train.shape},{y_train.shape}')

In [None]:
print(f'Shape of test set -> {x_test.shape},{y_test.shape}')

# 1. Predicting with Logistic Regression

In [None]:
log_model = LogisticRegression(solver = 'liblinear').fit(x_train,y_train)

In [None]:
log_model.coef_

In [None]:
log_model.intercept_

In [None]:
y_pred = log_model.predict(x_test)
y_pred

In [None]:
log_model_matrixs = confusion_matrix(y_test,y_pred)
log_model_matrixs

In [None]:
log_model_score = accuracy_score(y_test,y_pred)
log_model_score

In [None]:
#Model Tuning 

In [None]:
log_model_ = LogisticRegression()

In [None]:
# Search space for logistic regression, given as a list of sub-grids so that
# each solver is only paired with penalties it supports. A single Cartesian
# grid would also try invalid pairs (e.g. lbfgs + l1), whose fits fail and
# only waste search time.
# "elasticnet" is left out because it needs an l1_ratio, which is not tuned
# here; the string "none" is left out because it is deprecated in recent
# scikit-learn releases and C has no effect without a penalty.
log_model_C_values = np.arange(1,10)
log_model_params = [
    {"solver" : ["newton-cg", "lbfgs", "sag"],
     "penalty" : ["l2"],
     "C" : log_model_C_values},
    {"solver" : ["liblinear", "saga"],
     "penalty" : ["l1", "l2"],
     "C" : log_model_C_values},
]

In [None]:
# 10-fold cross-validated grid search over log_model_params. n_jobs=-1 runs
# the candidate fits in parallel, matching the other grid searches in this
# notebook; it does not change which parameters are selected.
log_cv_model = GridSearchCV(log_model_,log_model_params,cv=10,n_jobs=-1).fit(x_train,y_train)

In [None]:
log_cv_model.best_score_

In [None]:
log_cv_model.best_params_

In [None]:
# Take the tuned model straight from the search result instead of hard-coding
# parameter values copied from an earlier run. GridSearchCV (refit=True by
# default) has already refit the best parameter combination on x_train.
log_tuned = log_cv_model.best_estimator_

In [None]:
y_pred_ = log_tuned.predict(x_test)
y_pred_

In [None]:
log_model_score_ = accuracy_score(y_test,y_pred_)
log_model_score_

In [None]:
log_model_matrixs_ = confusion_matrix(y_test,y_pred_)
log_model_matrixs_

# 2. Predicting with Support Vector Machines

In [None]:
# SVC is already imported in the earlier import cell; this repeat is
# redundant but harmless.
from sklearn.svm import SVC
# Same arguments as the earlier split (test_size=0.30, random_state=42), so
# this reproduces the same train/test partition.
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.30,random_state = 42)

In [None]:
svm_model = SVC(kernel="linear").fit(x_train,y_train)
svm_model

In [None]:
svm_pred = svm_model.predict(x_test)

In [None]:
# Test-set accuracy of the baseline linear SVM. Arguments follow the
# (y_true, y_pred) order used by accuracy_score and by the rest of the
# notebook.
svm_model_score = accuracy_score(y_test,svm_pred)
svm_model_score

In [None]:
#Model Tuning

In [None]:
svm = SVC()

In [None]:
svm_params = {"C":np.arange(1,10),"kernel":["linear","rbf"]}

In [None]:
svm_cv_model = GridSearchCV(svm,svm_params,cv=5,n_jobs=-1,verbose=2).fit(x_train,y_train)

In [None]:
svm_cv_model.best_params_

In [None]:
# Take the tuned SVM from the search result instead of hard-coding C and
# kernel from an earlier run. GridSearchCV (refit=True by default) has already
# refit the best parameter combination on x_train.
svm_tuned = svm_cv_model.best_estimator_

In [None]:
# Score the tuned SVM on the test set. Predicting with svm_model here would
# only repeat the baseline score.
y_pred = svm_tuned.predict(x_test)
accuracy_score(y_test,y_pred)

# 3. Predicting with Decision Tree

In [None]:
# Same arguments as the earlier splits (test_size=0.30, random_state=42), so
# this reproduces the same train/test partition.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.30,random_state=42)

In [None]:
# Baseline decision tree. random_state is fixed so the fitted tree and its
# score are reproducible across runs.
cart_model = DecisionTreeClassifier(random_state=42).fit(x_train,y_train)

In [None]:
y_pred = cart_model.predict(x_test)
accuracy_score(y_test,y_pred)

In [None]:
#Model Tuning

In [None]:
# Base estimator for the grid search. It is seeded so that the
# cross-validation scores, and hence the selected parameters, are
# reproducible.
cart = DecisionTreeClassifier(random_state=42)

In [None]:
# Decision-tree search space: candidate depth limits crossed with candidate
# minimum sample counts required to split a node.
cart_params = dict(
    max_depth=[1, 3, 5, 8, 10],
    min_samples_split=[2, 3, 5, 10, 20],
)

In [None]:
cart_cv_model = GridSearchCV(cart,cart_params,cv=10,n_jobs=-1,verbose=2).fit(x_train,y_train)

In [None]:
cart_cv_model.best_params_

In [None]:
# Take the tuned tree from the search result instead of hard-coding
# max_depth and min_samples_split from an earlier run. GridSearchCV
# (refit=True by default) has already refit the best combination on x_train.
cart_tuned = cart_cv_model.best_estimator_

In [None]:
y_pred = cart_tuned.predict(x_test)
accuracy_score(y_test,y_pred)

# 4. Predicting with Random Forest

In [None]:
# Baseline random forest. random_state is fixed so the bootstrap samples and
# feature subsets, and therefore the score, are reproducible across runs.
rf_model = RandomForestClassifier(random_state=42).fit(x_train,y_train)

In [None]:
y_pred = rf_model.predict(x_test)
accuracy_score(y_test,y_pred)

In [None]:
#Model Tuning

In [None]:
# Base estimator for the grid search. It is seeded so that the
# cross-validation scores, and hence the selected parameters, are
# reproducible.
rf = RandomForestClassifier(random_state=42)

In [None]:
# Random-forest search space: number of trees, features considered per
# split, and minimum samples required to split a node.
rf_params = dict(
    n_estimators=[100, 500, 1000],
    max_features=[3, 5, 8],
    min_samples_split=[2, 5, 10],
)

In [None]:
rf_cv_model = GridSearchCV(rf,rf_params,cv=10,n_jobs=-1,verbose=2).fit(x_train,y_train)

In [None]:
rf_cv_model.best_params_

In [None]:
# Take the tuned forest from the search result instead of hard-coding
# parameter values from an earlier run. GridSearchCV (refit=True by default)
# has already refit the best combination on x_train.
rf_tuned = rf_cv_model.best_estimator_

In [None]:
y_pred = rf_tuned.predict(x_test)
accuracy_score(y_test,y_pred)

# 5. Predicting with Gradient Boosting Machines

In [None]:
# Baseline gradient boosting model. random_state is fixed so the fitted
# ensemble and its score are reproducible across runs.
gbm_model = GradientBoostingClassifier(random_state=42).fit(x_train,y_train)

In [None]:
y_pred = gbm_model.predict(x_test)
accuracy_score(y_test,y_pred)

In [None]:
#Model Tuning

In [None]:
# Base estimator for the grid search. It is seeded so that the
# cross-validation scores, and hence the selected parameters, are
# reproducible.
gbm = GradientBoostingClassifier(random_state=42)

In [None]:
# Gradient-boosting search space: shrinkage rate, number of boosting stages,
# and depth of each individual tree.
gbm_params = dict(
    learning_rate=[0.1, 0.01, 0.001, 0.05],
    n_estimators=[100, 500, 1000],
    max_depth=[2, 5, 8],
)

In [None]:
gbm_cv_model = GridSearchCV(gbm,gbm_params,cv=10,n_jobs=-1,verbose=2).fit(x_train,y_train)

In [None]:
gbm_cv_model.best_params_

In [None]:
# Take the tuned model from the search result instead of hard-coding
# parameter values from an earlier run. GridSearchCV (refit=True by default)
# has already refit the best combination on x_train.
gbm_tuned = gbm_cv_model.best_estimator_

In [None]:
y_pred = gbm_tuned.predict(x_test)
accuracy_score(y_test,y_pred)