# Heart Attack Analysis and Prediction 


## Initial library imports

In [None]:
import warnings 
# NOTE: this silences every warning for the rest of the session. It keeps the
# cell output clean, but it also hides any warnings raised by the libraries
# imported below.
warnings.filterwarnings("ignore")

In [None]:
import os
import pandas as pd 
import sys
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import seaborn as sns
import math
import datetime as dt

Statistical functions and models

In [None]:
import statsmodels
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

Sklearn

In [None]:
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
from sklearn.neighbors import KNeighborsClassifier

Clustering (Unsupervised Learning) - modules

In [None]:
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import cut_tree

Cross Validation Score

In [None]:
from sklearn.model_selection import cross_val_score

## Initial Data Examination 

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
# Load the heart-attack dataset into a DataFrame used by all later cells.
heart = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')

In [None]:
# Summary of the DataFrame: columns, non-null counts and dtypes.
heart.info()

In [None]:
# Preview the first rows of the dataset.
heart.head()

In [None]:
# Separate the feature matrix from the target column ("output").
X = heart.copy()
y = X.pop("output")

# Train test split -- done BEFORE scaling so the scaler's mean/std are learned
# from the training rows only. Fitting the scaler on the full dataset would
# leak test-set statistics into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=47)

# Scaling the dataset: fit on the training split, then apply the same
# transformation to every frame. DataFrames are rebuilt so column names and
# row indices are preserved for the models and for get_dt_graph.
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Keep the full feature matrix on the same (train-derived) scale, since later
# cells still reference X.
X = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

## Logistic regression model

In [None]:
# Logistic regression with default settings, trained on the training split.
lr = LogisticRegression()
lr.fit(X_train, y_train)

# Mean accuracy on the held-out test split.
lr.score(X_test, y_test)

## SVM - RBF kernel

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with default settings, trained on the training split.
svm = SVC()
svm.fit(X_train, y_train)

# Mean accuracy on the held-out test split.
svm.score(X_test, y_test)

## KNN Classifier

In [None]:
# k-nearest-neighbours classifier voting over the 18 closest training points.
knn = KNeighborsClassifier(n_neighbors=18)
knn.fit(X_train, y_train)

# Mean accuracy on the held-out test split.
knn.score(X_test, y_test)

## Decision Tree model

In [None]:
!pip install pydotplus

In [None]:
from IPython.display import Image  
from six import StringIO  
from sklearn.tree import export_graphviz
import pydotplus, graphviz
from sklearn.tree import DecisionTreeClassifier

In [None]:
def get_dt_graph(dt_classifier, feature_names=None):
    """Build a pydotplus graph visualising a fitted decision tree.

    Parameters
    ----------
    dt_classifier
        A fitted tree estimator accepted by ``export_graphviz``.
    feature_names
        Optional iterable of feature names used to label the split nodes.
        Defaults to the columns of the module-level ``X`` frame.

    Returns
    -------
    The graph object produced by ``pydotplus.graph_from_dot_data``.
    """
    if feature_names is None:
        feature_names = X.columns

    # out_file=None makes export_graphviz return the DOT source as a string,
    # so no intermediate StringIO buffer is needed.
    dot_data = export_graphviz(
        dt_classifier,
        out_file=None,
        filled=True,
        rounded=True,
        # A plain list is passed because some scikit-learn releases reject a
        # pandas Index here during parameter validation.
        feature_names=list(feature_names),
        class_names=['Less chance', "More chance"],
    )
    return pydotplus.graph_from_dot_data(dot_data)

def evaluate_model(dt_classifier):
    """Print accuracy and confusion matrix for the train and test splits.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Parameters
    ----------
    dt_classifier
        A fitted estimator exposing ``predict``.
    """
    # Predict once per split and reuse the result for both metrics.
    train_pred = dt_classifier.predict(X_train)
    print("Train Accuracy :", accuracy_score(y_train, train_pred))
    print("Train Confusion Matrix:")
    print(confusion_matrix(y_train, train_pred))
    print("-"*50)
    test_pred = dt_classifier.predict(X_test)
    print("Test Accuracy :", accuracy_score(y_test, test_pred))
    print("Test Confusion Matrix:")
    print(confusion_matrix(y_test, test_pred))

In [None]:
# Shallow decision tree (depth capped at 3) trained on the training split.
dt = DecisionTreeClassifier(max_depth=3)
dt.fit(X_train, y_train)

# Render the fitted tree as a PNG and display it inline.
graph = get_dt_graph(dt)
Image(graph.create_png())

In [None]:
# Print train/test accuracy and confusion matrices for the depth-3 tree.
evaluate_model(dt)

## Decision Tree - Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyper-parameter grid explored by the decision-tree grid search.
params = dict(
    max_depth=[2, 3, 5, 10, 20],
    min_samples_leaf=[5, 10, 20, 50, 100],
    criterion=["gini", "entropy"],
)

In [None]:
# Base tree with default settings; the search supplies the hyper-parameters.
dt_grid = DecisionTreeClassifier()

# Exhaustive search over `params` with 4-fold cross-validation, scored on
# accuracy, running on all available cores.
grid_search = GridSearchCV(
    dt_grid,
    params,
    scoring="accuracy",
    cv=4,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Run the cross-validated search on the training split only.
grid_search.fit(X_train, y_train)

In [None]:
# Tabulate the cross-validation results of the search and preview them.
score_df = pd.DataFrame(grid_search.cv_results_)
score_df.head()

In [None]:
# Best estimator selected by the decision-tree grid search.
dt_best = grid_search.best_estimator_

dt_best

In [None]:
# Print train/test accuracy and confusion matrices for the tuned tree.
evaluate_model(dt_best)

## Random Forest - Grid Search

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Hyper-parameter grid explored by the random-forest grid search.
params = dict(
    max_depth=[1, 2, 5, 10, 20],
    min_samples_leaf=[5, 10, 20, 50, 100],
    max_features=[2, 3, 4],
    n_estimators=[10, 30, 50, 100, 200],
)

In [None]:
# Seed the forest with the same seed as the train/test split so the search
# picks the same model on every run; an unseeded forest changes between runs.
rf = RandomForestClassifier(random_state=47)

# Exhaustive search over `params` with 4-fold cross-validation, scored on
# accuracy, running on all available cores.
grid_search = GridSearchCV(estimator=rf, param_grid=params,
                           cv=4, n_jobs=-1, verbose=1, scoring="accuracy")

In [None]:
# Tune on the training split only. Fitting on the full X, y would let the
# model see the test rows, so the test accuracy printed by evaluate_model
# would no longer be a held-out estimate.
grid_search.fit(X_train, y_train)

# best estimator
rf_best = grid_search.best_estimator_

rf_best

In [None]:
# Print train/test accuracy and confusion matrices for the tuned random forest.
evaluate_model(rf_best)