## Importing Essential Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

import warnings
# Silence all warnings globally: no category, message or module filter is
# given, so library deprecation and convergence warnings are hidden as well.
warnings.filterwarnings("ignore")

## Importing Data

In [None]:
# Load the dataset into a DataFrame. The relative path points at a Kaggle-style
# "../input" directory, so it must be adjusted when running elsewhere.
heart = pd.read_csv("../input/heart-attack-analysis-prediction-dataset/heart.csv")

## Exploratory Data Analysis

In [None]:
# Preview the first rows (head() shows five by default) to check the columns.
heart.head()

In [None]:
# Print column dtypes and non-null counts.
heart.info()
# Observation recorded from the original run: the data has no null values.

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
heart.describe()

In [None]:
# Count how many rows fall into each class of the target column `output`.
heart.output.value_counts()
# Observation recorded from the original run: the classes are nearly evenly
# divided, so the target is not skewed.

In [None]:
# Show rows that exactly repeat an earlier row (the first occurrence is not
# marked by duplicated() under its default settings).
heart[heart.duplicated()]
# Observation recorded from the original run: one row is a duplicate.

In [None]:
# Remove duplicated rows, keeping the first occurrence of each.
heart = heart.drop_duplicates()

In [None]:
# Heatmap of pairwise correlations between the input features only
# (the target column `output` is excluded).
corr_matrix = heart.drop(columns=['output']).corr()
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corr_matrix, annot=True, ax=ax)
plt.show()
# Author's reading of the plot: the input features are not related to each
# other, so there is no redundant information among them.

In [None]:
# Correlation of every column with the target `output`, strongest positive first.
heart.corr().loc[:, 'output'].sort_values(ascending=False)

In [None]:
# Target vector, then the feature matrix (every column except the target).
y = heart['output']
X = heart.drop(columns=['output'])

## Dealing with Categorical and Numerical Data

In [None]:
# Separate categorical from numerical features with a cardinality heuristic:
# a column with at most 5 distinct values is treated as categorical,
# every other column as numerical.
categorical_cols = [name for name in X.columns if X[name].nunique() <= 5]
numerical_cols = [name for name in X.columns if X[name].nunique() > 5]

In [None]:
# One-hot encode the categorical columns. drop_first=True drops the first
# level of each column so the remaining indicator columns are not redundant.
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

## Splitting into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Scaling Dataset

In [None]:
from sklearn.preprocessing import StandardScaler

# Take explicit copies before assigning columns. The frames returned by the
# split may be treated by pandas as slices of X, and writing into them is what
# raises SettingWithCopyWarning; copying makes the ownership unambiguous.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardise only the numerical columns. The scaler is fitted on the training
# rows alone and its statistics are reused for the test rows, so nothing from
# the test set influences the preprocessing.
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

## Testing Various Models

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression with default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

pred = model.predict(X_test)
# Pass the arguments in the documented (y_true, y_pred) order. Accuracy is
# symmetric, so the number is unchanged, but the order matters as soon as an
# asymmetric metric (precision, recall, ...) is substituted.
print(accuracy_score(y_test, pred))

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyperparameters. The seed is fixed (same value as
# the train/test split) so the reported score is reproducible between runs.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order.
print(accuracy_score(y_test, pred))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyperparameters. The seed is fixed (same value as
# the train/test split) so the reported score is reproducible between runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order.
print(accuracy_score(y_test, pred))

In [None]:
from sklearn.svm import SVC

# Support vector classifier with default settings.
model = SVC()
model.fit(X_train, y_train)

pred = model.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order. Accuracy is
# symmetric, so the value is unchanged, but the order matters for any
# asymmetric metric used in its place.
print(accuracy_score(y_test, pred))


## Tuning Hyperparameters of Best Model

In [None]:
from sklearn.model_selection import GridSearchCV

# Tune the inverse regularisation strength C of an L2-penalised logistic
# regression over several orders of magnitude, using GridSearchCV's default
# cross-validation on the training set.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
model = GridSearchCV(LogisticRegression(penalty='l2'), param_grid)
model.fit(X_train, y_train)

# Report the selected value; otherwise the result of the tuning is never shown.
print(model.best_params_)

# predict() uses the best estimator refitted on the full training set.
pred = model.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order.
print(accuracy_score(y_test, pred))
