# Heart Attack Prediction

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Data collection :-

In [None]:
# Load the dataset; cells containing '?' are parsed as missing values (NaN).
df = pd.read_csv("../input/heart-attack-prediction/data.csv",na_values=['?'], low_memory = False)
# Display the loaded frame.
df

## 2.Data Exploration:-

In [None]:
# (rows, columns) of the freshly loaded frame.
df.shape

In [None]:
# Column labels exactly as read from the CSV header.
df.columns

In [None]:
# Distinct values present in the `age` column.
df['age'].unique()

In [None]:
# Distinct values present in the `sex` column.
df['sex'].unique()

In [None]:
# Distinct values present in the `fbs` column (NaN appears if any entry is missing).
df['fbs'].unique()

In [None]:
# Distinct values present in the `chol` column.
df['chol'].unique()

In [None]:
# Distinct values present in the `trestbps` column. This cell previously
# repeated the `fbs` inspection from an earlier cell, while `trestbps`,
# which is used as a model feature, was never looked at.
df['trestbps'].unique()

In [None]:
# Distinct values present in the `thalach` column.
df['thalach'].unique()

In [None]:
# Distinct values present in the `cp` column.
df['cp'].unique()

In [None]:
# Distinct values present in the `restecg` column.
df['restecg'].unique()

In [None]:
# Distinct values present in the `slope` column.
df['slope'].unique()

In [None]:
# Distinct values present in the `ca` column (this column is dropped during cleaning).
df['ca'].unique()

In [None]:
# Distinct values present in the `thal` column.
df['thal'].unique()

In [None]:
# Distinct values present in the `exang` column.
df['exang'].unique()

In [None]:
# First five rows.
df.head()

In [None]:
# Last five rows.
df.tail()

## 3. Data Cleaning:-

In [None]:
# Per-column dtype and non-null count.
df.info()

In [None]:
# Shape before the `ca` column is removed.
df.shape

In [None]:
# Remove the `ca` column from the working frame.
df = df.drop('ca', axis=1)

In [None]:
# Shape after dropping `ca` (one column fewer than before).
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

The columns `age`, `sex`, `cp`, `oldpeak` and `num` do not contain any null values.

In [None]:
# Skewness of each column.
df.skew()

In [None]:
# Replace missing `thalach` values with the column mean.
# The result is assigned back to the frame: calling fillna(inplace=True) on
# a column selection is chained assignment, which pandas deprecates and which
# leaves `df` unchanged when Copy-on-Write is enabled.
df['thalach'] = df['thalach'].fillna(df['thalach'].mean())

In [None]:
# Fill every remaining gap with the median of its own column.
df = df.fillna(value=df.median())

In [None]:
# Re-count missing values after imputation.
df.isnull().sum()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Keep only the first occurrence of each duplicated row.
df = df.drop_duplicates()

In [None]:
# Shape once duplicates have been removed.
df.shape

## 4.Feature Selection:-

In [None]:
# Annotated heatmap of the pairwise column correlations.
plt.figure(figsize=(20, 12))
cor = df.corr()
sns.heatmap(data=cor, cmap='coolwarm', annot=True)
plt.show()

In [None]:
# Column labels before the target column is renamed.
df.columns

In [None]:
# Strip surrounding whitespace from every column label so the target, which
# carries trailing padding in the raw header, becomes plain "num".
# Stripping is used instead of matching one exact padded key because a
# rename by exact key silently does nothing if the number of spaces differs.
df = df.rename(columns=str.strip)

In [None]:
# Column labels after the rename, to confirm the target is now "num".
df.columns

In [None]:
# Overlay the `age` distribution of each target class (num == 0, then num == 1).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -- confirm
# the installed version before upgrading the plotting stack.
plt.figure(figsize=(12, 5))
plt.title("AGE VRS NUM")
for target_class, shade in ((0, "darkblue"), (1, "cyan")):
    sns.distplot(df.age[df.num == target_class], color=shade)
plt.legend(['0', '1'])
plt.show()

In [None]:
# Histogram of every column in one grid.
# The figure size is passed to DataFrame.hist directly: calling
# plt.figure(figsize=...) after df.hist() only opens a second, empty figure
# and leaves the histograms at the default (cramped) size.
df.hist(figsize=(20,12))
plt.show()

In [None]:
# Overlay the `trestbps` distribution of each target class (num == 0, then num == 1).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -- confirm
# the installed version before upgrading the plotting stack.
plt.figure(figsize=(12, 5))
plt.title("TRESTBPS VRS NUM")
for target_class, shade in ((0, "darkblue"), (1, "cyan")):
    sns.distplot(df.trestbps[df.num == target_class], color=shade)
plt.legend(['0', '1'])
plt.show()

In [None]:
# Overlay the `chol` distribution of each target class (num == 0, then num == 1).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -- confirm
# the installed version before upgrading the plotting stack.
plt.figure(figsize=(12, 5))
plt.title("CHO VRS NUM")
for target_class, shade in ((0, "darkblue"), (1, "cyan")):
    sns.distplot(df.chol[df.num == target_class], color=shade)
plt.legend(['0', '1'])
plt.show()

In [None]:
# Overlay the `thalach` distribution of each target class (num == 0, then num == 1).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -- confirm
# the installed version before upgrading the plotting stack.
plt.figure(figsize=(12, 5))
plt.title("THALACH VRS NUM")
for target_class, shade in ((0, "darkblue"), (1, "cyan")):
    sns.distplot(df.thalach[df.num == target_class], color=shade)
plt.legend(['0', '1'])
plt.show()

In [None]:
# Counts of each `sex` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("SEX VRS NUM")
sns.countplot(x=df.sex)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("SEX VRS NUM (num == 1)")
sns.countplot(x=df.sex[df.num==1])
plt.show()

In [None]:
# Counts of each `cp` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("CP VRS NUM")
sns.countplot(x=df.cp)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("CP VRS NUM (num == 1)")
sns.countplot(x=df.cp[df.num==1])
plt.show()

In [None]:
# Counts of each `fbs` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("FBS VRS NUM")
sns.countplot(x=df.fbs)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("FBS VRS NUM (num == 1)")
sns.countplot(x=df.fbs[df.num==1])
plt.show()

In [None]:
# Counts of each `restecg` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("RESTECG VRS NUM")
sns.countplot(x=df.restecg)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("RESTECG VRS NUM (num == 1)")
sns.countplot(x=df.restecg[df.num==1])
plt.show()

In [None]:
# Counts of each `exang` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("EXANG VRS NUM")
sns.countplot(x=df.exang)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("EXANG VRS NUM (num == 1)")
sns.countplot(x=df.exang[df.num==1])
plt.show()

In [None]:
# Counts of each `slope` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("SLOPE VRS NUM")
sns.countplot(x=df.slope)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("SLOPE VRS NUM (num == 1)")
sns.countplot(x=df.slope[df.num==1])
plt.show()

In [None]:
# Counts of each `thal` value: first over all rows, then only for rows with num == 1.
# The data is passed as x= because seaborn >= 0.12 rejects positional data
# arguments; the keyword form works on older releases too.
plt.figure(figsize=(6,3))
plt.title("THAL VRS NUM")
sns.countplot(x=df.thal)
plt.show()
# The second chart gets its own sized, titled figure; previously it was drawn
# on an implicit default figure with no title.
plt.figure(figsize=(6,3))
plt.title("THAL VRS NUM (num == 1)")
sns.countplot(x=df.thal[df.num==1])
plt.show()

In [None]:
# Point plot of `age` against `sex`, with one series per `num` class.
plt.figure(figsize=(12, 5))
plt.title("SEX VRS AGE VRS NUM")
sns.pointplot(data=df, x='sex', y='age', hue='num')
plt.show()

In [None]:
# Scatter of `age` against `chol`, coloured by `num` class.
plt.figure(figsize=(12, 5))
plt.title("CHOL VRS AGE VRS NUM")
sns.scatterplot(data=df, x='chol', y='age', hue='num')
plt.show()

## Split the data into train and test set:-

In [None]:
# Feature matrix: every predictor column used by the models below.
x = df.loc[:, [
    'age', 'sex', 'cp',
    'trestbps', 'chol', 'fbs',
    'restecg', 'thalach', 'exang',
    'oldpeak', 'slope', 'thal',
]]
# Target vector.
y = df.loc[:, 'num']
print(x)
print(y)

In [None]:
# split data into train and test
from sklearn.model_selection import train_test_split
xtr,xts,ytr,yts = train_test_split(x,y,test_size=0.2)
# we have to split the data into 80% as train and 20% as test so we have specified test_size as 0.2
print(x.shape)
print(xtr.shape)
print(xts.shape)
print(y.shape)
print(ytr.shape)
print(yts.shape)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# PCA picks directions of largest raw variance, so columns on larger numeric
# scales would dominate the component if the features were fed in unscaled.
# Standardise first; the scaler is fitted on the training rows only and then
# applied unchanged to the test rows so no test information leaks in.
scaler = StandardScaler()
xtr = scaler.fit_transform(xtr)
xts = scaler.transform(xts)

# Project onto a single principal component (fitted on train, applied to test).
# NOTE(review): one component is kept as before; check the ratio displayed
# below to judge whether that retains enough variance.
pca = PCA(n_components = 1)
xtr = pca.fit_transform(xtr)
xts = pca.transform(xts)
explained_variance = pca.explained_variance_ratio_
# Show the fraction of variance captured (previously computed but never displayed).
explained_variance

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Baseline support-vector classifier with library-default settings.
svc_model=SVC()

In [None]:
# Fit the baseline model on the (not yet min-max scaled) training data.
svc_model.fit(xtr,ytr)

In [None]:
# Predict labels for the held-out test rows.
y_pred=svc_model.predict(xts)

In [None]:
# Confusion matrix of true test labels against the baseline predictions.
cm=confusion_matrix(yts,y_pred)

In [None]:
# Draw the confusion matrix. fmt="d" prints the cell counts as plain integers,
# consistent with the scaled-model heatmap later in the notebook, instead of
# the default general float format.
sns.heatmap(cm,annot=True,fmt="d")

## Normalisation(1):-

In [None]:
# Per-feature minimum of the training matrix. axis=0 reduces over rows only;
# without it an ndarray's .min() collapses all columns into one global value,
# which is only correct while there is a single feature column.
min_train = xtr.min(axis=0)
min_train

In [None]:
# Per-feature range (max - min) of the training matrix. axis=0 reduces over
# rows only, so each column keeps its own range instead of sharing one global
# value when there is more than one feature column.
range_train = (xtr - min_train).max(axis=0)
range_train

In [None]:
# Min-max scale the training data: subtract the training minimum and divide
# by the training range, which maps the training values onto [0, 1].
X_train_scaled = (xtr - min_train)/range_train
X_train_scaled

In [None]:
# Scale the test set with the minimum and range learned from the TRAINING set.
# Deriving min/range from the test rows themselves (as was done before) puts
# train and test on different scales and lets test-set statistics leak into
# preprocessing; the model must see test data transformed exactly like train.
# Test values outside the training range may therefore fall outside [0, 1].
X_test_scaled = (xts - min_train)/range_train

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Default SVC again, this time trained on the min-max scaled training data.
# fit() returns the estimator itself, so it can be bound in one step.
svc_model = SVC().fit(X_train_scaled, ytr)
svc_model

In [None]:
# Predict on the scaled test rows and tabulate them against the true labels.
y_predict = svc_model.predict(X_test_scaled)
cm = confusion_matrix(y_true=yts, y_pred=y_predict)

# Integer-formatted confusion-matrix heatmap.
sns.heatmap(cm, fmt="d", annot=True)

In [None]:
# Classification report for the scaled-data model on the test set.
print(classification_report(yts,y_predict))

## Normalisation(2): hyperparameter tuning with GridSearchCV on the normalised data:-

In [None]:
# Search space for the RBF-kernel SVC: every combination of C and gamma below.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
    kernel=['rbf'],
)

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over param_grid; refit=True retrains the best
# configuration on the full training data, verbose=4 logs each fit.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=4)
grid.fit(X=X_train_scaled, y=ytr)

In [None]:
# Parameter combination that scored best in the grid search.
grid.best_params_

In [None]:
# The refitted estimator built with the best parameters.
grid.best_estimator_

In [None]:
# Predict the scaled test rows with the best estimator found by the search
# and tabulate the predictions against the true labels.
grid_predictions = grid.predict(X_test_scaled)
cm = confusion_matrix(yts, grid_predictions)
# fmt="d" prints the counts as plain integers, matching the heatmap drawn for
# the scaled default model.
sns.heatmap(cm, annot=True, fmt="d")

In [None]:
# Classification report for the tuned model on the test set.
print(classification_report(yts,grid_predictions))

In [None]:
from sklearn.svm import SVC

# RBF-kernel SVC with a fixed seed, used for the cross-validation below.
classifier = SVC(random_state=0, kernel='rbf')

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validated scores of `classifier` on the training data.
# NOTE(review): this uses `xtr`, not `X_train_scaled` -- confirm that is intended.
accuracies = cross_val_score(classifier, xtr, ytr, cv=10)
accuracies

In [None]:
# Mean and (population) standard deviation of the cross-validation scores.
print(np.mean(accuracies))
print(np.std(accuracies))