In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the heart-attack CSV from the Kaggle input directory and preview its first five rows.
raw_data = pd.read_csv(
    filepath_or_buffer="../input/heart-attack-analysis-prediction-dataset/heart.csv"
)
raw_data.head(n=5)

It is a heart attack analysis and prediction dataset.
<br>
1.age = age of the patient
<br>
2.sex = sex of the patient
<br>
3.exng = exercise induced angina (1 = yes, 0 = no)
<br>
4.caa = number of major vessels (0-3)
<br>
5.cp = chest pain type (1 = typical angina, 2 = atypical angina , 
3 = non-anginal pain , 4 = asymptomatic)
<br>
6.trtbps = resting blood pressure (in mm Hg)
<br>
7.chol = cholesterol in mg/dl fetched via BMI sensor
<br>
8.restecg = resting electrocardiographic results ( 0 = normal , 1 = having ST-T wave abnormality , 2 = showing probable or definite left ventricular hypertrophy by Estes' criteria)
<br>
9.thalachh = max heart rate achieved
<br>
output = target (0 = less chance of heart attack, 1 = more chance of heart attack)

In [None]:
# Print a structural overview of the loaded frame (columns, non-null counts, dtypes).
raw_data.info()

In [None]:
# Count the missing values in each column.
raw_data.isna().sum(axis=0)

In [None]:
# Summary statistics for the numeric columns of the full dataset.
raw_data.describe()

In [None]:
# Keep only the rows whose `output` label equals 1 and summarise that subset.
data = raw_data.loc[raw_data['output'].eq(1)]
data.describe()

# Data Visualization

In [None]:
# Select the style before creating the figure so the new figure picks it up.
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 10))
# histplot is axes-level, so it draws on the 10x10 axes created above; the
# figure-level sns.displot would open its own figure and leave this one blank.
sns.histplot(data=data, x="age", hue="sex", ax=ax)
ax.set_title("Distribution of Age Around People Who Had Heart Attack", fontsize=20)
ax.set_xlabel("Age", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
plt.show()

In [None]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 10))
# `cp` holds integer category codes, so draw one unit-wide bar per code
# (discrete=True) rather than automatically chosen continuous bins.
sns.histplot(data=data, x="cp", discrete=True, ax=ax)
# Put a tick on every chest-pain code that actually occurs in the subset.
ax.set_xticks(sorted(data["cp"].unique()))
ax.set_title("Type of Chest Pain With Number of People Who Had Heart Attack", fontsize=20)
ax.set_xlabel("Type", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
plt.show()

In [None]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 10))
# histplot(kde=True) replaces the deprecated sns.distplot. With the default
# stat="count" the y-axis shows real counts, which matches the "Count" label
# (distplot drew a density instead).
sns.histplot(data=data, x="trtbps", kde=True, ax=ax)
ax.set_title("Distribution of Blood Pressure Around People Who Had Heart Attack", fontsize=20)
ax.set_xlabel("Blood Pressure", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
plt.show()

In [None]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 10))
# histplot(kde=True) replaces the deprecated sns.distplot. With the default
# stat="count" the y-axis shows real counts, which matches the "Count" label
# (distplot drew a density instead).
sns.histplot(data=data, x="chol", kde=True, ax=ax)
ax.set_title("Distribution of Cholesterol Level Around People Who Had Heart Attack", fontsize=20)
ax.set_xlabel("Cholesterol Level", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
plt.show()

In [None]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(10, 10))
# histplot(kde=True) replaces the deprecated sns.distplot. With the default
# stat="count" the y-axis shows real counts, which matches the "Count" label
# (distplot drew a density instead).
sns.histplot(data=data, x="thalachh", kde=True, ax=ax)
ax.set_title("Distribution of Heart Rate Around People Who Had Heart Attack", fontsize=20)
ax.set_xlabel("Heart Rate", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
plt.show()

In [None]:
# Annotated correlation matrix of every column in the full dataset.
figure, ax = plt.subplots(figsize=(10, 10))
# The correlation matrix is the only data argument: heatmap()'s first positional
# parameter is itself named `data`, so also passing data=raw_data raises
# "TypeError: got multiple values for argument 'data'".
sns.heatmap(raw_data.corr(), annot=True, cmap='Greys', ax=ax)
ax.set_title("Correlation Between Features", fontsize=20)
plt.show()

# Data Preprocessing

In [None]:
# Pull the label column out of raw_data (pop removes it in place), then discard
# 'slp' as well. NOTE: raw_data is mutated here, so this cell can only be run
# once per kernel session; a second run fails because 'output' is already gone.
data_target = raw_data.pop('output')
raw_data.drop(columns=['slp'], inplace=True)

## pd.get_dummies / onehotencoder

In [None]:
# Take the categorical columns out as their own frame, then expand every column
# of that frame into one-hot indicator columns.
data_dummies = raw_data.loc[:, ['sex', 'cp', 'fbs', 'restecg', 'exng', 'caa', 'thall']]
data_dummies = pd.get_dummies(data_dummies, columns=list(data_dummies.columns))

In [None]:
# Strip the original categorical columns from raw_data (in place), then attach
# their one-hot encoded counterparts by row index to build the model matrix.
raw_data.drop(
    ['sex', 'cp', 'fbs', 'restecg', 'exng', 'caa', 'thall'],
    axis="columns",
    inplace=True,
)
data_final = raw_data.join(data_dummies, how='left')
data_final.head()

## train_test_split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data_final,
    data_target,
    random_state=42,
    test_size=0.3,
)

## standardscaler

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and variance from the training split only, then
# apply those same statistics to the test split. Re-fitting on x_test would
# scale the two splits inconsistently and leak test-set statistics into
# preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## LogisticRegression

In [None]:
colors = ['black' , 'grey']
from sklearn.metrics import ConfusionMatrixDisplay, RocCurveDisplay
from sklearn.metrics import confusion_matrix
from sklearn import metrics


def Model(model):
    """Fit ``model`` on the training split and report how it does on the test split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``.
    Prints the values returned by ``model.score`` for the training and test
    splits, then draws the test-set confusion matrix and ROC curve.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit``, ``score`` and ``predict`` (e.g. a scikit-learn
        classifier). It is fitted in place.

    Returns
    -------
    None
    """
    model.fit(x_train, y_train)
    model_train_score = model.score(x_train, y_train)
    model_test_score = model.score(x_test, y_test)
    # Show both scores side by side so a large train/test gap is visible.
    print('Training Score \n', model_train_score)
    print('Testing Score \n', model_test_score)
    # The Display.from_estimator classmethods replace plot_confusion_matrix and
    # plot_roc_curve, which were removed in scikit-learn 1.2.
    ConfusionMatrixDisplay.from_estimator(model, x_test, y_test)
    RocCurveDisplay.from_estimator(model, x_test, y_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with default hyperparameters; Model() fits it and reports on it.
lg_reg = LogisticRegression()
Model(model=lg_reg)

In [None]:
# Fit on the training split only. Fitting on the test split and then predicting
# those same rows would make the confusion matrix measure memorisation of the
# test set rather than generalisation.
log_model = LogisticRegression()
log_model.fit(x_train, y_train)
predictions = log_model.predict(x_test)
print(confusion_matrix(y_test, predictions))