# About data set
## This database contains 76 attributes, but all published experiments refer to using a subset of 14 of them. In particular, the Cleveland database is the only one that has been used by ML researchers to
## date. The "target" field refers to the presence of heart disease in the patient. It is integer-valued: 0 = no/less chance of heart attack and 1 = more chance of heart attack.

# Attribute Information
### 1) age
### 2) sex
### 3) chest pain type (4 values)
### 4) resting blood pressure
### 5) serum cholesterol in mg/dl
### 6) fasting blood sugar > 120 mg/dl
### 7) resting electrocardiographic results (values 0,1,2)
### 8) maximum heart rate achieved
### 9) exercise induced angina
### 10) oldpeak = ST depression induced by exercise relative to rest
### 11) the slope of the peak exercise ST segment
### 12) number of major vessels (0-3) colored by fluoroscopy
### 13) thal: 0 = normal; 1 = fixed defect; 2 = reversible defect
### 14) target: 0 = less chance of heart attack; 1 = more chance of heart attack

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the heart-attack-possibility CSV from the Kaggle input directory
# and show the resulting DataFrame.
csv_path = '../input/health-care-data-set-on-heart-attack-possibility/heart.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Preview the first ten rows of the data set.
df.head(n=10)

In [None]:
# Keep only the rows whose 'target' column equals 1, then display them.
is_positive = df['target'] == 1
target = df[is_positive]
target

In [None]:
# (rows, columns) of the target == 1 subset.
target.shape

In [None]:
# Keep only the rows whose 'target' column equals 0, then display them.
is_negative = df['target'] == 0
no_target = df[is_negative]
no_target

In [None]:
# (rows, columns) of the target == 0 subset.
no_target.shape

In [None]:
# Share of each target class as a percentage of all rows in df.
total_rows = len(df)
positive_share = len(target) / total_rows * 100
print("  target =1 :", positive_share)
negative_share = len(no_target) / total_rows * 100
print("  target =0 :", negative_share)

In [None]:
# Two stacked panels for 'sex': overall counts on top,
# counts split by 'target' underneath.
plt.figure(figsize=(6, 12))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='sex')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='sex', hue='target')

In [None]:
# Print each distinct value found in the 'sex' column, one per line.
sex_values = df['sex'].unique()
for sex in sex_values:
    print(sex)

In [None]:
# Two stacked panels for 'thal': overall counts on top,
# counts split by 'target' underneath.
plt.figure(figsize=(6, 12))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='thal')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='thal', hue='target')

In [None]:
# Two stacked panels for 'age': overall counts on top,
# counts split by 'target' underneath. The figure is wider than the
# other count plots in this notebook (10 x 17 instead of 6 x 12).
plt.figure(figsize=(10, 17))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='age')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='age', hue='target')

In [None]:
# Two stacked panels for 'ca': overall counts on top,
# counts split by 'target' underneath.
plt.figure(figsize=(6, 12))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='ca')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='ca', hue='target')

In [None]:
# Two stacked panels for 'exang': overall counts on top,
# counts split by 'target' underneath.
# NOTE(review): figsize is (6, 21) here while most sibling plots use (6, 12);
# possibly a typo, kept as-is to preserve the output.
plt.figure(figsize=(6, 21))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='exang')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='exang', hue='target')

In [None]:
# For every distinct 'sex' code, report the group size and the percentage
# of that group with target == 1. The printed label previously read
# "Servived", which does not describe this data set: the value is the
# share of rows with target == 1, not a survival rate.
for gen in df['sex'].unique():
    print(gen)
    gender_df = df[df['sex'] == gen]
    positive_df = gender_df[gender_df['target'] == 1]
    positive_percentage = (positive_df.shape[0] / gender_df.shape[0]) * 100
    print("Count: ", gender_df.shape[0])
    print("Target = 1: ", "%.2f" % positive_percentage, '%')

    print("------------")

In [None]:
# Two stacked panels for 'cp': overall counts on top,
# counts split by 'target' underneath.
plt.figure(figsize=(6, 12))
plt.subplot(2, 1, 1)
sns.countplot(data=df, x='cp')
plt.subplot(2, 1, 2)
sns.countplot(data=df, x='cp', hue='target')

In [None]:
# Split the frame into model inputs and labels as NumPy arrays:
# X holds every column except 'target', y holds the 'target' column.
feature_frame = df.drop(columns='target')
X = feature_frame.values
y = df['target'].values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the split (and every metric computed from
# it) is reproducible between runs; stratify=y keeps the target class ratio
# the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Display the training feature array returned by train_test_split.
X_train

In [None]:
# Display the held-out test feature array returned by train_test_split.
X_test

In [None]:
# Display the training labels returned by train_test_split.
y_train

In [None]:
# Shape of the training feature array.
X_train.shape

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split.
# The features are used unscaled, so the solver may need more than the default
# 100 iterations to converge; max_iter is raised to avoid stopping early
# (ConvergenceWarning) with under-fitted coefficients.
l = LogisticRegression(max_iter=1000)
l.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test features with the fitted model `l`.
y_predict_test = l.predict(X_test)

In [None]:
# Display the predicted labels for the test split.
y_predict_test

In [None]:
# Display the true labels of the test split for comparison with the predictions.
y_test

In [None]:
from sklearn.metrics import confusion_matrix ,classification_report

In [None]:
# Confusion matrix of true vs. predicted test labels, drawn as a heatmap.
# Rows are the true labels and columns are the predicted labels.
dff = confusion_matrix(y_test, y_predict_test)
# fmt='d' prints the cell counts as plain integers instead of the default
# float format, which can show counts in scientific notation.
ax = sns.heatmap(dff, annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

In [None]:
# classification_report returns a multi-line string; print it so the table is
# rendered with real line breaks (a bare expression shows the repr with
# literal "\n" in a notebook and shows nothing when run as a script).
print(classification_report(y_test, y_predict_test))