# HEART DISEASE ANALYSIS

# Contents

# 1) INTRODUCTION:

# 2) Reading the data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime


In [None]:
# Load the heart-disease CSV into a DataFrame (path is relative to the notebook's working directory).
heart=pd.read_csv('../input/heart.csv')

In [None]:
# Preview the first five rows of the loaded data.
heart.head()

In [None]:
# Preview the last five rows of the loaded data.
heart.tail()

In [None]:
# Summary statistics for each numeric column: a compact way to communicate
# how every variable is distributed.

heart.describe()

In [None]:
# Concise summary of the DataFrame (columns, non-null counts, dtypes).
heart.info()

In [None]:
# List the column labels of the loaded dataset.
heart.columns

In [None]:
# (rows, columns) of the loaded dataset.
heart.shape


In [None]:
# Count the missing values in each column.
heart.isna().sum()

# 3) Data Exploration

In [None]:
# Number of rows for each value of the target column.
heart["target"].value_counts()

In [None]:
# Bar chart of the number of rows per target value.
sns.countplot(x="target",data=heart)

In [None]:
# The 15 most frequent ages and how often each occurs.
heart["age"].value_counts().head(15)

In [None]:
# Bar chart of the 15 most common ages (counts computed once and reused for both axes).
top_age_counts = heart["age"].value_counts().head(15)
sns.barplot(x=top_age_counts.index, y=top_age_counts.values)
plt.title('Age Analysis System')
plt.xlabel('Age')
plt.ylabel('Age Count')
plt.show()

In [None]:
# Number of rows for each value of the sex column.
heart["sex"].value_counts()

In [None]:
# Bar chart of the number of patients per sex (coding as given in the axis label).
sns.countplot(x='sex', data=heart)
plt.xlabel("Sex (0 = female, 1= male)")
plt.show()

In [None]:
# Split the male patients (sex == 1) by diagnosis:
# target == 1 -> has disease, target == 0 -> no disease.
is_male = heart["sex"] == 1
male_disease = heart[is_male & (heart["target"] == 1)]
male_NO_disease = heart[is_male & (heart["target"] == 0)]
print(f"{len(male_disease)} male_disease")
print(f"{len(male_NO_disease)} male_NO_disease")

In [None]:
# Compare how many male patients do and do not have heart disease.
a, b = len(male_disease), len(male_NO_disease)
sns.barplot(x=['male_disease ', 'male_NO_disease'], y=[a, b])
plt.title('State of the Gender')
plt.xlabel('Male and Target')
plt.ylabel('Count')
plt.show()


In [None]:
# Split the female patients (sex == 0) by diagnosis:
# target == 1 -> has disease, target == 0 -> no disease.
is_female = heart["sex"] == 0
female_disease = heart[is_female & (heart["target"] == 1)]
female_NO_disease = heart[is_female & (heart["target"] == 0)]
print(f"{len(female_disease)} female_disease")
print(f"{len(female_NO_disease)} female_NO_disease")

In [None]:
# Compare how many female patients do and do not have heart disease.
c, d = len(female_disease), len(female_NO_disease)
sns.barplot(x=['female_disease ', 'female_NO_disease'], y=[c, d])
plt.title('State of the Gender')
plt.xlabel('Female and Target')
plt.ylabel('Count')
plt.show()

In [None]:
# Number of rows for each value of the cp column.
heart.cp.value_counts()

In [None]:
# Bar chart of the number of rows for each chest type (cp) value.
sns.countplot(x='cp', data=heart)
plt.xlabel(" Chest type")
plt.ylabel("Count")
plt.title("Chest type Vs count plot")
plt.show()

In [None]:
# Print the row count for every (cp, target) combination with cp in {0, 1}.
cp_words = {0: "zero", 1: "one"}
target_words = {0: "zero", 1: "one"}
for cp_value in (0, 1):
    for target_value in (0, 1):
        n_rows = len(heart[(heart.cp == cp_value) & (heart.target == target_value)])
        print(n_rows, f"=cp_{cp_words[cp_value]}_target_{target_words[target_value]}")

In [None]:
# Side-by-side bar charts of the target split for cp == 0 (left) and cp == 1 (right).
bar_labels = ["target_0", "target_1"]

cp_zero_rows = heart[heart.cp == 0]
target_0 = len(cp_zero_rows[cp_zero_rows.target == 0])
target_1 = len(cp_zero_rows[cp_zero_rows.target == 1])
plt.subplot(1, 2, 1)
sns.barplot(x=bar_labels, y=[target_0, target_1])
plt.ylabel("Count")
plt.title("Chest_type_0 Vs count plot")

cp_one_rows = heart[heart.cp == 1]
target_0 = len(cp_one_rows[cp_one_rows.target == 0])
target_1 = len(cp_one_rows[cp_one_rows.target == 1])
plt.subplot(1, 2, 2)
sns.barplot(x=bar_labels, y=[target_0, target_1])
plt.ylabel("Count")
plt.title("Chest_type_1 Vs count plot")



In [None]:
# Print the row count for every (cp, target) combination with cp in {2, 3}.
cp_words = {2: "two", 3: "three"}
target_words = {0: "zero", 1: "one"}
for cp_value in (2, 3):
    for target_value in (0, 1):
        n_rows = len(heart[(heart.cp == cp_value) & (heart.target == target_value)])
        print(n_rows, f"=cp_{cp_words[cp_value]}_target_{target_words[target_value]}")

In [None]:
# Side-by-side bar charts of the target split for cp == 2 (left) and cp == 3 (right).
bar_labels = ["target_0", "target_1"]

cp_two_rows = heart[heart.cp == 2]
target_0 = len(cp_two_rows[cp_two_rows.target == 0])
target_1 = len(cp_two_rows[cp_two_rows.target == 1])
plt.subplot(1, 2, 1)
sns.barplot(x=bar_labels, y=[target_0, target_1])
plt.ylabel("Count")
plt.title("Chest_type_2 Vs count plot")

cp_three_rows = heart[heart.cp == 3]
target_0 = len(cp_three_rows[cp_three_rows.target == 0])
target_1 = len(cp_three_rows[cp_three_rows.target == 1])
plt.subplot(1, 2, 2)
sns.barplot(x=bar_labels, y=[target_0, target_1])
plt.ylabel("Count")
plt.title("Chest_type_3 Vs count plot")


In [None]:
# Frequency of each trestbps value among rows with target == 1, ordered by value.
positive_rows = heart[heart.target == 1]
trestbps_counts = positive_rows.trestbps.value_counts().sort_index()
plot = trestbps_counts.plot(kind="bar", figsize=(15, 4), fontsize=15)
plot.set_title("Resting blood pressure", fontsize=20)

In [None]:
# The 20 most frequent chol values and how often each occurs.
heart["chol"].value_counts().head(20)

In [None]:
# Bar chart of the 20 most common chol values (counts computed once and reused).
top_chol_counts = heart["chol"].value_counts().head(20)
sns.barplot(x=top_chol_counts.index, y=top_chol_counts.values)
plt.title('chol Counts')
plt.xlabel('chol')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Mean chol for every distinct age, in ascending age order.
# A single groupby replaces the per-age filter-and-sum loop: ages and means now
# come from the same grouped result, so they cannot fall out of alignment, and
# the frame is scanned once instead of once per age.
chol_by_age = heart.groupby('age', sort=True)['chol']
mean_chol_by_age = chol_by_age.mean()
age_unique = mean_chol_by_age.index.tolist()    # x values for the plot
mean_chol = mean_chol_by_age.tolist()           # y values for the plot
age_chol_values = chol_by_age.count().values    # rows per age; kept because the original cell defined it
    

In [None]:
# Point plot of mean chol (y) against age (x), using the lists built in the previous cell.
plt.figure(figsize=(10,5))
sns.pointplot(x=age_unique,y=mean_chol,color='red',alpha=0.8)
plt.xlabel('age',fontsize = 15,color='blue')
plt.xticks(rotation=45)  # tilt the age labels to reduce overlap
plt.ylabel('chol',fontsize = 15,color='blue')
plt.title('age vs chol',fontsize = 15,color='blue')
plt.grid()
plt.show()

In [None]:
# Among rows with fbs == 1, count those with target 0 and with target 1.
fbs_one_rows = heart[heart.fbs == 1]
print(len(fbs_one_rows[fbs_one_rows.target == 0]), "=fbs_one_target_zero")
print(len(fbs_one_rows[fbs_one_rows.target == 1]), "=fbs_one_target_one")

In [None]:
# Bar chart of the target split for rows with fbs == 1 (drawn in the left half of a 1x2 grid).
fbs_one_rows = heart[heart.fbs == 1]
target_0 = len(fbs_one_rows[fbs_one_rows.target == 0])
target_1 = len(fbs_one_rows[fbs_one_rows.target == 1])
plt.subplot(1, 2, 1)
sns.barplot(x=["target_0", "target_1"], y=[target_0, target_1])
plt.ylabel("Count")
plt.title("fbs_type_1 Vs count plot")


In [None]:
# Among rows with restecg == 1, count those with target 0 and with target 1.
restecg_one_rows = heart[heart.restecg == 1]
print(len(restecg_one_rows[restecg_one_rows.target == 0]), "=restecg_one_target_zero")
print(len(restecg_one_rows[restecg_one_rows.target == 1]), "=restecg_one_target_one")

In [None]:
# Frequency of each thalach value among rows with target == 1, ordered by value.
positive_rows = heart[heart.target == 1]
thalach_counts = positive_rows.thalach.value_counts().sort_index()
plot = thalach_counts.plot(kind="bar", figsize=(15, 4), fontsize=10)
plot.set_title("thalach", fontsize=15)

In [None]:
# Number of rows for each value of the thal column.
heart["thal"].value_counts()

In [None]:
# Among rows with thal == 3, count those with target 0 and with target 1.
thal_three_rows = heart[heart.thal == 3]
print(len(thal_three_rows[thal_three_rows.target == 0]), "=thal_three_target_zero")
print(len(thal_three_rows[thal_three_rows.target == 1]), "=thal_three_target_one")

In [None]:
# Bar chart of the target split for rows with thal == 3 (drawn in the left half of a 1x2 grid).
thal_three_rows = heart[heart.thal == 3]
target_0 = len(thal_three_rows[thal_three_rows.target == 0])
target_1 = len(thal_three_rows[thal_three_rows.target == 1])
plt.subplot(1, 2, 1)
sns.barplot(x=["target_0", "target_1"], y=[target_0, target_1])
plt.ylabel("Count")
plt.title("thal_type_3 Vs count plot")

In [None]:
# Among rows with thal == 6 (the author's note: 6 = fixed defect), count those
# with target 0 and with target 1. The labels now say thal_6 to match the
# filter; they previously read thal_7, identical to the next cell's output.
# NOTE(review): check heart.thal.value_counts() to confirm the value 6 occurs in this file.
print(len(heart[(heart.thal==6)&(heart.target==0)]),"=thal_6_target_zero")
print(len(heart[(heart.thal==6)&(heart.target==1)]),"=thal_6_target_one")

In [None]:
# Among rows with thal == 7 (the author's note: 7 = reversible defect), count
# those with target 0 and with target 1.
thal_seven_rows = heart[heart.thal == 7]
print(len(thal_seven_rows[thal_seven_rows.target == 0]), "=thal_7_target_zero")
print(len(thal_seven_rows[thal_seven_rows.target == 1]), "=thal_7_target_one")

In [None]:
# One-hot encode the categorical columns cp, thal and slope.
# drop_first=True omits the first level of each variable (k levels -> k-1 columns).
# dtype=int pins the dummies to 0/1 integers: the default dtype differs between
# pandas versions (bool since pandas 2.0), and these columns are later
# subtracted and divided in the min-max scaling step.
cp = pd.get_dummies(heart['cp'], prefix="cp", drop_first=True, dtype=int)
thal = pd.get_dummies(heart['thal'], prefix="thal", drop_first=True, dtype=int)
slope = pd.get_dummies(heart['slope'], prefix="slope", drop_first=True, dtype=int)

# drop_first=True removes the first level of each encoded variable, so a variable with k levels yields k-1 dummy columns.

In [None]:
# Append the dummy columns to the original frame (column-wise concatenation) and preview the result.
data = pd.concat([heart, cp, thal, slope], axis=1)
data.head()

In [None]:
# The raw categorical columns are now represented by their dummies, so remove them.
data = data.drop(columns=['cp', 'thal', 'slope'])
data.head()

In [None]:
# Separate the feature matrix (a) from the label vector (b).
b = data["target"]
a = data.drop(columns=['target'])

In [None]:
# (rows, columns) of the feature matrix after encoding.
print(a.shape)

In [None]:
# Pairwise correlation between the feature columns.
a.corr()

In [None]:
# Min-max scale every feature column: (value - column min) / (column max - column min).
# NOTE(review): min and max are taken over all rows, i.e. before the train/test
# split performed later in the notebook - confirm that is acceptable.
column_min = a.min()
column_range = a.max() - column_min
a = (a - column_min) / column_range
a.head()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the split is repeatable.
a_train, a_test, b_train, b_test = train_test_split(a, b, test_size=0.2, random_state=2)

# 4) Creating model

In [None]:
from sklearn.linear_model import LogisticRegression
# Baseline: logistic regression with default settings, fitted on the training
# rows and scored on the held-out test rows.
logi = LogisticRegression()
logi.fit(a_train, b_train)
logi.score(a_test, b_test)

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the logistic-regression search.
# 'solver' is pinned to liblinear because the grid contains penalty='l1', which
# the default 'lbfgs' solver of current scikit-learn releases does not support;
# liblinear handles both 'l1' and 'l2', so every grid point can be fitted.
params = {'penalty': ['l1', 'l2'],
          'C': [0.01, 0.1, 1, 10, 100],
          'class_weight': ['balanced', None],
          'solver': ['liblinear']}
# Exhaustive search over the grid with 10-fold cross-validation.
logi_model = GridSearchCV(logi, param_grid=params, cv=10)

In [None]:
# Run the cross-validated search on the training rows and show the best parameter combination.
logi_model.fit(a_train,b_train)
logi_model.best_params_

In [None]:
# Score the model chosen by the grid search on the held-out test rows.
# GridSearchCV refits the best parameter combination on the whole training set
# (refit=True is its default), so best_estimator_ is already fitted. The values
# hard-coded here before (C=1, penalty='l2') are LogisticRegression's defaults,
# so that model ignored the search result, including class_weight.
logi = logi_model.best_estimator_
logi.score(a_test, b_test)

# 5) Explanation

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the fitted model on the test rows
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cm = confusion_matrix(b_test, logi.predict(a_test))
# fmt="d" prints the cell counts as plain integers rather than in the default '.2g' format.
sns.heatmap(cm, annot=True, fmt="d")
plt.xlabel("Predicted label")
plt.ylabel("True label")
# plt.show() renders the figure; the plt.plot() call used before was given no data and drew nothing.
plt.show()

# 6) Conclusion