In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score


import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


# Heart Disease UCI
![Heart](https://img.webmd.com/dtmcms/live/webmd/consumer_assets/site_images/articles/health_tools/how_heart_disease_affects_your_body_slideshow/493ss_thinkstock_rf_heart_anatomy_illustration.jpg)


In [None]:
# Read the heart-disease CSV from the Kaggle input directory and preview the first ten rows.
heart_csv = r"/kaggle/input/heart-disease-uci/heart.csv"
df = pd.read_csv(heart_csv)
df.head(n=10)

**Attribute Information**

1. age

2. sex

3. chest pain type (4 values)

4. resting blood pressure

5. serum cholesterol in mg/dl

6. fasting blood sugar > 120 mg/dl

7. resting electrocardiographic results (values 0, 1, 2)

8. maximum heart rate achieved

9. exercise-induced angina

10. oldpeak = ST depression induced by exercise relative to rest

11. the slope of the peak exercise ST segment

12. number of major vessels (0-3) colored by fluoroscopy

13. thal: 3 = normal; 6 = fixed defect; 7 = reversible defect

In [None]:
# Print column names, dtypes and non-null counts for the loaded frame.
df.info()

In [None]:
# Display summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()




**Exploratory Data Analysis (EDA) and Data Visualization**

In [None]:
# Class balance: number of rows for each value of the target column.
ax = sns.countplot(data=df, x="target")
ax.set(xlabel="Target", ylabel="Count", title="Count of target values")
plt.show()

In [None]:
# Activate the "deep" palette and the white-grid style for this and later plots.
# sns.color_palette() only *returns* a palette, so calling it and discarding the
# result has no effect; sns.set_palette() is the call that installs it.
sns.set_palette("deep")
sns.set_style("whitegrid")

# Distribution of patient age.
sns.histplot(x="age",data=df)
plt.title("Histogram of Age")
plt.xlabel("Age Values")
plt.show()

In [None]:
# Rows per sex (0 = Female, 1 = Male), split by target value.
ax = sns.countplot(data=df, x="sex", hue="target")
ax.set_xlabel("sex")
ax.set_ylabel("Count")
ax.set_title("Count of Sex")
plt.show()

In [None]:
# Chest pain type codes:
#   0: typical angina
#   1: atypical angina
#   2: non-anginal pain
#   3: asymptomatic

# Rows per chest pain type, split by target value.
ax = sns.countplot(data=df, x="cp", hue="target")
ax.set(
    xlabel="Chest Pain Type",
    ylabel="Count",
    title="Count of Chest Pain Type",
)
plt.show()

In [None]:
# Distribution of resting blood pressure.
ax = sns.histplot(data=df, x="trestbps")
ax.set_title("Histogram of resting blood pressure (in mm Hg on admission to the hospital)")
ax.set_xlabel("resting blood pressure (in mm Hg on admission to the hospital)")
plt.show()

In [None]:
# Distribution of the chol column.
ax = sns.histplot(data=df, x="chol")
ax.set(
    title="Histogram of chol: serum cholestoral in mg/dl",
    xlabel="chol: serum cholestoral in mg/dl",
)
plt.show()

In [None]:
# fbs flags fasting blood sugar > 120 mg/dl (1 = true; 0 = false).
# Rows per fbs value, split by target value.
ax = sns.countplot(data=df, x="fbs", hue="target")
ax.set_title("Count of fbs: (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)")
ax.set_xlabel("fbs: (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)")
plt.show()

In [None]:
# restecg codes:
#   0: normal
#   1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
#   2: showing probable or definite left ventricular hypertrophy by Estes' criteria
# Rows per restecg value, split by target value.
ax = sns.countplot(data=df, x="restecg", hue="target")
ax.set(
    title="Count of restecg: resting electrocardiographic results",
    xlabel="restecg: resting electrocardiographic results",
)
plt.show()

In [None]:
# Distribution of the thalach column.
ax = sns.histplot(data=df, x="thalach")
ax.set_title("Histogram of thalach: maximum heart rate achieved")
ax.set_xlabel("thalach: maximum heart rate achieved")
plt.show()

In [None]:
# Rows per exang value, split by target value.
ax = sns.countplot(data=df, x="exang", hue="target")
ax.set(
    title="Count of exang: exercise induced angina (1 = yes; 0 = no)",
    xlabel="exang: exercise induced angina (1 = yes; 0 = no)",
)
plt.show()

Angina is a type of pain that occurs when not enough blood flows to the heart muscle. It may feel like pressure in the chest, jaw or arm, and it frequently occurs with exercise or stress. Some people with angina also report feeling lightheaded, overly tired, short of breath or nauseated.

In [None]:
# Distribution of the oldpeak column.
ax = sns.histplot(data=df, x="oldpeak")
ax.set_title("Histogram of oldpeak = ST depression induced by exercise relative to rest")
ax.set_xlabel("oldpeak = ST depression induced by exercise relative to rest")
plt.show()

In [None]:
# slope: the slope of the peak exercise ST segment
# 1: upsloping
# 2: flat
# 3: downsloping
# NOTE(review): confirm these codes match the values actually present in the CSV.

# Rows per slope value, split by target value. This is a count plot, so the
# title says "Count of" like the other categorical plots rather than "Histogram of".
sns.countplot(x="slope",data=df,hue="target")
plt.title("Count of slope: the slope of the peak exercise ST segment")
plt.xlabel("slope: the slope of the peak exercise ST segment")
plt.show()

In [None]:
# Rows per ca value, split by target value.
ax = sns.countplot(data=df, x="ca", hue="target")
ax.set_title("Count of  ca: number of major vessels (0-3) colored by flourosopy")
ax.set_xlabel(" ca: number of major vessels (0-3) colored by flourosopy")
plt.show()

In [None]:
# Rows per thal value, split by target value.
ax = sns.countplot(data=df, x="thal", hue="target")
ax.set(
    title="Count of  thal: 3 = normal; 6 = fixed defect; 7 = reversable defect",
    xlabel=" thal: 3 = normal; 6 = fixed defect; 7 = reversable defect",
)
plt.show()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Pairwise correlation between the columns, drawn as an annotated square heatmap
# on a 10x10 inch figure.
corrmat = df.corr()
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(corrmat, annot=True, square=True, ax=ax)

**Modelling**

In [None]:
# Separate the label column from the predictors.
y = df["target"]
X = df.drop(columns="target")

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs so the models fitted on it can be compared.
X_train, X_test, y_train, y_test =train_test_split(X, y, test_size=0.3,random_state=21)

# The features are fed in unscaled, so the default 100 solver iterations are
# typically not enough for the solver to converge (scikit-learn emits a
# ConvergenceWarning and returns under-fitted coefficients). Allow more iterations.
model1 = LogisticRegression(max_iter=1000)
model1.fit(X_train, y_train)

# Score the held-out predictions with F1.
ypred = model1.predict(X_test)
evaluation = f1_score(y_test, ypred)
print("Predicted Values are : " +str(ypred), "Evaluation is : " +str(evaluation))

In [None]:
# Text report of the per-class metrics for the logistic-regression predictions.
logreg_report = classification_report(y_test, ypred)
print(logreg_report)

In [None]:
# Confusion matrix for the logistic-regression predictions
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cf_matrix = confusion_matrix(y_test, ypred)

# fmt="d" prints each cell as a whole count; the default ".2g" format would show
# any count of 100 or more in scientific notation (e.g. 1e+02).
ax = sns.heatmap(cf_matrix, annot=True, fmt="d")
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title="Confusion matrix: Logistic Regression",
)
plt.show()

In [None]:
# Fit a decision tree on the training split. The tree builder breaks ties between
# equally good splits at random, so a fixed random_state is needed for the
# reported score to be reproducible (same seed as the train/test split).
model2 = DecisionTreeClassifier(random_state=21)
model2.fit(X_train,y_train)

# Score the held-out predictions with F1.
ypred2 = model2.predict(X_test)
evaluation2 = f1_score(y_test,ypred2)
print("Predicted Values are : " +str(ypred2), "Evaluation is : " +str(evaluation2))

In [None]:
# Text report of the per-class metrics for the decision-tree predictions.
tree_report = classification_report(y_test, ypred2)
print(tree_report)

In [None]:
# Confusion matrix for the decision-tree predictions
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cf_matrix = confusion_matrix(y_test, ypred2)

# fmt="d" prints each cell as a whole count; the default ".2g" format would show
# any count of 100 or more in scientific notation (e.g. 1e+02).
ax = sns.heatmap(cf_matrix, annot=True, fmt="d")
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title="Confusion matrix: Decision Tree",
)
plt.show()

In [None]:
# Fit a random forest on the training split. Bootstrap sampling and feature
# sub-sampling are random, so a fixed random_state is needed for the reported
# score to be reproducible (same seed as the train/test split).
model3 = RandomForestClassifier(random_state=21)
model3.fit(X_train,y_train)

# Score the held-out predictions with F1.
ypred3 = model3.predict(X_test)
evaluation3 = f1_score(y_test,ypred3)
print("Predicted Values are : " +str(ypred3), "Evaluation is : " +str(evaluation3))

In [None]:
# Text report of the per-class metrics for the random-forest predictions.
forest_report = classification_report(y_test, ypred3)
print(forest_report)

In [None]:
# Confusion matrix for the random-forest predictions
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cf_matrix = confusion_matrix(y_test, ypred3)

# fmt="d" prints each cell as a whole count; the default ".2g" format would show
# any count of 100 or more in scientific notation (e.g. 1e+02).
ax = sns.heatmap(cf_matrix, annot=True, fmt="d")
ax.set(
    xlabel="Predicted label",
    ylabel="True label",
    title="Confusion matrix: Random Forest",
)
plt.show()