In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category so library notices do not clutter cell output.
warnings.simplefilter("ignore")

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Shell escape: list the files available in the dataset's input directory.
!ls ../input/heart-attack-analysis-prediction-dataset

In [None]:
# Location of the heart dataset CSV, relative to the notebook's working directory.
csv_path = "../input/heart-attack-analysis-prediction-dataset/heart.csv"
data = pd.read_csv(csv_path)

# Report (rows, columns) as a quick sanity check on the load.
print(f"Heart Dataset Shape: {data.shape}")

In [None]:
# Preview the first rows of the dataset.
data.head()

In [None]:
# Preview the last rows of the dataset.
data.tail()

In [None]:
# Column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Summary statistics for the columns of `data`.
data.describe()

In [None]:
# Number of distinct values per column; low counts indicate categorical features.
data.nunique()

In [None]:
# Pairwise correlation between the columns of `data` (pandas' default method).
# The result gets a descriptive name instead of `_`, which IPython reserves for
# the previous cell's output and would otherwise be shadowed.
corr_matrix = data.corr()

# Draw on an explicit Axes so the figure size and title apply to this plot.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(corr_matrix, annot=True, ax=ax)
ax.set_title("Data Correlation Heatmap", fontsize=14)
plt.show()

### Age

In [None]:
# Histogram of the `age` column with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat="density" and a KDE cut of 3
# reproduces its density-normalised bars and KDE tails.
sns.histplot(x=data["age"], kde=True, stat="density", kde_kws={"cut": 3})
plt.xlabel("Age", fontsize=12)
# The y-axis is a density, not a raw frequency, matching the other distribution plots.
plt.ylabel("Density", fontsize=12)
plt.title("Age Distribution", fontsize=14)
plt.show()

In [None]:
# Summary statistics for the `age` column.
data["age"].describe()

### Sex

In [None]:
# Human-readable labels for the integer-encoded `sex` column.
sex_map = {
    1: "Male",
    0: "Female"
}

# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`, so a positional series is no longer treated as x.
sns.countplot(x=data["sex"].map(sex_map))
plt.title("Sex Distribution", fontsize=14)
plt.xlabel("Sex", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Share of each sex label (proportions sum to 1 because normalize=True).
data["sex"].map(sex_map).value_counts(normalize=True)

### Chest Pain Type

- Value 0: typical angina
- Value 1: atypical angina
- Value 2: non-anginal pain
- Value 3: asymptomatic

In [None]:
# Human-readable labels for the integer-encoded `cp` (chest pain type) column.
cp_map = {
    0: "Typical Angina",
    1: "Atypical Angina",
    2: "Non-Anginal Pain",
    3: "Asymptomatic",
}

# Draw on an explicit Axes and pass the series as `x=`: since seaborn 0.12 the
# only positional argument of countplot is `data`.
fig, ax = plt.subplots(figsize=(10, 8))
sns.countplot(x=data["cp"].map(cp_map), ax=ax)
ax.set_xlabel("Chest Pain Type", fontsize=12)
ax.set_ylabel("Count", fontsize=12)
ax.set_title("Chest Pain Types Distribution", fontsize=15)
plt.show()

In [None]:
# Share of each chest pain label (proportions, because normalize=True).
data["cp"].map(cp_map).value_counts(normalize=True)

### Resting blood pressure (in mm Hg on admission to the hospital) 

In [None]:
# Histogram of the `trtbps` column with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat="density" and a KDE cut of 3
# reproduces its density-normalised bars and KDE tails.
sns.histplot(x=data["trtbps"], kde=True, stat="density", kde_kws={"cut": 3})
plt.xlabel("Resting blood pressure", fontsize=12)
plt.ylabel("Density", fontsize=12)
plt.title("Resting blood pressure Distribution", fontsize=14)
plt.show()

In [None]:
# Summary statistics for the `trtbps` column.
data["trtbps"].describe()

### Serum Cholesterol in mg/dl

In [None]:
# Histogram of the `chol` column with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat="density" and a KDE cut of 3
# reproduces its density-normalised bars and KDE tails.
sns.histplot(x=data["chol"], kde=True, stat="density", kde_kws={"cut": 3})
plt.xlabel("Serum Cholestoral", fontsize=12)
plt.ylabel("Density", fontsize=12)
plt.title("Serum Cholestoral Distribution", fontsize=14)
plt.show()

In [None]:
# Summary statistics for the `chol` column.
data["chol"].describe()

### Fasting Blood Sugar > 120 mg/dl

In [None]:
# Human-readable labels for the integer-encoded `fbs` flag.
fbs_map = {
    1: "True",
    0: "False"
}

# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`, so a positional series is no longer treated as x.
sns.countplot(x=data["fbs"].map(fbs_map))
plt.title("Fasting Blood Sugar Distribution", fontsize=14)
plt.xlabel("Fasting Blood Sugar", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Absolute number of rows per fasting-blood-sugar label.
data["fbs"].map(fbs_map).value_counts()

### Resting Electrocardiographic Results 

In [None]:
# Human-readable labels for the integer-encoded `restecg` column.
restecg_map = {
    0: "normal",
    1: "having ST-T wave abnormality",
    2: "showing probable or definite left ventricular hypertrophy by Estes' criteria ",
}

# Draw on an explicit Axes and pass the series as `x=`: since seaborn 0.12 the
# only positional argument of countplot is `data`.
fig, ax = plt.subplots(figsize=(10, 8))
sns.countplot(x=data["restecg"].map(restecg_map), ax=ax)
# The category labels are long, so tilt them to limit overlap.
plt.xticks(rotation=45)
ax.set_ylabel("Count", fontsize=12)
ax.set_xlabel("Resting ECG", fontsize=12)
ax.set_title("Rest ECG Distribution", fontsize=15)
plt.show()

In [None]:
# Absolute number of rows per resting-ECG label.
data["restecg"].map(restecg_map).value_counts()

### Maximum Heart Rate Achieved 

In [None]:
# Histogram of the `thalachh` column with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat="density" and a KDE cut of 3
# reproduces its density-normalised bars and KDE tails.
sns.histplot(x=data["thalachh"], kde=True, stat="density", kde_kws={"cut": 3})
plt.xlabel("Maximum Heart Rate Achieved ", fontsize=12)
plt.ylabel("Density", fontsize=12)
plt.title("Maximum Heart Rate Achieved Distribution", fontsize=14)
plt.show()

In [None]:
# Summary statistics for the `thalachh` column.
data["thalachh"].describe()

### Exercise Induced Angina

In [None]:
# Human-readable labels for the integer-encoded `exng` flag.
exng_map = {
    1: "Yes",
    0: "No"
}

# Map with `exng_map` (the original used `fbs_map` by mistake, which labelled the
# bars "True"/"False" and made this cell depend on an unrelated earlier cell).
# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`.
sns.countplot(x=data["exng"].map(exng_map))
plt.title("Exercise Induced Angina Distribution", fontsize=14)
plt.xlabel("Exercise Induced Angina", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Absolute number of rows per exercise-induced-angina label.
data["exng"].map(exng_map).value_counts()

### ST depression induced by exercise relative to rest 

In [None]:
# Histogram of the `oldpeak` column with a KDE overlay.
# `sns.distplot` is deprecated; `histplot` with stat="density" and a KDE cut of 3
# reproduces its density-normalised bars and KDE tails.
sns.histplot(x=data["oldpeak"], kde=True, stat="density", kde_kws={"cut": 3})
plt.xlabel("ST depression induced by exercise relative to rest", fontsize=12)
plt.ylabel("Density", fontsize=12)
plt.title("ST depression induced by exercise relative to rest Distribution", fontsize=14)
plt.show()

In [None]:
# Summary statistics for the `oldpeak` column.
data["oldpeak"].describe()

### The slope of the peak exercise ST segment 

In [None]:
# Human-readable labels for the integer-encoded `slp` column.
slope_map = {
    0: "Upsloping",
    1: "Flat",
    2: "Downsloping"
}

# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`, so a positional series is no longer treated as x.
sns.countplot(x=data["slp"].map(slope_map))
plt.title("The slope of the peak exercise ST segment Distribution", fontsize=14)
plt.xlabel("The slope of the peak exercise ST segment", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Share of each ST-slope label (proportions, because normalize=True).
data["slp"].map(slope_map).value_counts(normalize=True)

### Number of major vessels (0-3) colored by fluoroscopy

In [None]:
# Count of rows per value of the `caa` column.
# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`, so a positional series is no longer treated as x.
sns.countplot(x=data["caa"])
plt.title("Number of major vessels (0-3) colored by flourosopy Distribution", fontsize=14)
plt.xlabel("Number of major vessels (0-3) colored by flourosopy", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Share of rows per `caa` value (proportions, because normalize=True).
data["caa"].value_counts(normalize=True)

### Thall

In [None]:
# Share of rows per `thall` value (proportions, because normalize=True).
data["thall"].value_counts(normalize=True)

### Output

In [None]:
# Human-readable labels for the integer-encoded `output` (target) column.
# Both labels are strings: the original mapped 1 to the boolean True, giving a
# mixed-type series next to the string "False".
output_map = {
    0: "False",
    1: "True"
}

# Pass the series as `x=`: since seaborn 0.12 the only positional argument of
# countplot is `data`, so a positional series is no longer treated as x.
sns.countplot(x=data["output"].map(output_map))
plt.title("Target Distribution", fontsize=14)
plt.show()