In [None]:
pip install missingno

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import missingno as msno
# Apply matplotlib's "ggplot" style sheet to the figures drawn after this point.
plt.style.use("ggplot")

In [None]:
# Read the heart-disease CSV into a DataFrame and preview its first rows.
heart_csv = "/kaggle/input/heart-disease-uci/heart.csv"
df = pd.read_csv(heart_csv)
df.head()

In [None]:
# Draw missingno's matrix plot of the DataFrame to check it for missing (NaN) values.
msno.matrix(df)
plt.title(
    "Are There NaN Datas?",
    fontsize=20,
)
plt.show()

In [None]:
# Heat map of the pairwise column correlations, annotated as whole percentages.
corr_matrix = df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".0%",
    cmap="coolwarm",
)
plt.title("Correlation Matrix", fontsize=20)
plt.show()

In [None]:
# Columns treated as categorical by the plotting cells below. "target" is kept
# first, so positional lookups into this list start at the label column.
categorical = [
    "target", "sex", "cp",
    "fbs", "restecg", "exang",
    "slope", "ca", "thal",
]

In [None]:
# One pie chart per categorical column, showing the share of each category.
plt.figure(figsize=(14, 20))
for position, column in enumerate(categorical, start=1):
    counts = df[column].value_counts()  # computed once, reused for labels and sizes
    plt.subplot(5, 2, position)
    plt.pie(
        counts.values,
        labels=counts.index,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
    )
    plt.axis('equal')  # equal aspect ratio keeps the pie circular
    plt.xticks([])
    plt.yticks([])
    plt.title(column.upper())
plt.show()

In [None]:
# Parallel-coordinates view of the DataFrame's columns, with lines coloured by `target`.
fig = px.parallel_coordinates(
    df,
    color="target",
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=0.5,
)
fig.show()

In [None]:
# Lower-triangle pair plot of age, trestbps, chol, thalach and oldpeak,
# with points coloured and marked by `target`.
pairplot_columns = ["age", "trestbps", "chol", "thalach", "oldpeak", "target"]
sns.pairplot(
    df[pairplot_columns],
    hue="target",
    markers=["x", "*"],
    corner=True,
)
plt.show()

In [None]:
# Mean of `target` within each category of sex, cp, fbs and restecg,
# drawn as one line plot per column on a 2x2 grid.
mean_target_panels = [
    # (column, x tick positions)
    ("sex", [0, 1]),
    ("cp", [0, 1, 2, 3]),
    ("fbs", [0, 1]),
    ("restecg", [0, 1, 2]),
]

plt.figure(figsize=(14, 10))
for slot, (column, ticks) in enumerate(mean_target_panels, start=1):
    plt.subplot(2, 2, slot)
    mean_by_category = df.groupby([column])["target"].mean()
    mean_by_category.plot(lw=2.7, marker="s", color="teal",
                          markerfacecolor='maroon', markersize=7)
    plt.ylabel("Average of target")
    plt.xticks(ticks)
plt.show()

In [None]:
# Mean of `target` within each category of slope, ca and thal.
# The first two share the top row; thal spans the whole bottom row.
spanning_panels = [
    # (column, subplot position, x tick positions)
    ("slope", 1, [0, 1, 2]),
    ("ca", 2, [0, 1, 2, 3, 4]),
    ("thal", (3, 4), [0, 1, 2, 3]),
]

plt.figure(figsize=(14, 10))
for column, slot, ticks in spanning_panels:
    plt.subplot(2, 2, slot)
    mean_by_category = df.groupby([column])["target"].mean()
    mean_by_category.plot(lw=2.7, marker="s", color="teal",
                          markerfacecolor='maroon', markersize=7)
    plt.ylabel("Average of target")
    plt.xticks(ticks)
plt.show()

In [None]:
# AGE: density per target class, followed by the distribution of age within
# each categorical feature.
plt.figure(figsize=(10,17))
plt.subplot(5,2,1)
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# left as-is so the cell keeps working on the seaborn version installed here.
sns.kdeplot(df.loc[df["target"]==1, "age"],color="green",shade=True)
sns.kdeplot(df.loc[df["target"]==0, "age"],color="red",shade=True)
plt.legend(["target:1","target:0"])
plt.title("Age".upper())

# Slots 2..9 of the 5x2 grid hold one boxen plot per categorical feature.
# categorical[0] is "target" (already shown by the density plot), so iterate
# from index 1 through the LAST entry -- the previous range(2,9) stopped one
# short and never plotted "thal".
for slot, feature in enumerate(categorical[1:], start=2):
    plt.subplot(5,2,slot)
    sns.boxenplot(data=df, x=feature, y="age")

plt.tight_layout()
plt.show()

In [None]:
# CHOL: density per target class, followed by the distribution of chol within
# each categorical feature.
plt.figure(figsize=(10,17))
plt.subplot(5,2,1)
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# left as-is so the cell keeps working on the seaborn version installed here.
sns.kdeplot(df.loc[df["target"]==1, "chol"],color="green",shade=True)
sns.kdeplot(df.loc[df["target"]==0, "chol"],color="red",shade=True)
plt.legend(["target:1","target:0"])
plt.title("chol".upper())

# Slots 2..9 of the 5x2 grid hold one boxen plot per categorical feature.
# categorical[0] is "target" (already shown by the density plot), so iterate
# from index 1 through the LAST entry -- the previous range(2,9) stopped one
# short and never plotted "thal".
for slot, feature in enumerate(categorical[1:], start=2):
    plt.subplot(5,2,slot)
    sns.boxenplot(data=df, x=feature, y="chol")

plt.tight_layout()
plt.show()

In [None]:
# THALACH: density per target class, followed by the distribution of thalach
# within each categorical feature.
plt.figure(figsize=(10,17))
plt.subplot(5,2,1)
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# left as-is so the cell keeps working on the seaborn version installed here.
sns.kdeplot(df.loc[df["target"]==1, "thalach"],color="green",shade=True)
sns.kdeplot(df.loc[df["target"]==0, "thalach"],color="red",shade=True)
plt.legend(["target:1","target:0"])
plt.title("thalach".upper())

# Slots 2..9 of the 5x2 grid hold one boxen plot per categorical feature.
# categorical[0] is "target" (already shown by the density plot), so iterate
# from index 1 through the LAST entry -- the previous range(2,9) stopped one
# short and never plotted "thal".
for slot, feature in enumerate(categorical[1:], start=2):
    plt.subplot(5,2,slot)
    sns.boxenplot(data=df, x=feature, y="thalach")

plt.tight_layout()
plt.show()

In [None]:
# OLDPEAK: density per target class, followed by the distribution of oldpeak
# within each categorical feature.
plt.figure(figsize=(10,17))
plt.subplot(5,2,1)
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill`;
# left as-is so the cell keeps working on the seaborn version installed here.
sns.kdeplot(df.loc[df["target"]==1, "oldpeak"],color="green",shade=True)
sns.kdeplot(df.loc[df["target"]==0, "oldpeak"],color="red",shade=True)
plt.legend(["target:1","target:0"])
plt.title("oldpeak".upper())

# Slots 2..9 of the 5x2 grid hold one boxen plot per categorical feature.
# categorical[0] is "target" (already shown by the density plot), so iterate
# from index 1 through the LAST entry -- the previous range(2,9) stopped one
# short and never plotted "thal".
for slot, feature in enumerate(categorical[1:], start=2):
    plt.subplot(5,2,slot)
    sns.boxenplot(data=df, x=feature, y="oldpeak")

plt.tight_layout()
plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Feature matrix: every column except the label.
X = df.drop("target",axis=1)

# PCA picks the directions of largest variance, so features must be on a
# comparable scale first; otherwise the columns with the largest numeric
# ranges dominate both components regardless of how informative they are.
X_scaled = StandardScaler().fit_transform(X)

# Project the standardised features onto their first two principal components.
pca = PCA(n_components=2)
components = pd.DataFrame(pca.fit_transform(X_scaled))  # columns are 0 and 1
# Attach the label by position (not by index alignment) so rows stay matched
# even if df does not carry a default 0..n-1 index.
components["target"] = df["target"].values

plt.figure(figsize=(14,6))
sns.scatterplot(data=components, x=0, y=1, hue='target',palette=["teal","maroon"])
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title("PCA",fontsize=20)
plt.show()