In [None]:
# === SECTION 1: IMPORTS & LOAD RAW DATA ===
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the raw records from 'heart.csv' (resolved against the working
# directory) into the notebook-level frame `df`.
df = pd.read_csv(filepath_or_buffer='heart.csv')

In [None]:
# === SECTION 2: CORRELATION ANALYSIS (RAW DATA) ===
# Uses the frame loaded in Section 1, which is bound to `df` (there is no
# `data` variable in this notebook). The helper label column 'Status' that
# Section 3 attaches to `df` is excluded when present, so re-running this
# cell after Section 3 does not add a re-encoded copy of the target to the
# correlation matrix.
raw_features = df[[name for name in df.columns if name != 'Status']]

# --- 2.1. Target Variable Distribution ---
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x='HeartDisease', data=raw_features, ax=ax)
ax.set_title('Distribution of Heart Disease (1 = Disease, 0 = Normal)')
plt.show()

# --- 2.2. Correlation Matrix Heatmap ---
# One-hot encode the non-numeric columns (dropping the first level of each to
# avoid perfectly collinear dummies) so every feature can enter `.corr()`.
data_for_heatmap = pd.get_dummies(raw_features, drop_first=True)

fig, ax = plt.subplots(figsize=(18, 15))
sns.heatmap(data_for_heatmap.corr(), annot=True, fmt='.2f', cmap="BrBG", ax=ax)
ax.set_title('Correlation Matrix of All Features')
plt.show()

In [None]:
# === SECTION 3: FEATURE DISTRIBUTIONS ===
# --- 3.1. Numerical Feature Distribution ---

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Load the data and attach a readable class label for the legend.
#    `df` (including the 'Status' column) is rebound at notebook level.
df = pd.read_csv('heart.csv')
df['Status'] = df['HeartDisease'].map({0: 'Normal', 1: 'Heart Disease'})

# 2. Numerical features to plot (one KDE panel each)
numerical_cols = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']

# 3. Styling. The seaborn style has to be set BEFORE the axes are created:
#    it only updates matplotlib rcParams, which are read when an Axes is
#    constructed, so setting it afterwards leaves this figure unstyled.
sns.set_style("whitegrid")
# Colors: Blue = Normal, Red = Heart Disease
my_palette = {"Normal": "#3498db", "Heart Disease": "#e74c3c"}

# 4. Setup Grid (2 Rows x 3 Columns to fit the 5 plots)
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

# 5. One KDE panel per numerical feature, split by Status
for ax, col in zip(axes, numerical_cols):
    sns.kdeplot(
        data=df,
        x=col,
        hue='Status',
        palette=my_palette,
        fill=True,
        common_norm=False,  # normalise each class separately so shapes are comparable
        alpha=0.3,
        linewidth=2.5,
        ax=ax,
        legend=True,  # the legend must exist for move_legend below
    )

    ax.set_title(f"{col} Distribution", fontsize=14, fontweight='bold')
    ax.set_ylabel("Density", fontsize=12)
    ax.set_xlabel(col, fontsize=12)

    # Relocate the hue legend seaborn created to the upper-right corner
    # inside the panel and drop its title.
    sns.move_legend(ax, "upper right", title=None, frameon=True)

# 6. Remove every subplot that did not receive a feature (not just the last
#    one), so the cell keeps working if the feature list or grid changes.
for spare_ax in axes[len(numerical_cols):]:
    fig.delaxes(spare_ax)

plt.tight_layout()
plt.show()

# --- 3.2. Categorical Feature Distribution ---

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load the data and attach a readable class label.
#    `df` (including the 'Status' column) is rebound at notebook level.
df = pd.read_csv('heart.csv')
df['Status'] = df['HeartDisease'].map({0: 'Normal', 1: 'Heart Disease'})

# 2. Categorical features to plot (one stacked-bar panel each)
categorical_cols = ['Sex', 'ChestPainType', 'FastingBS', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# 3. Colours, keyed by status so bars and legend cannot drift apart.
color_normal = "#2E4053"      # Deep Navy
color_disease = "#F4D03F"     # Muted Gold
status_colors = {'Heart Disease': color_disease, 'Normal': color_normal}
status_order = list(status_colors)  # fixed stacking order: Heart Disease, then Normal

# 4. Setup Grid
fig, axes = plt.subplots(2, 3, figsize=(18, 10))  # Compact 2x3 grid
axes = axes.flatten()

# 5. One horizontal stacked bar chart per categorical feature
for ax, col in zip(axes, categorical_cols):
    # Counts table: index = category level, columns = Status.
    # Reindexing pins the column order (and fills a missing status with 0)
    # instead of relying on crosstab's alphabetical column ordering.
    cross_tab = (
        pd.crosstab(df[col], df['Status'])
        .reindex(columns=status_order, fill_value=0)
    )

    # Order the bars by total count, smallest first (largest ends up on top).
    row_totals = cross_tab.sum(axis=1).sort_values(ascending=True)
    cross_tab = cross_tab.loc[row_totals.index]

    cross_tab.plot(
        kind='barh',
        stacked=True,
        color=[status_colors[status] for status in cross_tab.columns],
        ax=ax,
        edgecolor='white',
        width=0.7,
        legend=False,  # a single figure-level legend is added below
    )

    # Write the raw count inside each segment; empty segments get no label.
    for container in ax.containers:
        labels = [int(bar.get_width()) if bar.get_width() > 0 else '' for bar in container]
        ax.bar_label(container, labels=labels, label_type='center',
                     fontsize=11, color='white', fontweight='bold')

    # Styling
    ax.set_title(col, fontsize=14, fontweight='bold', color='#333333')
    ax.set_xlabel('Count', fontsize=10)
    ax.set_ylabel('')

    # Clean up: restrict despine to this panel rather than re-processing
    # every axis of the figure on each iteration.
    sns.despine(ax=ax, left=True, bottom=True)
    ax.tick_params(axis='y', length=0, labelsize=12)  # Remove y-ticks
    ax.grid(axis='x', linestyle=':', alpha=0.5)

# Remove any subplot that did not receive a feature.
for spare_ax in axes[len(categorical_cols):]:
    fig.delaxes(spare_ax)

# 6. Global Legend, built from the same status -> colour mapping as the bars
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor=color, label=status) for status, color in status_colors.items()
]

plt.tight_layout()
# Reserve the top 10% of the canvas for the legend ...
plt.subplots_adjust(top=0.9)
# ... and anchor the legend inside that strip (figure coordinates), so it is
# not clipped when the figure is saved without a tight bounding box.
fig.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 1.0),
           ncol=2, fontsize=14, frameon=False)
plt.show()