# Exploratory Data Analysis

This notebook is used for exploratory data analysis (EDA) on the heart disease dataset. The goal is to visualize and understand the data, identify patterns, and prepare for further modeling.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid look to every figure drawn in this notebook
sns.set(style='whitegrid')

# Read the CSV file into a DataFrame.
# NOTE: 'path_to_your_data' is a placeholder -- point it at the real dataset file.
data = pd.read_csv(filepath_or_buffer='path_to_your_data')

# Preview the top five rows as a quick sanity check on the load
data.head(n=5)

In [None]:
# Descriptive statistics from DataFrame.describe(), kept in a variable so the
# table can be reused; the bare name on the last line renders it as cell output
summary_stats = data.describe()
summary_stats

In [None]:
# Count null entries per column, then show only the columns that have any
missing_values = data.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [None]:
# Bar chart of how many rows fall into each value of the target column.
# NOTE(review): 'target_variable' looks like a placeholder column name --
# confirm it matches the actual target column in the dataset.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=data, x='target_variable', ax=ax)
ax.set_title('Distribution of Target Variable')
ax.set_xlabel('Target Variable')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Correlation heatmap
# Restrict to numeric/boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises a
# ValueError if any are present. select_dtypes works on every pandas version.
numeric_data = data.select_dtypes(include=['number', 'bool'])

# Compute the matrix before opening a figure so a failure here does not
# leave an empty canvas behind.
correlation_matrix = numeric_data.corr()

plt.figure(figsize=(12, 8))
# annot/fmt print each coefficient in its cell, rounded to two decimals
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()