# Exploratory Data Analysis (EDA)

In this notebook, we perform exploratory data analysis (EDA) on the processed dataset to understand its structure, check for missing values, visualize key features such as the target distribution and feature correlations, and identify any patterns or anomalies.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to the figures drawn in this notebook.
# `set_theme` is the documented entry point; `sns.set` is only an alias for it
# that the seaborn docs say may be removed in a future release.
sns.set_theme(style='whitegrid')

In [2]:
# Read the processed dataset into a DataFrame
processed_csv = '../data/processed/processed_data.csv'
data = pd.read_csv(processed_csv)

# Preview the top rows as the cell output
data.head()

In [3]:
# Per-column descriptive statistics, using pandas' default settings
summary_stats = data.describe()
summary_stats

In [4]:
# Tally null entries per column, then show only the columns that have any
missing_values = data.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [5]:
# Bar chart of the row count for each value of the 'target' column
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=data, x='target', ax=ax)
ax.set_title('Distribution of Target Variable')
ax.set_xlabel('Target')
ax.set_ylabel('Count')
plt.show()

In [6]:
# Correlation heatmap
# Pairwise correlation is only defined for numeric data. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises a
# ValueError instead, so select the numeric (and boolean) columns explicitly.
# select_dtypes works on both old and new pandas versions.
numeric_data = data.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_data.corr()

# Draw the matrix on an explicit Axes, annotating each cell to two decimals.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()