# 🔍 Exploratory Data Analysis (EDA) Notebook
This notebook walks through a basic EDA process using seaborn's Titanic example dataset.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Optional: display plots inline
%matplotlib inline


In [None]:
# Fetch seaborn's "titanic" example dataset into a DataFrame and preview
# its first five rows.
df = sns.load_dataset(name='titanic')
df.head(n=5)


## 📊 Basic Info and Summary Statistics

In [None]:
# Report the (rows, columns) dimensions first, then the per-column
# dtype / non-null summary that DataFrame.info() prints.
dims = df.shape
print("Shape:", dims)
df.info()


In [None]:
# Descriptive statistics for every column; include='all' covers the
# non-numeric columns as well as the numeric ones.
summary = df.describe(include='all')
summary


## 🔎 Missing Values

In [None]:
# Count the missing entries in each column.
missing_per_column = df.isna().sum()
missing_per_column


## 📈 Univariate Analysis

In [None]:
# Distribution of the 'age' column (missing ages dropped first), drawn as
# a histogram with a kernel-density overlay.
ages = df['age'].dropna()
ax = sns.histplot(ages, kde=True)
ax.set_title("Age Distribution")
plt.show()


In [None]:
# Horizontal box plot of 'fare'; points beyond the whiskers are the
# candidate outliers.
fares = df['fare']
ax = sns.boxplot(x=fares)
ax.set_title("Fare Boxplot")
plt.show()


## 🔗 Bivariate Analysis

In [None]:
# A correlation heatmap is used here in place of individual scatterplots:
# compute the pairwise correlations of the numeric columns, then draw them
# as an annotated, diverging-colour grid.
corr_matrix = df.corr(numeric_only=True)
ax = sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
ax.set_title("Correlation Heatmap")
plt.show()


## 🚨 Outlier Detection (Z-score Method)

In [None]:
from scipy import stats

# Z-score outlier filter over the numeric columns.
#
# Rows with a missing value in any numeric column cannot be scored, so they
# are excluded from the cleaned frame. The keep-mask is built at the full
# length of `df`: a mask computed only on the NaN-free subset has fewer rows
# than `df` and cannot be used to index it directly.
z_threshold = 3  # keep rows whose |z| is below this in every numeric column

numeric = df.select_dtypes(include=[np.number])

# True for rows that have a value in every numeric column.
complete = numeric.notna().all(axis=1).to_numpy()

# Column-wise z-scores, computed on the complete rows only.
z = np.abs(stats.zscore(numeric[complete].to_numpy(dtype=float), axis=0))

# Start from the "complete" mask and, among those rows, keep only the ones
# that stay under the threshold in all columns.
keep = complete.copy()
keep[complete] = (z < z_threshold).all(axis=1)

df_clean = df[keep]
df_clean.shape


## 📊 Categorical Variable Analysis

In [None]:
# Bar plot for categorical data
sns.countplot(x='class', data=df)
plt.title("Passenger Class Distribution")
plt.show()
