# EDA - Exploratory Data Analysis

An educational notebook that walks through basic EDA workflows using pandas, NumPy, matplotlib, and seaborn.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Synthetic demo data. The seed is fixed so every run draws the same sample.
np.random.seed(42)
n_rows = 200

# Draw the columns in a fixed order: with a seeded global generator, the
# order of the calls determines which values each column receives.
ages = np.random.randint(15, 60, size=n_rows)
study_hours = np.random.normal(5, 2, size=n_rows).clip(0)    # no negative hours
scores = np.random.normal(70, 10, size=n_rows).clip(0, 100)  # keep within 0..100
genders = np.random.choice(['Male', 'Female'], size=n_rows, p=[0.45, 0.55])

df = pd.DataFrame({
    'age': ages,
    'hours_study': study_hours,
    'score': scores,
    'gender': genders,
})

# Plant one obviously impossible age so the outlier-detection cell
# further down has something to find.
df.loc[0, 'age'] = 150

df.head()

In [None]:
# Quick checks: dimensions, per-column dtypes / non-null counts, summary stats
print('shape:', df.shape)
print('\ninfo:\n')
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()

# Last expression of the cell: the notebook renders the summary table.
df.describe()

In [None]:
# Count the missing entries in each column
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Distribution of the 'score' column, bucketed into 15 bins
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(df['score'], bins=15)
ax.set_title('Score distribution')
ax.set_xlabel('Score')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Box-and-whisker plot of 'age'; points beyond the whiskers are outlier candidates
fig, ax = plt.subplots(figsize=(6, 4))
df.boxplot(column='age', ax=ax)
ax.set_title('Age boxplot')
plt.show()

In [None]:
# Correlation heatmap (simple)
import seaborn as sns

# Pairwise correlations between the numeric columns only
# ('gender' is categorical and is deliberately left out).
corr = df[['age', 'hours_study', 'score']].corr()

plt.figure(figsize=(5, 4))
# Pin the colour scale to the full correlation range [-1, 1]. Without this,
# seaborn stretches the diverging 'coolwarm' map between the smallest and
# largest values present, so the neutral colour no longer marks zero and
# near-zero correlations can look strongly negative.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation heatmap')
plt.show()