# EDA and Training Notebook
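This notebook loads the `student-mat.csv` dataset and performs exploratory data analysis (EDA): missing values, the distribution of the target, and correlations among numeric features. The training pipeline and the saving of the best model are not run here; they are handled by the `ml/train.py` script described in the Training section at the end.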

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Directory holding the raw input files (resolved against the current working directory)
DATA_DIR = Path('../data/raw')

# Parse the CSV using ';' as the field separator instead of the default ','
df = pd.read_csv(DATA_DIR.joinpath('student-mat.csv'), sep=';')

# Show the first rows as the cell output
df.head()

In [None]:
# Missing values: number of NaN entries in each column, printed below a header line
print('Missing per column:', df.isnull().sum(), sep='\n')

In [None]:
# Target distribution: histogram of G3 with a kernel-density overlay.
# histplot returns the Axes it drew on, so the title is set on that object
# directly rather than through the pyplot "current axes".
sns.histplot(data=df, x='G3', kde=True).set_title('Distribution of final grade (G3)')
plt.show()

In [None]:
# Correlation heatmap among numeric features
# 'number' selects every numeric dtype (any integer/float width), not only
# int64/float64, so numeric columns stored with another dtype are not
# silently left out of the correlation matrix.
numeric = df.select_dtypes(include='number')
plt.figure(figsize=(10, 8))
# Annotate each cell with its correlation coefficient, formatted to two decimals.
sns.heatmap(numeric.corr(), annot=True, fmt='.2f', cmap='coolwarm')
# Title so the figure can be understood when the notebook is skimmed.
plt.title('Correlation between numeric features')
plt.show()

## Training
Run the `ml/train.py` script to train models and produce a saved artifact and metadata in `/models`.