# Advanced EDA & ML Workflow with edaflow
This notebook demonstrates a comprehensive workflow including advanced EDA, feature engineering, model training, and evaluation using edaflow.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import edaflow as eda
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

## 1. Load and Explore Data
Build a small synthetic dataset containing missing values, an outlier, and a categorical feature.

In [None]:
# Synthetic demo data: two numeric features (with NaN gaps and one extreme
# value of 100 in feature1), one string-valued category column, and a 0/1 target.
synthetic_columns = {
    'feature1': [2, np.nan, 1, 5, 100, 2, 3, 4, np.nan, 1],
    'feature2': [7, 8, np.nan, 5, 7, 8, 6, 5, 7, 8],
    'category': ['A', 'B', 'A', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'target': [0, 1, 0, 1, 1, 0, 1, 0, 1, 0],
}
df = pd.DataFrame(synthetic_columns)
# Last expression of the cell: the notebook renders the first rows.
df.head()

## 2. Visualize Data
Use edaflow to display a facet grid and highlight anomalies.

In [None]:
# Plot feature1 and feature2 from df, passing the label column as `target`.
# NOTE(review): the exact layout of the grid depends on edaflow's
# display_facet_grid implementation — confirm against its documentation.
eda.display_facet_grid(df, features=['feature1', 'feature2'], target='target')
# Run edaflow's anomaly highlighting on the whole frame with default settings.
# NOTE(review): presumably this flags values such as the 100 in feature1 — verify.
eda.highlight_anomalies(df)

## 3. Feature Engineering
Create lag features and scale features.

In [None]:
# Request lag-1 features for feature1 and feature2. The result is assigned
# back to `df`, so re-running this cell passes the already-augmented frame in again.
# NOTE(review): lagged columns typically start with a missing value — confirm
# how create_lag_features names the new columns and whether it fills or drops them.
df = eda.create_lag_features(df, columns=['feature1', 'feature2'], lags=1)
# Scale only the two original feature columns; the output is kept as a
# separate frame, `df_scaled`, which the later cells operate on.
# NOTE(review): scaling method and whether other columns are passed through
# unchanged are decided by edaflow — verify.
df_scaled = eda.scale_features(df, columns=['feature1', 'feature2'])

## 4. Data Cleaning
Impute missing values and group rare categories.

In [None]:
# Mean-impute every numeric feature column rather than only feature1/feature2.
# Any other numeric column present in df_scaled (e.g. lag columns added during
# feature engineering) would otherwise keep its NaNs, and scikit-learn's
# GradientBoostingClassifier rejects NaN input. The label column is excluded
# so it is never altered by imputation.
numeric_features = df_scaled.select_dtypes(include='number').columns.drop('target', errors='ignore')
# DataFrame.fillna with a Series of column means fills each column with its own mean,
# which matches the per-column fill previously applied to feature1 and feature2.
df_scaled[numeric_features] = df_scaled[numeric_features].fillna(df_scaled[numeric_features].mean())
# Collapse infrequent category labels using a 0.2 threshold.
# NOTE(review): presumably the threshold is a relative frequency — confirm in edaflow docs.
df_scaled['category'] = eda.group_rare_categories(df_scaled['category'], threshold=0.2)

## 5. Train-Test Split
Split the cleaned and engineered data.

In [None]:
# Separate the predictors from the label column.
X = df_scaled.drop('target', axis=1)
# One-hot encode string/categorical columns (here 'category'). scikit-learn
# estimators require numeric input, so fitting on the raw string labels
# would raise a ValueError. Numeric columns pass through get_dummies unchanged.
X = pd.get_dummies(X)
y = df_scaled['target']
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## 6. Train Advanced Model
Fit a GradientBoostingClassifier.

In [None]:
# Seed the estimator so repeated runs fit the same model; this matches the
# seeded train/test split (random_state=42) used earlier in the notebook.
clf = GradientBoostingClassifier(random_state=42)
# Fit on the training partition only; the held-out rows are reserved for evaluation.
clf.fit(X_train, y_train)

## 7. Evaluate and Visualize
Show the confusion matrix and feature importances.

In [None]:
# Predict labels for the held-out rows. y_pred is stored but is not passed
# to either plotting call below, which receive the fitted model directly.
y_pred = clf.predict(X_test)
# NOTE(review): `eda.ml` is reached through the top-level `import edaflow as eda`;
# confirm the package exposes the `ml` submodule without an explicit import.
eda.ml.plot_confusion_matrix(clf, X_test, y_test)
# Feature names are taken from X, the full predictor frame the split was made from.
eda.ml.plot_feature_importance(clf, X.columns)

## 8. Export Results
Export the final figure using edaflow.

In [None]:
# Ask edaflow to export a figure under the file name 'final_results.png'.
# NOTE(review): presumably this saves the most recently drawn figure, resolved
# against the current working directory — confirm which figure export_figure targets.
eda.export_figure('final_results.png')