# 02 - Feature Engineering
## Fresh Flow Markets - Demand Forecasting

This notebook demonstrates the feature engineering pipeline:
- Daily demand aggregation
- Time features
- Lag and rolling features
- Weather data integration
- Danish holiday features
- Promotion features

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import plotly.express as px
from src.data.loader import load_key_tables, load_config
from src.data.cleaner import clean_all
from src.features.builder import build_features, aggregate_daily_demand

# Load the project configuration, read the key tables with it, and hand the
# result straight to the cleaning step so `tables` only ever refers to the
# cleaned version.
config = load_config()
tables = clean_all(load_key_tables(config))
print('Data loaded and cleaned.')

## Step 1: Aggregate Daily Demand

In [None]:
# Build the daily demand table from the orders and order-items fact tables.
daily = aggregate_daily_demand(
    tables['fct_orders'],
    tables['fct_order_items'],
)

# Quick sanity summary: table size, how many distinct stores and items it
# covers, and the first/last date present.
print(
    f'Daily demand shape: {daily.shape}',
    f'Unique stores: {daily["place_id"].nunique()}',
    f'Unique items: {daily["item_id"].nunique()}',
    f'Date range: {daily["date"].min()} to {daily["date"].max()}',
    sep='\n',
)

# Leave the preview as the cell's last expression so it renders as a table.
daily.head(10)

## Step 2: Build Complete Feature Matrix

In [None]:
# Build the full feature matrix. `top_n_items=30` is forwarded as-is to
# build_features; see src.features.builder for what it selects.
features = build_features(tables, top_n_items=30)

print(f'Feature matrix shape: {features.shape}')
print(f'\nFeature columns ({len(features.columns)}):')

# One "  name: dtype" line per column, in alphabetical order of column name.
dtype_lines = [f'  {col}: {features[col].dtype}' for col in sorted(features.columns)]
for line in dtype_lines:
    print(line)

## Step 3: Feature Distributions

In [None]:
# Target variable distribution
fig = px.histogram(
    features,
    x='quantity_sold',
    nbins=50,
    title='Distribution of Daily Quantity Sold',
)
fig.update_layout(template='plotly_white')
fig.show()

# Correlation of lag features with target.
# Select lag / rolling / expanding columns by keyword, but only when the
# keyword is not preceded by a lowercase letter. A plain substring test on
# 'lag' would also pick up unrelated names that merely contain it (for
# example anything with "flag" in it).
# NOTE(review): assumes history features are named with the keyword at the
# start of the name or after a separator/digit (e.g. "lag_7", "qty_lag_7",
# "rolling_mean_7") - confirm against src.features.builder.
history_mask = features.columns.str.contains(
    r'(?<![a-z])(?:lag|rolling|expanding)', na=False
)
lag_cols = features.columns[history_mask].tolist()

# corrwith correlates each selected column with the target directly, instead
# of building the full column-by-column correlation matrix and keeping only
# one column of it. The rename keeps the printed Series labelled with the
# target name.
corrs = (
    features[lag_cols]
    .corrwith(features['quantity_sold'])
    .rename('quantity_sold')
    .sort_values(ascending=False)
)
print('\nFeature correlations with quantity_sold:')
print(corrs)