# Advanced Feature Engineering

Learn how to transform raw data into powerful features using the MKYZ `FeatureEngineer` class.

In [None]:
import mkyz
import pandas as pd
import numpy as np

# Create reproducible sample data: a daily datetime column, two numeric
# features, one categorical feature, and a target that depends non-linearly
# on the numeric features (a squared term plus an interaction) with noise.
n_samples = 1000
rng = np.random.default_rng(42)  # fixed seed so every run yields the same data

dates = pd.date_range('2023-01-01', periods=n_samples, freq='D')
val1 = rng.standard_normal(n_samples)
val2 = rng.standard_normal(n_samples)
df = pd.DataFrame({
    'date': dates,
    'val1': val1,
    'val2': val2,
    'cat': rng.choice(['X', 'Y', 'Z'], n_samples),
    # Non-linear signal, so polynomial/interaction features of val1 and val2
    # actually carry information about the target.
    'target': val1 ** 2 + val1 * val2 + rng.normal(0, 1, n_samples),
})

## 1. Datetime Features

Automatically extract temporal patterns.

In [None]:
# Build the MKYZ feature engineer and ask it for features derived from 'date'.
fe = mkyz.FeatureEngineer()
df = fe.create_datetime_features(df, column='date')

# Keep every column whose name contains 'date_' (presumably the newly
# generated ones -- confirm against the MKYZ naming scheme).
dt_cols = list(filter(lambda name: 'date_' in name, df.columns))
print("Extracted Datetime Columns:", dt_cols)
df[dt_cols].head()

## 2. Polynomial and Interaction Features

Capture non-linear effects and pairwise feature interactions.

In [None]:
# First add degree-2 polynomial terms for val1/val2, then feed that result
# into the interaction-feature step; the final frame is carried forward as df.
poly_df = fe.create_polynomial_features(df, columns=['val1', 'val2'], degree=2)
df = fe.create_interaction_features(poly_df, columns=['val1', 'val2'])

# Columns whose name contains '^' or '_x_' are reported as the new ones.
markers = ('^', '_x_')
engineered = [name for name in df.columns if any(m in name for m in markers)]
print("New Columns:", engineered)

## 3. Lag and Rolling Features

Lag and rolling-window features are essential for time-series forecasting.

In [None]:
# Derive lag features of val1 for lags 1, 3 and 7, then rolling features
# over a window of 7 on the same column.
lagged = fe.create_lag_features(df, column='val1', lags=[1, 3, 7])
df = fe.create_rolling_features(lagged, column='val1', window=7)

print("Time-series Features Created!")
# Preview the first 10 rows of every column whose name mentions 'lag' or 'rolling'.
ts_cols = [name for name in df.columns if 'lag' in name or 'rolling' in name]
df[ts_cols].head(10)

## 4. Feature Selection

Reduce noise by selecting the most important features.

In [None]:
# Candidate predictors: every numeric column except the target itself.
# NOTE(review): lag/rolling columns may hold NaN in their leading rows --
# confirm that mkyz.select_features tolerates missing values.
numeric_df = df.select_dtypes(include=[np.number])
X = numeric_df.drop(columns='target')
y = df['target']

# Ask MKYZ for 10 features and show what it returned.
selected_features = mkyz.select_features(X, y, n_features=10)
print("Selected Top 10 Features:", selected_features)