# Feature Engineering

Transform raw stock prices into risk features for clustering.

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../src')

from features import (
    calculate_returns,
    calculate_volatility_features,
    calculate_risk_metrics,
    calculate_technical_indicators,
    calculate_liquidity_features,
    calculate_momentum_features,
    calculate_drawdown,
    aggregate_stock_features
)

## Load Data

In [2]:
df = pd.read_csv('../Data/Processed/cleaned_nse.csv')
print(f"Loaded {len(df):,} rows for {df['Stock_code'].nunique()} stocks")

df.head(3)

Loaded 69,754 rows for 75 stocks


Unnamed: 0,Date,Stock_code,Name,12m Low,12m High,Day Low,Day High,Day Price,Previous,Change,%Change,Volume,Adjusted Price,Sector,Month,Year
0,2021-01-04,ABSA,ABSA Bank Kenya Plc,8.5,14.2,9.42,9.8,9.52,9.66,-0.14,1.45,18500.0,0.0,Banking,1,2021
1,2021-01-05,ABSA,ABSA Bank Kenya Plc,8.5,14.2,9.44,9.7,9.44,9.52,-0.08,0.84,1923300.0,0.0,Banking,1,2021
2,2021-01-06,ABSA,ABSA Bank Kenya Plc,8.5,14.2,9.4,9.68,9.44,9.44,0.0,0.0,233400.0,0.0,Banking,1,2021


## Step 1: Returns

In [3]:
df = df.groupby('Stock_code', group_keys=False).apply(calculate_returns)

print(f" Added: daily_return")

 Added: daily_return


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_returns)


## Step 2: Volatility

In [4]:
df = df.groupby('Stock_code', group_keys=False).apply(calculate_volatility_features)
print(f"Added: volatility_5d, volatility_10d, volatility_20d")

Added: volatility_5d, volatility_10d, volatility_20d


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_volatility_features)


## Step 3: Risk Metrics

In [5]:
df = df.groupby('Stock_code', group_keys=False).apply(calculate_risk_metrics)
print(f"Added: downside_deviation_30d, var_95")

Added: downside_deviation_30d, var_95


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_risk_metrics)


## Step 4: Technical Indicators

In [6]:
df = df.groupby('Stock_code', group_keys=False).apply(calculate_technical_indicators)
print(f"Added: rsi, bb_width, macd")

Added: rsi, bb_width, macd


  df = df.groupby('Stock_code', group_keys=False).apply(calculate_technical_indicators)
