<a href="https://colab.research.google.com/github/mittushaji25/crypto-xrp-analysis/blob/main/notebooks/03_feature_engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **🚀 XRPUSDT Crypto Market Analysis - Feature Engineering**


In [2]:
import pandas as pd
import numpy as np
# Load the cleaned XRPUSDT CSV into the DataFrame that every later cell extends.
# (The former `xrp_data = pd.DataFrame` line bound the DataFrame *class* itself
# and was overwritten immediately, so it has been dropped.)
xrp_data = pd.read_csv("/content/crypto-xrp-analysis/data/cleaned/xrpusdt_data_cleaned.csv")

## Step 1: Lag Features

In [3]:
# Lagged copies of the raw columns: new column name -> (source column, periods).
# NOTE(review): 'volumne' looks like a typo for 'volume'; the names are kept
# unchanged because these columns are written to the exported features CSV.
lag_specs = {
    'open_lag_1': ('open', 1),
    'close_lag_1': ('close', 1),
    'high_lag_1': ('high', 1),
    'low_lag_1': ('low', 1),
    'volumne_lag_1': ('volume', 1),
    'volumne_lag_3': ('volume', 3),
}
for lag_col, (source_col, periods) in lag_specs.items():
    xrp_data[lag_col] = xrp_data[source_col].shift(periods)

## Step 2: Candle Structure Features

In [4]:
# NOTE(review): `A` (re.ASCII) is not used anywhere in the visible cells; this
# import looks like an accidental auto-completion -- confirm and remove.
from re import A

# Full candle range (high to low) and absolute open-to-close body size.
xrp_data['candle_size'] = xrp_data['high'].sub(xrp_data['low'])
xrp_data['body_size'] = xrp_data['close'].sub(xrp_data['open']).abs()

In [5]:
# Shadows are measured from the candle body: the row-wise max/min of open and
# close mark the top and bottom of the body.
body_top = xrp_data[['open', 'close']].max(axis=1)
body_bottom = xrp_data[['open', 'close']].min(axis=1)
xrp_data['upper_shadow'] = xrp_data['high'].sub(body_top)
xrp_data['lower_shadow'] = body_bottom.sub(xrp_data['low'])

## Step 3: Trade Metrics & Market Behavior

In [6]:
# Share of volume bought by takers, and average volume per trade.
total_volume = xrp_data['volume']
xrp_data['buy_volume_ratio'] = xrp_data['taker_buy_base_volume'].div(total_volume)
xrp_data['avg_trade_size'] = total_volume.div(xrp_data['number_of_trades'])

## Step 4: Rolling Statistics

In [7]:
# Rolling mean (7 and 21 rows) and rolling standard deviation (14 rows) of close.
close_series = xrp_data['close']
xrp_data['ma_7'] = close_series.rolling(window=7).mean()
xrp_data['ma_21'] = close_series.rolling(window=21).mean()
xrp_data['std_14'] = close_series.rolling(window=14).std()

# High-low spread and its ratio to the previous row's spread.
hl_spread = xrp_data['high'].sub(xrp_data['low'])
xrp_data['hl_spread'] = hl_spread
xrp_data['hl_spread_change'] = hl_spread.div(hl_spread.shift(1))

## Step 5: Rate of Change & Momentum

In [8]:
# Simple and log returns of close versus the previous row.
close_px = xrp_data['close']
xrp_data['pct_change'] = close_px.pct_change()
xrp_data['log_return'] = np.log(close_px.div(close_px.shift(1)))

# Momentum: close minus the close 3 / 7 rows earlier.
xrp_data['momentum_3'] = close_px.diff(3)
xrp_data['momentum_7'] = close_px.diff(7)

## Step 6: Time-Based Features from Close Time

In [9]:
# Parse close_time once, then derive calendar features from the parsed values.
close_ts = pd.to_datetime(xrp_data['close_time'])
xrp_data['close_time'] = close_ts
xrp_data['day_of_week'] = close_ts.dt.dayofweek
# dayofweek is Monday=0 ... Sunday=6, so 5 and 6 are Saturday and Sunday.
weekend_mask = xrp_data['day_of_week'].isin([5, 6])
xrp_data['is_weekend'] = weekend_mask.astype(int)
xrp_data['month'] = close_ts.dt.month

## Step 7: Anomaly Detection & Signals

### Calculate RSI

In [10]:
# Compute RSI manually (14-period)
delta = xrp_data['close'].diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)

avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()

rs = avg_gain / avg_loss
xrp_data['rsi'] = 100 - (100 / (1 + rs))


### Add MACD & Signal Line Manually

In [11]:
# MACD parameters
short_ema = xrp_data['close'].ewm(span=12, adjust=False).mean()  # Fast EMA
long_ema = xrp_data['close'].ewm(span=26, adjust=False).mean()   # Slow EMA

# MACD and signal line
xrp_data['macd'] = short_ema - long_ema
xrp_data['signal_line'] = xrp_data['macd'].ewm(span=9, adjust=False).mean()

In [12]:
# Rolling 14-row z-score of volume; a spike is flagged when it exceeds 2.
volume = xrp_data['volume']
volume_window = volume.rolling(14)
xrp_data['volume_zscore'] = (volume - volume_window.mean()).div(volume_window.std())
xrp_data['volume_spike'] = xrp_data['volume_zscore'].gt(2).astype(int)

# Binary RSI / MACD signals. The 'rsi', 'macd' and 'signal_line' columns must
# already exist; nothing here checks for them.
xrp_data['rsi_buy'] = xrp_data['rsi'].lt(30).astype(int)
xrp_data['rsi_sell'] = xrp_data['rsi'].gt(70).astype(int)
xrp_data['macd_bullish'] = xrp_data['macd'].gt(xrp_data['signal_line']).astype(int)
xrp_data['macd_bearish'] = xrp_data['macd'].lt(xrp_data['signal_line']).astype(int)

## Step 8: Create Target Variable for Modeling

In [13]:
# Target: return from this row's close to the next row's close, plus a 0/1 flag
# that is 1 when that return is strictly positive.
next_close = xrp_data['close'].shift(-1)
xrp_data['future_return_1d'] = next_close.div(xrp_data['close']).sub(1)
xrp_data['price_direction'] = xrp_data['future_return_1d'].gt(0).astype(int)

In [14]:
# Drop every row that has a NaN in any column (lag, rolling and shifted-target
# columns introduce them at the edges), then write the feature table without
# the index.
xrp_data = xrp_data.dropna()
xrp_data.to_csv(
    '/content/crypto-xrp-analysis/data/cleaned/xrpusdt_features.csv',
    index=False,
)