# Feature Engineering

In [None]:
import pandas as pd
import numpy as np
import os
from stockstats import StockDataFrame as Sdf
import stockstats

## XRP


### Creating Target Variable

In [5]:
df = pd.read_csv(r'C:\Users\madha\Desktop\Dissertation\Data\Master Data\Merged\XRP_merged.csv')

df['date'] = pd.to_datetime(df['date'])
df.sort_values(by='date', inplace=True)

In [6]:
df.tail()

Unnamed: 0.1,Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,XRP_trends,gold_spot,gspc_spot,ndx_spot
360,4,2025-07-11,2781,2.546631,2.96427,2.517547,2.734864,15789100000.0,161716100000.0,-0.25615,49.0,33.0,3356.0,6259.75,22780.59961
361,3,2025-07-12,2781,2.73485,2.841254,2.66991,2.737636,8647115000.0,161880800000.0,0.024163,37.0,33.0,3356.0,6259.75,22780.59961
362,2,2025-07-13,2781,2.737636,2.8785,2.726897,2.835941,7159804000.0,167692800000.0,-0.075498,93.0,63.0,3356.0,6259.75,22780.59961
363,1,2025-07-14,2781,2.835923,3.021792,2.827182,2.954977,12393160000.0,174727100000.0,0.531816,3.0,63.0,3351.5,6268.560059,22855.63086
364,0,2025-07-15,2781,2.954884,2.957473,2.808502,2.918952,8879750000.0,172602100000.0,-0.034089,46.0,63.0,3329.800049,6243.759766,22884.58984


In [7]:
def assign_target(change):
    if pd.isna(change):
        return np.nan  
    elif change > 0:
        return 1       
    else:
        return -1     

In [8]:
df['price_change'] = df['close'].diff()
df['next_day_price_change'] = df['price_change'].shift(-1) 

In [9]:
df['target'] = df['next_day_price_change'].apply(assign_target)
df.drop(columns=['next_day_price_change'], inplace=True)
df.dropna(subset=['target'], inplace=True)

In [10]:
df.head()

Unnamed: 0.1,Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,XRP_trends,gold_spot,gspc_spot,ndx_spot,price_change,target
0,364,2024-07-16,2781,0.537753,0.592527,0.534284,0.579376,3380662000.0,32332260000.0,0.0,0.0,25.0,2462.399902,5667.200195,20398.61914,,1.0
1,363,2024-07-17,2781,0.579373,0.636644,0.579373,0.626486,3708255000.0,35026490000.0,-0.0465,8.0,25.0,2454.800049,5588.27002,19799.14063,0.04711,-1.0
2,362,2024-07-18,2781,0.626589,0.635987,0.560432,0.569574,2986712000.0,31844590000.0,-0.0465,8.0,25.0,2451.800049,5544.589844,19705.08984,-0.056912,1.0
3,361,2024-07-19,2781,0.569608,0.586012,0.542638,0.573752,2340175000.0,32076910000.0,-0.007005,32.0,25.0,2395.5,5505.0,19522.61914,0.004178,1.0
4,360,2024-07-20,2781,0.573729,0.603782,0.571832,0.594651,1538359000.0,33246720000.0,-0.007005,32.0,25.0,2395.5,5505.0,19522.61914,0.020899,1.0


### Percentage Changes (24h, 3days, 7days)

In [11]:
df['percent_change_24h'] = df['close'].pct_change() * 100
df['percent_change_3d'] = df['close'].pct_change(periods=3) * 100
df['percent_change_7d'] = df['close'].pct_change(periods=7) * 100
df['percent_change_14d'] = df['close'].pct_change(periods=14) * 100

### Moving Average

ma_7d: This feature tells the model about the level of the recent price trend. It acts as a smoothed, less noisy version of the price itself. Currently taken for 7 Days.

price_vs_ma7d: This feature tells the model about the deviation from the recent trend. It measures how "overextended" or "oversold" the current price is compared to its recent average. A model might learn that when this value is extremely high (e.g., +15%), the price is likely to come back down, even if the trend is generally up.

In [12]:
df['ma_7d'] = df['close'].rolling(window=7).mean()
df['price_vs_ma7d'] = (df['close'] - df['ma_7d']) / df['ma_7d'] * 100

In [13]:
df['ma_14d'] = df['close'].rolling(window=14).mean()
df['price_vs_ma14d'] = (df['close'] - df['ma_14d']) / df['ma_14d'] * 100

In [14]:
df.tail()

Unnamed: 0.1,Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,...,price_change,target,percent_change_24h,percent_change_3d,percent_change_7d,percent_change_14d,ma_7d,price_vs_ma7d,ma_14d,price_vs_ma14d
359,5,2025-07-10,2781,2.405636,2.558056,2.397295,2.546683,6049788000.0,150424900000.0,-0.187919,...,0.141047,1.0,5.863172,11.987475,12.801976,20.933426,2.321405,9.704393,2.263424,12.514601
360,4,2025-07-11,2781,2.546631,2.96427,2.517547,2.734864,15789100000.0,161716100000.0,-0.25615,...,0.188181,1.0,7.389261,18.329176,23.128911,27.641165,2.394794,14.200385,2.305727,18.611754
361,3,2025-07-12,2781,2.73485,2.841254,2.66991,2.737636,8647115000.0,161880800000.0,0.024163,...,0.002772,1.0,0.101353,13.800902,23.399103,25.232408,2.468953,10.882453,2.345127,16.73721
362,2,2025-07-13,2781,2.737636,2.8785,2.726897,2.835941,7159804000.0,167692800000.0,-0.075498,...,0.098305,1.0,3.590885,11.358236,24.791642,28.493306,2.549439,11.237855,2.390046,18.656335
363,1,2025-07-14,2781,2.835923,3.021792,2.827182,2.954977,12393160000.0,174727100000.0,0.531816,...,0.119036,-1.0,4.197407,8.048415,29.941748,31.992268,2.64671,11.647176,2.441205,21.045831


In [15]:
df.head(15)

Unnamed: 0.1,Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,...,price_change,target,percent_change_24h,percent_change_3d,percent_change_7d,percent_change_14d,ma_7d,price_vs_ma7d,ma_14d,price_vs_ma14d
0,364,2024-07-16,2781,0.537753,0.592527,0.534284,0.579376,3380662000.0,32332260000.0,0.0,...,,1.0,,,,,,,,
1,363,2024-07-17,2781,0.579373,0.636644,0.579373,0.626486,3708255000.0,35026490000.0,-0.0465,...,0.04711,-1.0,8.131156,,,,,,,
2,362,2024-07-18,2781,0.626589,0.635987,0.560432,0.569574,2986712000.0,31844590000.0,-0.0465,...,-0.056912,1.0,-9.084272,,,,,,,
3,361,2024-07-19,2781,0.569608,0.586012,0.542638,0.573752,2340175000.0,32076910000.0,-0.007005,...,0.004178,1.0,0.733586,-0.970597,,,,,,
4,360,2024-07-20,2781,0.573729,0.603782,0.571832,0.594651,1538359000.0,33246720000.0,-0.007005,...,0.020899,1.0,3.64253,-5.081401,,,,,,
5,359,2024-07-21,2781,0.594653,0.607188,0.575616,0.597467,1489477000.0,33404770000.0,-0.007005,...,0.002816,1.0,0.473477,4.89716,,,,,,
6,358,2024-07-22,2781,0.59748,0.622663,0.586667,0.607637,2052124000.0,33972670000.0,-0.148717,...,0.01017,-1.0,1.702202,5.905812,,,0.592706,2.519105,,
7,357,2024-07-23,2781,0.607637,0.616576,0.584276,0.597457,1657693000.0,33403650000.0,-0.142015,...,-0.01018,1.0,-1.675346,0.471808,3.120826,,0.595289,0.364163,,
8,356,2024-07-24,2781,0.59746,0.632155,0.592577,0.618843,1933085000.0,34599690000.0,-0.142015,...,0.021386,-1.0,3.579574,3.577853,-1.219856,,0.594197,4.147777,,
9,355,2024-07-25,2781,0.618852,0.623824,0.586939,0.599816,2306854000.0,33566120000.0,-0.142015,...,-0.019027,1.0,-3.074654,-1.287101,5.309608,,0.598518,0.216933,,


### Stockstats Features

In [17]:
df.rename(columns={
    'Close': 'close',
    'High': 'high',
    'Low': 'low',
    'Open': 'open',
    'Volume': 'volume'
}, inplace=True)

df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

df.drop('Unnamed: 0', axis=1, inplace=True)

In [18]:
stock_df = Sdf.retype(df)

In [19]:
stock_df['close_7_ema']
stock_df['close_14_ema']
stock_df['adx']
stock_df['rsi_7']
stock_df['rsi']
stock_df['stochrsi']
stock_df['atr']
stock_df['mfi']

date
2024-07-16    0.500000
2024-07-17    0.500000
2024-07-18    0.500000
2024-07-19    0.500000
2024-07-20    0.500000
                ...   
2025-07-10    0.800784
2025-07-11    0.898932
2025-07-12    0.911115
2025-07-13    0.919541
2025-07-14    0.930483
Name: mfi, Length: 364, dtype: float64

In [20]:
stock_df['boll']
stock_df['boll_width'] = stock_df['boll_ub'] - stock_df['boll_lb']

In [21]:
stock_df['kdjk']
stock_df['kdjd']

#Checking if k line is over d line
stock_df['k_above_d'] = stock_df['kdjk'] > stock_df['kdjd']
stock_df['crossover'] = stock_df['k_above_d'].diff()

stock_df['kdj_signal'] = 0.0
# For Bullish signal
stock_df.loc[stock_df['crossover'] & stock_df['k_above_d'], 'kdj_signal'] = 1.0
# For Bearish signal
stock_df.loc[stock_df['crossover'] & ~stock_df['k_above_d'], 'kdj_signal'] = -1.0

stock_df = stock_df.drop(['k_above_d', 'crossover'], axis=1)

print(stock_df[['kdjk', 'kdjd', 'kdj_signal']].tail(10))


                 kdjk       kdjd  kdj_signal
date                                        
2025-07-05  64.725562  68.088455         0.0
2025-07-06  67.788060  67.988323         0.0
2025-07-07  65.935610  67.304085         0.0
2025-07-08  70.984244  68.530805         1.0
2025-07-09  78.496332  71.852647         0.0
2025-07-10  84.705485  76.136926         0.0
2025-07-11  79.791463  77.355105         0.0
2025-07-12  76.636424  77.115545        -1.0
2025-07-13  78.800563  77.677217         1.0
2025-07-14  83.118311  79.490915         0.0


In [22]:
window=stockstats.set_dft_window('macd', (8, 20, 9))
stockstats.set_dft_window('macd', window)

(8, 20, 9)

In [23]:
stock_df = Sdf.retype(stock_df)

In [24]:
stock_df['macd']

date
2024-07-16    0.000000
2024-07-17    0.001057
2024-07-18   -0.000418
2024-07-19   -0.000928
2024-07-20   -0.000354
                ...   
2025-07-10    0.052895
2025-07-11    0.083217
2025-07-12    0.106246
2025-07-13    0.130920
2025-07-14    0.158256
Name: macd, Length: 364, dtype: float64

In [25]:
stock_df.head()

Unnamed: 0_level_0,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,XRP_trends,...,boll,boll_ub,boll_lb,boll_width,kdjk,kdjd,kdj_signal,macd,macds,macdh
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-07-16,2781,0.537753,0.592527,0.534284,0.579376,3380662000.0,32332260000.0,0.0,0.0,25.0,...,0.579376,,,,59.140213,53.046738,0.0,0.0,0.0,0.0
2024-07-17,2781,0.579373,0.636644,0.579373,0.626486,3708255000.0,35026490000.0,-0.0465,8.0,25.0,...,0.602931,0.669554,0.536307,0.133247,69.45225,58.515242,0.0,0.001057,0.000587,0.00047
2024-07-18,2781,0.626589,0.635987,0.560432,0.569574,2986712000.0,31844590000.0,-0.0465,8.0,25.0,...,0.591812,0.652663,0.53096,0.121703,57.793719,58.274734,-1.0,-0.000418,0.000175,-0.000593
2024-07-19,2781,0.569608,0.586012,0.542638,0.573752,2340175000.0,32076910000.0,-0.007005,32.0,25.0,...,0.587297,0.640162,0.534432,0.105731,51.382028,55.977166,0.0,-0.000928,-0.000198,-0.000729
2024-07-20,2781,0.573729,0.603782,0.571832,0.594651,1538359000.0,33246720000.0,-0.007005,32.0,25.0,...,0.588768,0.635021,0.542515,0.092506,53.913337,55.289223,0.0,-0.000354,-0.000245,-0.000109


In [26]:
all_features_df = pd.DataFrame(stock_df)

In [27]:
all_features_df.dropna(inplace=True)

In [28]:
all_features_df.to_csv(r'C:\Users\madha\Desktop\Dissertation\Data\Master Data\Feature Engineering\FeatureEngineered_XRP.csv')