In [1]:
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import talib
import data
from feature_engineering import FeaturesEngineering

# Ignore every FutureWarning raised from here on so they do not clutter cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Read Prices

In [2]:
# S3 coordinates of the screened-pairs table. This cell only reads a local copy
# of the file, so `bucket_name` is not used by any live statement here.
bucket_name = 'mscfe-capstone-project-bucket'
file_key = 'filtered_data.csv'

# Load the table and discard the 'Unnamed: 0' column
# (presumably the row index written out when the CSV was saved -- TODO confirm).
filtered = pd.read_csv(file_key).drop(columns=['Unnamed: 0'])
display(filtered)

Unnamed: 0,seriesX,seriesY,correlation,spread_adf_stat,spread_adf_pValue,spread_stationary,series_coint_stat,series_coint_pValue,series_cointegrated
0,GOOG,GOOGL,0.999931,-4.971937,2.5e-05,True,-4.976339,0.000182,True
1,META,DAXEX_ETF,0.888934,-2.917056,0.0434,True,-3.483091,0.033924,True
2,AMZN,BNBUSDT,0.877059,-3.631203,0.005192,True,-3.5451,0.028601,True
3,AMZN,TRXUSDT,0.814577,-3.869803,0.002269,True,-3.387413,0.043768,True
4,SPY_ETF,AVAXUSDT,0.847292,-3.309927,0.014441,True,-3.698512,0.018406,True
5,SPY_ETF,ETHUSDT,0.862483,-3.428205,0.010034,True,-3.737367,0.016394,True
6,EQQQ_ETF,BNBUSDT,0.88606,-3.816404,0.002744,True,-3.670509,0.019987,True
7,BNBUSDT,ADAUSDT,0.901459,-3.279504,0.01582,True,-3.775562,0.014607,True
8,BNBUSDT,BTCUSDT,0.884604,-2.909961,0.044202,True,-3.39316,0.043116,True
9,BNBUSDT,TRXUSDT,0.933562,-3.65638,0.00477,True,-4.003675,0.007095,True


In [3]:
# Work on every candidate pair except row 0 of `filtered`.
# NOTE(review): row 0 is GOOG/GOOGL in the table displayed by the previous cell;
# presumably it is skipped because both tickers are share classes of the same
# company -- confirm the intent.
candidates = filtered.iloc[1:]
banner = '********************'

print(banner)
# One "X Y" label per remaining row.
pairs = candidates['seriesX'] + ' ' + candidates['seriesY']
print('Trading pairs:')
for pair_label in pairs:
    print(pair_label)
print(banner)

# Each ticker once: the X legs first, then any Y legs not already listed
# (drop_duplicates keeps the first occurrence).
unique_tickers = pd.concat([candidates['seriesX'], candidates['seriesY']]).drop_duplicates()
print('Unique tickers:')
for ticker in unique_tickers:
    print(ticker)
print(banner)

# Preview the source data
file_key = 'df_full.csv'
df_full = pd.read_csv(file_key).drop(columns=['Unnamed: 0'])
# S3 alternative, kept for reference. It additionally needs an `s3` client and
# `StringIO`, neither of which is imported in the first cell of this notebook.
# obj = s3.get_object(Bucket=bucket_name, Key=file_key)
# df_full = pd.read_csv(StringIO(obj['Body'].read().decode('utf-8'))).drop(['Unnamed: 0'], axis=1)
# Fixed seed so the five preview rows are identical on every Restart & Run All.
display(df_full.sample(5, random_state=42))

********************
Trading pairs:
META DAXEX_ETF
AMZN BNBUSDT
AMZN TRXUSDT
SPY_ETF AVAXUSDT
SPY_ETF ETHUSDT
EQQQ_ETF BNBUSDT
BNBUSDT ADAUSDT
BNBUSDT BTCUSDT
BNBUSDT TRXUSDT
BNBUSDT XRPUSDT
********************
Unique tickers:
META
AMZN
SPY_ETF
EQQQ_ETF
BNBUSDT
DAXEX_ETF
TRXUSDT
AVAXUSDT
ETHUSDT
ADAUSDT
BTCUSDT
XRPUSDT
********************


Unnamed: 0,OpenTime,Open,High,Low,Close,Symbol
345844,2025-01-04 06:50:00,1.0003,1.0003,1.0002,1.0002,USDCUSDT
246218,2024-11-30 16:40:00,655.48,655.66,650.97,652.16,BNBUSDT
300879,2024-12-19 16:10:00,97516.0,97696.0,97385.53,97488.16,BTCUSDT
117294,2024-10-16 22:15:00,1.0001,1.0002,1.0001,1.0001,USDCUSDT
372433,2025-01-13 16:20:00,0.2199,0.2202,0.2197,0.2198,TRXUSDT


**Sample Pairs**

```
Pairs: META & BNBUSDT
Correlation: 0.787

Spread ADF Statistic: -3.3381
p-value: 0.013 (Spread is stationary)

Cointegration Test Statistic: -3.6076
p-value: 0.024 (Both series are cointegrated)
```

In [4]:
# Create the project's feature builder (imported from feature_engineering) with no arguments.
feat = FeaturesEngineering()
# Bare last expression: render the instance's default indicator settings.
feat.default_config

{'ema': [8, 21, 55],
 'macd': {'fast': 12, 'slow': 26, 'signal': 9},
 'rsi': [14],
 'bb': {'timeperiod': 20, 'nbdevup': 2, 'nbdevdn': 2},
 'atr': {'timeperiod': 14},
 'stoch': {'fastk_period': 14, 'slowk_period': 3, 'slowd_period': 3},
 'cci': {'timeperiod': 20},
 'willr': {'timeperiod': 14}}

In [5]:
# Sample pair used to demonstrate feature creation.
tickerX, tickerY = 'META', 'BNBUSDT'

# Each leg receives only its own rows of the long-format price table.
# config=None: the resulting columns (EMA8/21/55, MACD, RSI14, ...) line up with
# feat.default_config, so None presumably selects the defaults -- TODO confirm
# in feature_engineering.
df_feat = feat.create_features(
    tickerX,
    tickerY,
    df_full.loc[df_full['Symbol'] == tickerX],
    df_full.loc[df_full['Symbol'] == tickerY],
    config=None,
    dropna=True,
)

df_feat

Unnamed: 0_level_0,META_EMA8,META_EMA21,META_EMA55,META_MACD,META_RSI14,META_BBupper,META_BBmiddle,META_BBlower,META_ATR14,META_StochK14,...,BNBUSDT_RSI14,BNBUSDT_BBupper,BNBUSDT_BBmiddle,BNBUSDT_BBlower,BNBUSDT_ATR14,BNBUSDT_StochK14,BNBUSDT_StochD3,BNBUSDT_CCI20,BNBUSDT_WILLR14,Spread
OpenTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-10-14 17:00:00,594.214220,594.562755,595.189375,-0.321821,40.651971,595.740710,594.695505,593.650300,0.356894,1.522843e+00,...,58.189817,591.848469,589.8900,587.931531,0.739322,6.000000e+01,54.285714,131.214344,-17.142857,2.1250
2024-10-14 17:05:00,593.955505,594.425232,595.112968,-0.402728,36.169408,595.826211,594.587505,593.348799,0.372473,2.190840e-14,...,55.864824,591.978300,589.9500,587.921700,0.715084,6.571429e+01,60.000000,86.142322,-28.571429,1.9500
2024-10-14 17:10:00,593.703170,594.279302,595.031077,-0.479875,34.529282,595.855002,594.457505,593.060008,0.362297,2.190840e-14,...,54.688239,592.077194,590.0250,587.972806,0.678293,7.333333e+01,66.349206,63.578565,-34.285714,1.9200
2024-10-14 17:15:00,593.585799,594.178911,594.964788,-0.506530,39.118181,595.748311,594.338755,592.929199,0.361775,5.259259e+00,...,51.204099,592.095249,590.0850,588.074751,0.672700,6.190476e+01,66.984127,16.195857,-51.428571,2.8750
2024-10-14 17:20:00,593.388955,594.044464,594.883903,-0.559533,35.529759,595.715922,594.208755,592.701588,0.369863,5.259259e+00,...,50.059247,592.062328,590.1550,588.247672,0.638936,4.761905e+01,60.952381,-4.468820,-71.428571,2.6000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-17 18:35:00,613.952883,613.970253,613.956123,-0.056616,50.800371,614.888490,613.825295,612.762100,0.431433,8.657461e+01,...,35.174001,726.204551,723.9480,721.691449,0.687108,1.113184e-13,8.777270,-188.657153,-100.000000,-107.2925
2025-01-17 18:40:00,613.910020,613.951139,613.949118,-0.065935,48.078678,614.631796,613.756295,612.880794,0.423295,8.295188e+01,...,37.076008,725.938783,723.7195,721.500217,0.657314,2.857143e+00,1.459738,-148.820067,-91.428571,-107.8800
2025-01-17 18:45:00,613.941127,613.960126,613.952721,-0.049351,50.677954,614.573134,613.738290,612.903446,0.413774,8.225930e+01,...,33.366488,726.111375,723.5440,720.976625,0.678220,2.857143e+00,1.904762,-177.553814,-100.000000,-106.6400
2025-01-17 18:50:00,613.760877,613.884660,613.923338,-0.109185,43.276263,614.510959,613.679290,612.847621,0.449933,6.206794e+01,...,35.132225,726.081486,723.3485,720.615514,0.646919,4.945916e+00,3.553400,-140.002026,-93.733681,-107.8000


In [6]:
# Reduced indicator set: two EMA windows, two RSI windows and a single ATR.
config = dict(
    ema=[5, 15],
    rsi=[14, 21],
    atr=dict(timeperiod=14),
)

# Same sample pair as the default-config run; restated so this cell does not
# depend on the earlier one for these two names.
tickerX, tickerY = 'META', 'BNBUSDT'

# Each leg receives only its own rows of the long-format price table.
df_feat2 = feat.create_features(
    tickerX,
    tickerY,
    df_full.loc[df_full['Symbol'] == tickerX],
    df_full.loc[df_full['Symbol'] == tickerY],
    config=config,
    dropna=True,
)

df_feat2

Unnamed: 0_level_0,META_EMA5,META_EMA15,META_RSI14,META_RSI21,META_ATR14,BNBUSDT_EMA5,BNBUSDT_EMA15,BNBUSDT_RSI14,BNBUSDT_RSI21,BNBUSDT_ATR14,Spread
OpenTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-10-14 14:15:00,594.816013,595.623040,51.385234,54.219439,0.684715,588.591516,588.284137,57.438715,53.846154,0.318867,5.2500
2024-10-14 14:20:00,594.590676,595.437660,50.200891,53.398265,0.650807,588.694344,588.361120,54.794981,52.160954,0.310377,5.2400
2024-10-14 14:25:00,594.183784,595.179203,46.013175,50.456181,0.659321,588.862896,588.465980,57.923432,54.408266,0.309635,4.1700
2024-10-14 14:30:00,593.955856,594.969302,46.819766,50.935404,0.621512,588.841931,588.507732,52.687722,51.050801,0.316090,4.7000
2024-10-14 14:35:00,593.750571,594.765639,45.910607,50.306565,0.588547,588.861287,588.556766,53.811751,51.831067,0.300655,4.4400
...,...,...,...,...,...,...,...,...,...,...,...
2025-01-17 18:35:00,614.014678,613.939154,50.800371,50.472493,0.431433,722.443332,723.525879,35.174001,40.726973,0.687108,-107.2925
2025-01-17 18:40:00,613.929785,613.916760,48.078678,48.955457,0.423295,722.175554,723.290144,37.076008,41.858616,0.657314,-107.8800
2025-01-17 18:45:00,613.969857,613.933415,50.677954,50.385642,0.413774,721.680370,722.965126,33.366488,39.100670,0.678220,-106.6400
2025-01-17 18:50:00,613.689905,613.832988,43.276263,46.084563,0.449933,721.430246,722.710735,35.132225,40.146751,0.646919,-107.8000


## Scaler

In [11]:
# Scale the custom-config feature table two ways; each call returns the scaled
# frame together with a scaler object.
# NOTE(review): both calls receive the whole of df_feat2. If a train/test split
# happens later, confirm the scalers are not fitted on rows that end up in the
# test set.
df_norm, standard_scaler = feat.normalize_features(df_feat2, scaler='StandardScaler')
df_minmax, minmax_scaler = feat.normalize_features(df_feat2, scaler='MinMax')

In [12]:
# Show the scaler object returned by the scaler='StandardScaler' call.
standard_scaler

In [13]:
# Show the scaler object returned by the scaler='MinMax' call.
minmax_scaler

In [14]:
# Show the frame produced with scaler='StandardScaler' (pandas elides the middle rows when rendering).
df_norm

Unnamed: 0_level_0,META_EMA5,META_EMA15,META_RSI14,META_RSI21,META_ATR14,BNBUSDT_EMA5,BNBUSDT_EMA15,BNBUSDT_RSI14,BNBUSDT_RSI21,BNBUSDT_ATR14,Spread
OpenTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-10-14 14:15:00,0.172232,0.211480,0.059908,0.340651,-0.199159,-1.182640,-1.188825,0.394012,0.171988,-0.994646,1.704364
2024-10-14 14:20:00,0.161774,0.202837,-0.033576,0.263813,-0.297698,-1.180726,-1.187388,0.206861,0.035547,-1.005973,1.704107
2024-10-14 14:25:00,0.142890,0.190786,-0.364125,-0.011482,-0.272957,-1.177588,-1.185432,0.428325,0.217499,-1.006962,1.676609
2024-10-14 14:30:00,0.132312,0.181000,-0.300458,0.033360,-0.382830,-1.177979,-1.184653,0.057687,-0.054336,-0.998351,1.690229
2024-10-14 14:35:00,0.122785,0.171504,-0.372221,-0.025482,-0.478628,-1.177618,-1.183738,0.137257,0.008838,-1.018942,1.683547
...,...,...,...,...,...,...,...,...,...,...,...
2025-01-17 18:35:00,1.063235,1.065468,0.013743,-0.009956,-0.935206,1.308821,1.334795,-1.182118,-0.890197,-0.503394,-1.187938
2025-01-17 18:40:00,1.059295,1.064424,-0.201089,-0.151907,-0.958856,1.303837,1.330397,-1.047474,-0.798574,-0.543140,-1.203037
2025-01-17 18:45:00,1.061155,1.065200,0.004080,-0.018082,-0.986524,1.294620,1.324332,-1.310072,-1.021869,-0.515250,-1.171169
2025-01-17 18:50:00,1.048162,1.060518,-0.580158,-0.420540,-0.881445,1.289964,1.319585,-1.185075,-0.937174,-0.557008,-1.200981


In [15]:
# Show the frame produced with scaler='MinMax' (pandas elides the middle rows when rendering).
df_minmax

Unnamed: 0_level_0,META_EMA5,META_EMA15,META_RSI14,META_RSI21,META_ATR14,BNBUSDT_EMA5,BNBUSDT_EMA15,BNBUSDT_RSI14,BNBUSDT_RSI21,BNBUSDT_ATR14,Spread
OpenTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-10-14 14:15:00,0.514158,0.520817,0.473940,0.527181,0.178508,0.192564,0.182085,0.568208,0.528202,0.010767,0.901849
2024-10-14 14:20:00,0.511482,0.518569,0.458940,0.514979,0.164701,0.193025,0.182446,0.538087,0.507498,0.009358,0.901795
2024-10-14 14:25:00,0.506649,0.515436,0.405899,0.471263,0.168168,0.193780,0.182937,0.573730,0.535108,0.009236,0.896061
2024-10-14 14:30:00,0.503942,0.512891,0.416115,0.478383,0.152772,0.193686,0.183132,0.514079,0.493859,0.010306,0.898901
2024-10-14 14:35:00,0.501504,0.510422,0.404600,0.469039,0.139349,0.193772,0.183361,0.526885,0.503445,0.007746,0.897508
...,...,...,...,...,...,...,...,...,...,...,...
2025-01-17 18:35:00,0.742178,0.742877,0.466532,0.471505,0.075374,0.792065,0.815049,0.314542,0.367021,0.071849,0.298727
2025-01-17 18:40:00,0.741170,0.742605,0.432060,0.448963,0.072060,0.790866,0.813945,0.336212,0.380924,0.066907,0.295579
2025-01-17 18:45:00,0.741646,0.742807,0.464982,0.470214,0.068183,0.788648,0.812424,0.293949,0.347041,0.070374,0.302224
2025-01-17 18:50:00,0.738321,0.741589,0.371234,0.406304,0.082907,0.787528,0.811234,0.314066,0.359893,0.065182,0.296008
