# Integrated Data Models

The models in this notebook are trained on all collected data, including historical price data, sentiment and public interest data, correlated asset data, and feature engineered technical indicators.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, GRU, Conv1D, MaxPooling1D, Flatten

warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

In [2]:
data=pd.read_csv(r'C:\Users\madha\Desktop\Dissertation\Data\Master Data\Feature Engineering\FeatureEngineered_BTC.csv')

In [3]:
data.tail()

Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,...,boll,boll_ub,boll_lb,boll_width,kdjk,kdjd,kdj_signal,macd,macds,macdh
345,2025-07-10,2781,111329.195981,116608.784676,110660.749453,115987.206197,95911610000.0,2307032000000.0,0.525458,75.0,...,107669.665504,113817.149054,101522.181954,12294.9671,81.793074,74.888402,0.0,1644.884702,1001.960808,642.923893
346,2025-07-11,2781,115986.234797,118856.473739,115245.686349,117516.993668,86928360000.0,2337810000000.0,0.178156,84.0,...,108432.644753,115474.541671,101390.747835,14083.793836,83.999681,77.925495,0.0,2150.652051,1231.699057,918.952994
347,2025-07-12,2781,117530.712896,118219.900043,116977.023698,117435.230053,45524560000.0,2335906000000.0,-0.539831,91.0,...,109255.049279,116475.320345,102034.778214,14440.542132,85.234989,80.361993,0.0,2515.877212,1488.534688,1027.342524
348,2025-07-13,2781,117432.200846,119449.571906,117265.437865,119116.117549,49021090000.0,2369445000000.0,-0.436295,76.0,...,109931.966555,118167.735171,101696.19794,16471.537231,89.226523,83.316836,0.0,2907.439207,1772.315592,1135.123615
349,2025-07-14,2781,119115.78751,123091.612801,118959.196786,119849.70572,181746400000.0,2384183000000.0,-0.157348,138.0,...,110622.170237,119751.795633,101492.54484,18259.250793,85.887001,84.173558,0.0,3239.605192,2065.773512,1173.83168


### Preprocessing

We will only keep OHLCV data, market cap and the target feature from this set.

In [4]:
data.columns

Index(['date', 'name', 'open', 'high', 'low', 'close', 'volume', 'marketCap',
       'daily_weighted_sentiment', 'comment_volume', 'BTC_trends', 'gold_spot',
       'gspc_spot', 'ndx_spot', 'price_change', 'target', 'percent_change_24h',
       'percent_change_3d', 'percent_change_7d', 'percent_change_14d', 'ma_7d',
       'price_vs_ma7d', 'ma_14d', 'price_vs_ma14d', 'close_7_ema',
       'close_14_ema', 'dx', 'adx', 'adxr', 'rsi_7', 'rsi', 'stochrsi', 'atr',
       'mfi', 'boll', 'boll_ub', 'boll_lb', 'boll_width', 'kdjk', 'kdjd',
       'kdj_signal', 'macd', 'macds', 'macdh'],
      dtype='object')

In [5]:
df = data

In [6]:
df.head()

Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,...,boll,boll_ub,boll_lb,boll_width,kdjk,kdjd,kdj_signal,macd,macds,macdh
0,2024-07-30,2781,66819.052658,66987.672308,65323.191979,66201.016226,31380490000.0,1306384000000.0,-0.045274,36.0,...,66458.853902,69262.34789,63655.359913,5606.987977,58.349189,65.633342,0.0,157.101078,171.382629,-14.281551
1,2024-07-31,2781,66201.271077,66810.212692,64532.046298,64619.249649,31292790000.0,1275323000000.0,-0.074737,13.0,...,66343.878636,69204.23571,63483.521562,5720.714149,44.762545,58.676409,0.0,7.219335,137.599047,-130.379712
2,2024-08-01,2781,64625.840445,65593.244771,62248.939991,65357.501563,40975550000.0,1289712000000.0,0.00085,36.0,...,66285.856455,69096.411014,63475.301897,5621.109117,43.231546,53.528122,0.0,-61.48959,96.864049,-158.353639
3,2024-08-02,2781,65353.498474,65523.223571,61184.893198,61415.064573,43060880000.0,1212008000000.0,-0.075659,9.0,...,66015.256906,69579.901833,62450.611979,7129.289854,29.69263,45.582958,0.0,-361.92477,3.423001,-365.347771
4,2024-08-03,2781,61414.808494,62148.371762,59836.527372,60680.094699,31753030000.0,1197456000000.0,-0.3752,16.0,...,65734.458895,69976.2955,61492.62229,8483.67321,22.565146,37.910354,0.0,-635.557331,-126.241731,-509.3156


In [7]:
df['date'] = pd.to_datetime(df['date'])
df.sort_values(by='date', inplace=True)
df['day_of_week'] = df['date'].dt.dayofweek

In [8]:
df.head(10)

Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,...,boll_ub,boll_lb,boll_width,kdjk,kdjd,kdj_signal,macd,macds,macdh,day_of_week
0,2024-07-30,2781,66819.052658,66987.672308,65323.191979,66201.016226,31380490000.0,1306384000000.0,-0.045274,36.0,...,69262.34789,63655.359913,5606.987977,58.349189,65.633342,0.0,157.101078,171.382629,-14.281551,1
1,2024-07-31,2781,66201.271077,66810.212692,64532.046298,64619.249649,31292790000.0,1275323000000.0,-0.074737,13.0,...,69204.23571,63483.521562,5720.714149,44.762545,58.676409,0.0,7.219335,137.599047,-130.379712,2
2,2024-08-01,2781,64625.840445,65593.244771,62248.939991,65357.501563,40975550000.0,1289712000000.0,0.00085,36.0,...,69096.411014,63475.301897,5621.109117,43.231546,53.528122,0.0,-61.48959,96.864049,-158.353639,3
3,2024-08-02,2781,65353.498474,65523.223571,61184.893198,61415.064573,43060880000.0,1212008000000.0,-0.075659,9.0,...,69579.901833,62450.611979,7129.289854,29.69263,45.582958,0.0,-361.92477,3.423001,-365.347771,4
4,2024-08-03,2781,61414.808494,62148.371762,59836.527372,60680.094699,31753030000.0,1197456000000.0,-0.3752,16.0,...,69976.2955,61492.62229,8483.67321,22.565146,37.910354,0.0,-635.557331,-126.241731,-509.3156,5
5,2024-08-04,2781,60676.094177,61062.989554,57210.803329,58116.976961,31758920000.0,1146845000000.0,-0.204341,38.0,...,70706.282682,60000.886915,10705.395767,17.407554,31.076087,0.0,-1005.864608,-304.218236,-701.646372,6
6,2024-08-05,2781,58110.298456,58268.827409,49121.237378,53991.457797,108991100000.0,1065581000000.0,-0.068634,7.0,...,72181.914961,57414.685733,14767.229227,19.385076,27.179083,0.0,-1552.859757,-556.27132,-996.588437,0
7,2024-08-06,2781,53991.347588,57059.918617,53973.272276,56034.316591,49300480000.0,1106050000000.0,-0.068867,6.0,...,72754.851193,56033.301893,16721.5493,23.966833,26.108333,0.0,-1820.818966,-811.060864,-1009.758102,1
8,2024-08-07,2781,56040.632123,57726.881052,54620.509521,55027.460692,41637560000.0,1086126000000.0,0.007592,15.0,...,73300.519721,54592.972765,18707.546956,26.997105,26.40459,1.0,-2075.451657,-1065.440616,-1010.011042,2
9,2024-08-08,2781,55030.029026,62673.763977,54766.728423,61710.137564,45298470000.0,1218352000000.0,-0.028721,28.0,...,73006.707333,54386.783479,18619.923855,41.720756,31.509979,0.0,-1789.308883,-1210.901187,-578.407696,3


In [9]:
df.columns

Index(['date', 'name', 'open', 'high', 'low', 'close', 'volume', 'marketCap',
       'daily_weighted_sentiment', 'comment_volume', 'BTC_trends', 'gold_spot',
       'gspc_spot', 'ndx_spot', 'price_change', 'target', 'percent_change_24h',
       'percent_change_3d', 'percent_change_7d', 'percent_change_14d', 'ma_7d',
       'price_vs_ma7d', 'ma_14d', 'price_vs_ma14d', 'close_7_ema',
       'close_14_ema', 'dx', 'adx', 'adxr', 'rsi_7', 'rsi', 'stochrsi', 'atr',
       'mfi', 'boll', 'boll_ub', 'boll_lb', 'boll_width', 'kdjk', 'kdjd',
       'kdj_signal', 'macd', 'macds', 'macdh', 'day_of_week'],
      dtype='object')

In [10]:
features = df[['day_of_week','name', 'open', 'high', 'low', 'close', 'volume', 'marketCap',
       'daily_weighted_sentiment', 'comment_volume', 'BTC_trends', 'gold_spot',
       'gspc_spot', 'ndx_spot', 'percent_change_24h',
       'percent_change_3d', 'percent_change_7d', 'percent_change_14d', 'ma_7d',
       'price_vs_ma7d', 'ma_14d', 'price_vs_ma14d', 'close_7_ema',
       'close_14_ema', 'dx', 'adx', 'adxr', 'rsi_7', 'rsi', 'stochrsi', 'atr',
       'mfi', 'boll', 'boll_ub', 'boll_lb', 'boll_width', 'kdjk', 'kdjd',
       'kdj_signal', 'macd', 'macds', 'macdh']]
target = df['target']
target = target.replace(-1, 0)

In [11]:
split_point = int(len(df) * 0.85)
X_test_set = features[split_point:] 
y_test_set = target[split_point:]   
actuals = y_test_set.tolist()

In [12]:
window_sizes = [int(len(df) * 0.85), 150, 100, 60, 30, 20]

Creating a sequence function for sequence models.

In [13]:
def create_sequences(features, target, time_steps=10):
    Xs, ys = [], []
    for i in range(len(features) - time_steps):
        Xs.append(features[i:(i + time_steps)])
        ys.append(target[i + time_steps])
    return np.array(Xs), np.array(ys)

TIME_STEPS = 10 

## Models

The models are tested against windows that include the following sizes: 
- 85% of the dataset (297 days)
- 150 days 
- 100 days
- 60 days
- 30 days
- 20 days

### Logistic Regression

In [14]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]

        # This takes one day's data at one time
        X_test = X_test_set.iloc[[i]] 

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = LogisticRegression()
        model.fit(X_train_scaled, y_train)
        prediction = model.predict(X_test_scaled)[0]
        predictions.append(prediction)


    print(f"\nEvaluation for Logistic Regression with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for Logistic Regression with Window Size: 297
Accuracy: 0.4528
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.29      0.17      0.22        23
      UP (1)       0.51      0.67      0.58        30

    accuracy                           0.45        53
   macro avg       0.40      0.42      0.40        53
weighted avg       0.41      0.45      0.42        53

--------------------------------------------------

Evaluation for Logistic Regression with Window Size: 150
Accuracy: 0.4906
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.42      0.43      0.43        23
      UP (1)       0.55      0.53      0.54        30

    accuracy                           0.49        53
   macro avg       0.48      0.48      0.48        53
weighted avg       0.49      0.49      0.49        53

--------------------------------------------------

Evaluation for Logistic Regression with Window 

### Random Forest

In [15]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
        model.fit(X_train, y_train) 
        prediction = model.predict(X_test)[0]
        predictions.append(prediction)

    print(f"\nEvaluation for Random Forest with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for Random Forest with Window Size: 297
Accuracy: 0.3962
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.34      0.43      0.38        23
      UP (1)       0.46      0.37      0.41        30

    accuracy                           0.40        53
   macro avg       0.40      0.40      0.40        53
weighted avg       0.41      0.40      0.40        53

--------------------------------------------------

Evaluation for Random Forest with Window Size: 150
Accuracy: 0.3962
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.36      0.52      0.43        23
      UP (1)       0.45      0.30      0.36        30

    accuracy                           0.40        53
   macro avg       0.41      0.41      0.39        53
weighted avg       0.41      0.40      0.39        53

--------------------------------------------------

Evaluation for Random Forest with Window Size: 100
Accuracy

### XG Boost

In [16]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
        model.fit(X_train, y_train) 
        prediction = model.predict(X_test)[0]
        predictions.append(prediction)

    print(f"\nEvaluation for XGB with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for XGB with Window Size: 297
Accuracy: 0.4151
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.35      0.39      0.37        23
      UP (1)       0.48      0.43      0.46        30

    accuracy                           0.42        53
   macro avg       0.41      0.41      0.41        53
weighted avg       0.42      0.42      0.42        53

--------------------------------------------------

Evaluation for XGB with Window Size: 150
Accuracy: 0.4906
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.43      0.52      0.47        23
      UP (1)       0.56      0.47      0.51        30

    accuracy                           0.49        53
   macro avg       0.49      0.49      0.49        53
weighted avg       0.50      0.49      0.49        53

--------------------------------------------------

Evaluation for XGB with Window Size: 100
Accuracy: 0.4906
Classification Report

### MLP Classifier

In [17]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = Sequential([
            Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train_scaled, y_train, epochs=10, batch_size=16, verbose=0)
    
        prediction = (model.predict(X_test_scaled, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)
        
    print(f"\nEvaluation for MLP with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for MLP with Window Size: 297
Accuracy: 0.3962
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.34      0.43      0.38        23
      UP (1)       0.46      0.37      0.41        30

    accuracy                           0.40        53
   macro avg       0.40      0.40      0.40        53
weighted avg       0.41      0.40      0.40        53

--------------------------------------------------

Evaluation for MLP with Window Size: 150
Accuracy: 0.4151
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.37      0.48      0.42        23
      UP (1)       0.48      0.37      0.42        30

    accuracy                           0.42        53
   macro avg       0.42      0.42      0.42        53
weighted avg       0.43      0.42      0.42        53

--------------------------------------------------

Evaluation for MLP with Window Size: 100
Accuracy: 0.4717
Classification Report

### LSTM

In [18]:
X_test_seq = pd.concat([features.iloc[split_point - TIME_STEPS:split_point], X_test_set])
y_test_seq = pd.concat([target.iloc[split_point - TIME_STEPS:split_point], y_test_set])

In [19]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        
        model = Sequential([
            LSTM(50, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for LSTM with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for LSTM with Window Size: 297
Accuracy: 0.6038
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.55      0.52      0.53        23
      UP (1)       0.65      0.67      0.66        30

    accuracy                           0.60        53
   macro avg       0.60      0.59      0.59        53
weighted avg       0.60      0.60      0.60        53

--------------------------------------------------

Evaluation for LSTM with Window Size: 150
Accuracy: 0.5660
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.50      0.52      0.51        23
      UP (1)       0.62      0.60      0.61        30

    accuracy                           0.57        53
   macro avg       0.56      0.56      0.56        53
weighted avg       0.57      0.57      0.57        53

--------------------------------------------------

Evaluation for LSTM with Window Size: 100
Accuracy: 0.6226
Classification Rep

### GRU

In [20]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]

        model = Sequential([
            GRU(50, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for GRU with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for GRU with Window Size: 297
Accuracy: 0.5472
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.48      0.52      0.50        23
      UP (1)       0.61      0.57      0.59        30

    accuracy                           0.55        53
   macro avg       0.54      0.54      0.54        53
weighted avg       0.55      0.55      0.55        53

--------------------------------------------------

Evaluation for GRU with Window Size: 150
Accuracy: 0.6792
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.64      0.61      0.62        23
      UP (1)       0.71      0.73      0.72        30

    accuracy                           0.68        53
   macro avg       0.67      0.67      0.67        53
weighted avg       0.68      0.68      0.68        53

--------------------------------------------------

Evaluation for GRU with Window Size: 100
Accuracy: 0.5849
Classification Report

### 1D-CNN

In [21]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        

        model = Sequential([
            Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for 1D-CNN with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for 1D-CNN with Window Size: 297
Accuracy: 0.6038
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.54      0.65      0.59        23
      UP (1)       0.68      0.57      0.62        30

    accuracy                           0.60        53
   macro avg       0.61      0.61      0.60        53
weighted avg       0.62      0.60      0.61        53

--------------------------------------------------

Evaluation for 1D-CNN with Window Size: 150
Accuracy: 0.5849
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.53      0.43      0.48        23
      UP (1)       0.62      0.70      0.66        30

    accuracy                           0.58        53
   macro avg       0.57      0.57      0.57        53
weighted avg       0.58      0.58      0.58        53

--------------------------------------------------

Evaluation for 1D-CNN with Window Size: 100
Accuracy: 0.5472
Classificati

### Ensemble: 1D-CNN and LSTM

In [22]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        

        model = Sequential([
            Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            MaxPooling1D(pool_size=2),
            LSTM(50, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for 1D-CNN-LSTM with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for 1D-CNN-LSTM with Window Size: 297
Accuracy: 0.5283
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.45      0.43      0.44        23
      UP (1)       0.58      0.60      0.59        30

    accuracy                           0.53        53
   macro avg       0.52      0.52      0.52        53
weighted avg       0.53      0.53      0.53        53

--------------------------------------------------

Evaluation for 1D-CNN-LSTM with Window Size: 150
Accuracy: 0.6415
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.60      0.52      0.56        23
      UP (1)       0.67      0.73      0.70        30

    accuracy                           0.64        53
   macro avg       0.63      0.63      0.63        53
weighted avg       0.64      0.64      0.64        53

--------------------------------------------------

Evaluation for 1D-CNN-LSTM with Window Size: 100
Accuracy: 0.60