# Historical Data, Sentiment Analysis and Correlated Features Models

The models in this notebook are trained on historical data obtained from CMC, sentiment data from reddit, google trends data and data from correlated features.

In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, GRU, Conv1D, MaxPooling1D, Flatten

warnings.filterwarnings('ignore')
tf.get_logger().setLevel('ERROR')

In [18]:
data=pd.read_csv(r'C:\Users\madha\Desktop\Dissertation\Data\Master Data\Feature Engineering\FeatureEngineered_BTC.csv')

In [19]:
data.tail()

Unnamed: 0,date,name,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,...,boll,boll_ub,boll_lb,boll_width,kdjk,kdjd,kdj_signal,macd,macds,macdh
345,2025-07-10,2781,111329.195981,116608.784676,110660.749453,115987.206197,95911610000.0,2307032000000.0,0.525458,75.0,...,107669.665504,113817.149054,101522.181954,12294.9671,81.793074,74.888402,0.0,1644.884702,1001.960808,642.923893
346,2025-07-11,2781,115986.234797,118856.473739,115245.686349,117516.993668,86928360000.0,2337810000000.0,0.178156,84.0,...,108432.644753,115474.541671,101390.747835,14083.793836,83.999681,77.925495,0.0,2150.652051,1231.699057,918.952994
347,2025-07-12,2781,117530.712896,118219.900043,116977.023698,117435.230053,45524560000.0,2335906000000.0,-0.539831,91.0,...,109255.049279,116475.320345,102034.778214,14440.542132,85.234989,80.361993,0.0,2515.877212,1488.534688,1027.342524
348,2025-07-13,2781,117432.200846,119449.571906,117265.437865,119116.117549,49021090000.0,2369445000000.0,-0.436295,76.0,...,109931.966555,118167.735171,101696.19794,16471.537231,89.226523,83.316836,0.0,2907.439207,1772.315592,1135.123615
349,2025-07-14,2781,119115.78751,123091.612801,118959.196786,119849.70572,181746400000.0,2384183000000.0,-0.157348,138.0,...,110622.170237,119751.795633,101492.54484,18259.250793,85.887001,84.173558,0.0,3239.605192,2065.773512,1173.83168


### Preprocessing

We will only keep OHLCV data, market cap and the target feature from this set.

In [20]:
data.columns

Index(['date', 'name', 'open', 'high', 'low', 'close', 'volume', 'marketCap',
       'daily_weighted_sentiment', 'comment_volume', 'BTC_trends', 'gold_spot',
       'gspc_spot', 'ndx_spot', 'price_change', 'target', 'percent_change_24h',
       'percent_change_3d', 'percent_change_7d', 'percent_change_14d', 'ma_7d',
       'price_vs_ma7d', 'ma_14d', 'price_vs_ma14d', 'close_7_ema',
       'close_14_ema', 'dx', 'adx', 'adxr', 'rsi_7', 'rsi', 'stochrsi', 'atr',
       'mfi', 'boll', 'boll_ub', 'boll_lb', 'boll_width', 'kdjk', 'kdjd',
       'kdj_signal', 'macd', 'macds', 'macdh'],
      dtype='object')

In [21]:
cols=['date','open','high','low','close','volume','marketCap','target','daily_weighted_sentiment', 'comment_volume', 'BTC_trends','gold_spot',
       'gspc_spot', 'ndx_spot']
df = data[cols]

In [22]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,marketCap,target,daily_weighted_sentiment,comment_volume,BTC_trends,gold_spot,gspc_spot,ndx_spot
0,2024-07-30,66819.052658,66987.672308,65323.191979,66201.016226,31380490000.0,1306384000000.0,-1.0,-0.045274,36.0,34.0,2405.0,5436.439941,18796.26953
1,2024-07-31,66201.271077,66810.212692,64532.046298,64619.249649,31292790000.0,1275323000000.0,1.0,-0.074737,13.0,34.0,2426.5,5522.299805,19362.42969
2,2024-08-01,64625.840445,65593.244771,62248.939991,65357.501563,40975550000.0,1289712000000.0,-1.0,0.00085,36.0,34.0,2435.0,5446.680176,18890.39063
3,2024-08-02,65353.498474,65523.223571,61184.893198,61415.064573,43060880000.0,1212008000000.0,-1.0,-0.075659,9.0,34.0,2425.699951,5346.560059,18440.84961
4,2024-08-03,61414.808494,62148.371762,59836.527372,60680.094699,31753030000.0,1197456000000.0,-1.0,-0.3752,16.0,34.0,2425.699951,5346.560059,18440.84961


In [23]:
df['date'] = pd.to_datetime(df['date'])
df.sort_values(by='date', inplace=True)
df['day_of_week'] = df['date'].dt.dayofweek

In [24]:
df.head(10)

Unnamed: 0,date,open,high,low,close,volume,marketCap,target,daily_weighted_sentiment,comment_volume,BTC_trends,gold_spot,gspc_spot,ndx_spot,day_of_week
0,2024-07-30,66819.052658,66987.672308,65323.191979,66201.016226,31380490000.0,1306384000000.0,-1.0,-0.045274,36.0,34.0,2405.0,5436.439941,18796.26953,1
1,2024-07-31,66201.271077,66810.212692,64532.046298,64619.249649,31292790000.0,1275323000000.0,1.0,-0.074737,13.0,34.0,2426.5,5522.299805,19362.42969,2
2,2024-08-01,64625.840445,65593.244771,62248.939991,65357.501563,40975550000.0,1289712000000.0,-1.0,0.00085,36.0,34.0,2435.0,5446.680176,18890.39063,3
3,2024-08-02,65353.498474,65523.223571,61184.893198,61415.064573,43060880000.0,1212008000000.0,-1.0,-0.075659,9.0,34.0,2425.699951,5346.560059,18440.84961,4
4,2024-08-03,61414.808494,62148.371762,59836.527372,60680.094699,31753030000.0,1197456000000.0,-1.0,-0.3752,16.0,34.0,2425.699951,5346.560059,18440.84961,5
5,2024-08-04,60676.094177,61062.989554,57210.803329,58116.976961,31758920000.0,1146845000000.0,-1.0,-0.204341,38.0,52.0,2425.699951,5346.560059,18440.84961,6
6,2024-08-05,58110.298456,58268.827409,49121.237378,53991.457797,108991100000.0,1065581000000.0,1.0,-0.068634,7.0,52.0,2401.699951,5186.330078,17895.16016,0
7,2024-08-06,53991.347588,57059.918617,53973.272276,56034.316591,49300480000.0,1106050000000.0,-1.0,-0.068867,6.0,52.0,2389.100098,5240.029785,18077.91992,1
8,2024-08-07,56040.632123,57726.881052,54620.509521,55027.460692,41637560000.0,1086126000000.0,1.0,0.007592,15.0,52.0,2390.5,5199.5,17867.36914,2
9,2024-08-08,55030.029026,62673.763977,54766.728423,61710.137564,45298470000.0,1218352000000.0,-1.0,-0.028721,28.0,52.0,2422.199951,5319.310059,18413.82031,3


In [25]:
features = df[['day_of_week','open','high','low','close','volume','marketCap','daily_weighted_sentiment', 'comment_volume', 'BTC_trends','gold_spot',
       'gspc_spot', 'ndx_spot']]
target = df['target']
target = target.replace(-1, 0)

In [26]:
features

Unnamed: 0,day_of_week,open,high,low,close,volume,marketCap,daily_weighted_sentiment,comment_volume,BTC_trends,gold_spot,gspc_spot,ndx_spot
0,1,66819.052658,66987.672308,65323.191979,66201.016226,3.138049e+10,1.306384e+12,-0.045274,36.0,34.0,2405.000000,5436.439941,18796.26953
1,2,66201.271077,66810.212692,64532.046298,64619.249649,3.129279e+10,1.275323e+12,-0.074737,13.0,34.0,2426.500000,5522.299805,19362.42969
2,3,64625.840445,65593.244771,62248.939991,65357.501563,4.097555e+10,1.289712e+12,0.000850,36.0,34.0,2435.000000,5446.680176,18890.39063
3,4,65353.498474,65523.223571,61184.893198,61415.064573,4.306088e+10,1.212008e+12,-0.075659,9.0,34.0,2425.699951,5346.560059,18440.84961
4,5,61414.808494,62148.371762,59836.527372,60680.094699,3.175303e+10,1.197456e+12,-0.375200,16.0,34.0,2425.699951,5346.560059,18440.84961
...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,3,111329.195981,116608.784676,110660.749453,115987.206197,9.591161e+10,2.307032e+12,0.525458,75.0,45.0,3317.399902,6280.459961,22829.25977
346,4,115986.234797,118856.473739,115245.686349,117516.993668,8.692836e+10,2.337810e+12,0.178156,84.0,45.0,3356.000000,6259.750000,22780.59961
347,5,117530.712896,118219.900043,116977.023698,117435.230053,4.552456e+10,2.335906e+12,-0.539831,91.0,45.0,3356.000000,6259.750000,22780.59961
348,6,117432.200846,119449.571906,117265.437865,119116.117549,4.902109e+10,2.369445e+12,-0.436295,76.0,53.0,3356.000000,6259.750000,22780.59961


In [27]:
split_point = int(len(df) * 0.85)
X_test_set = features[split_point:] 
y_test_set = target[split_point:]   
actuals = y_test_set.tolist()

In [28]:
window_sizes = [int(len(df) * 0.85), 150, 100, 60, 30, 20]

Creating a sequence function for sequence models.

In [29]:
def create_sequences(features, target, time_steps=10):
    Xs, ys = [], []
    for i in range(len(features) - time_steps):
        Xs.append(features[i:(i + time_steps)])
        ys.append(target[i + time_steps])
    return np.array(Xs), np.array(ys)

TIME_STEPS = 10 

## Models

The models are tested against windows that include the following sizes: 
- 85% of the dataset (297 days)
- 150 days 
- 100 days
- 60 days
- 30 days
- 20 days

### Logistic Regression

In [32]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]

        # This takes one day's data at one time
        X_test = X_test_set.iloc[[i]] 

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = LogisticRegression()
        model.fit(X_train_scaled, y_train)
        prediction = model.predict(X_test_scaled)[0]
        predictions.append(prediction)


    print(f"\nEvaluation for Logistic Regression with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for Logistic Regression with Window Size: 297
Accuracy: 0.4906
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.44      0.61      0.51        23
      UP (1)       0.57      0.40      0.47        30

    accuracy                           0.49        53
   macro avg       0.50      0.50      0.49        53
weighted avg       0.51      0.49      0.49        53

--------------------------------------------------

Evaluation for Logistic Regression with Window Size: 150
Accuracy: 0.5849
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.52      0.52      0.52        23
      UP (1)       0.63      0.63      0.63        30

    accuracy                           0.58        53
   macro avg       0.58      0.58      0.58        53
weighted avg       0.58      0.58      0.58        53

--------------------------------------------------

Evaluation for Logistic Regression with Window 

### Random Forest

In [33]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
        model.fit(X_train, y_train) 
        prediction = model.predict(X_test)[0]
        predictions.append(prediction)

    print(f"\nEvaluation for Random Forest with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for Random Forest with Window Size: 297
Accuracy: 0.4151
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.36      0.43      0.39        23
      UP (1)       0.48      0.40      0.44        30

    accuracy                           0.42        53
   macro avg       0.42      0.42      0.41        53
weighted avg       0.43      0.42      0.42        53

--------------------------------------------------

Evaluation for Random Forest with Window Size: 150
Accuracy: 0.4340
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.37      0.43      0.40        23
      UP (1)       0.50      0.43      0.46        30

    accuracy                           0.43        53
   macro avg       0.44      0.43      0.43        53
weighted avg       0.44      0.43      0.44        53

--------------------------------------------------

Evaluation for Random Forest with Window Size: 100
Accuracy

### XG Boost

In [34]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
        model.fit(X_train, y_train) 
        prediction = model.predict(X_test)[0]
        predictions.append(prediction)

    print(f"\nEvaluation for XGB with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for XGB with Window Size: 297
Accuracy: 0.4528
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.39      0.48      0.43        23
      UP (1)       0.52      0.43      0.47        30

    accuracy                           0.45        53
   macro avg       0.46      0.46      0.45        53
weighted avg       0.46      0.45      0.45        53

--------------------------------------------------

Evaluation for XGB with Window Size: 150
Accuracy: 0.4717
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.42      0.57      0.48        23
      UP (1)       0.55      0.40      0.46        30

    accuracy                           0.47        53
   macro avg       0.48      0.48      0.47        53
weighted avg       0.49      0.47      0.47        53

--------------------------------------------------

Evaluation for XGB with Window Size: 100
Accuracy: 0.4528
Classification Report

### MLP Classifier

In [35]:
for window_size in window_sizes:
    predictions = []
    
    for i in range(len(X_test_set)):
        end_index = split_point + i
        start_index = end_index - window_size
            
        X_train = features.iloc[start_index:end_index]
        y_train = target.iloc[start_index:end_index]
        X_test = X_test_set.iloc[[i]]

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = Sequential([
            Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train_scaled, y_train, epochs=10, batch_size=16, verbose=0)
    
        prediction = (model.predict(X_test_scaled, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)
        
    print(f"\nEvaluation for MLP with Window Size: {window_size}")
    current_actuals = actuals[len(actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for MLP with Window Size: 297
Accuracy: 0.5283
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.47      0.70      0.56        23
      UP (1)       0.63      0.40      0.49        30

    accuracy                           0.53        53
   macro avg       0.55      0.55      0.53        53
weighted avg       0.56      0.53      0.52        53

--------------------------------------------------

Evaluation for MLP with Window Size: 150
Accuracy: 0.4340
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.38      0.48      0.42        23
      UP (1)       0.50      0.40      0.44        30

    accuracy                           0.43        53
   macro avg       0.44      0.44      0.43        53
weighted avg       0.45      0.43      0.44        53

--------------------------------------------------

Evaluation for MLP with Window Size: 100
Accuracy: 0.4717
Classification Report

### LSTM

In [15]:
X_test_seq = pd.concat([features.iloc[split_point - TIME_STEPS:split_point], X_test_set])
y_test_seq = pd.concat([target.iloc[split_point - TIME_STEPS:split_point], y_test_set])

In [37]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        
        model = Sequential([
            LSTM(50, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for LSTM with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for LSTM with Window Size: 297
Accuracy: 0.5283
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.46      0.57      0.51        23
      UP (1)       0.60      0.50      0.55        30

    accuracy                           0.53        53
   macro avg       0.53      0.53      0.53        53
weighted avg       0.54      0.53      0.53        53

--------------------------------------------------

Evaluation for LSTM with Window Size: 150
Accuracy: 0.5660
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.50      0.39      0.44        23
      UP (1)       0.60      0.70      0.65        30

    accuracy                           0.57        53
   macro avg       0.55      0.55      0.54        53
weighted avg       0.56      0.57      0.56        53

--------------------------------------------------

Evaluation for LSTM with Window Size: 100
Accuracy: 0.5472
Classification Rep

### GRU

In [38]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]

        model = Sequential([
            GRU(50, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for GRU with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for GRU with Window Size: 297
Accuracy: 0.5094
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.45      0.65      0.54        23
      UP (1)       0.60      0.40      0.48        30

    accuracy                           0.51        53
   macro avg       0.53      0.53      0.51        53
weighted avg       0.54      0.51      0.50        53

--------------------------------------------------

Evaluation for GRU with Window Size: 150
Accuracy: 0.5660
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.50      0.48      0.49        23
      UP (1)       0.61      0.63      0.62        30

    accuracy                           0.57        53
   macro avg       0.56      0.56      0.56        53
weighted avg       0.56      0.57      0.56        53

--------------------------------------------------

Evaluation for GRU with Window Size: 100
Accuracy: 0.5094
Classification Report

### 1D-CNN

In [39]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        

        model = Sequential([
            Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for 1D-CNN with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for 1D-CNN with Window Size: 297
Accuracy: 0.4717
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.42      0.61      0.50        23
      UP (1)       0.55      0.37      0.44        30

    accuracy                           0.47        53
   macro avg       0.49      0.49      0.47        53
weighted avg       0.50      0.47      0.47        53

--------------------------------------------------

Evaluation for 1D-CNN with Window Size: 150
Accuracy: 0.5472
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.48      0.48      0.48        23
      UP (1)       0.60      0.60      0.60        30

    accuracy                           0.55        53
   macro avg       0.54      0.54      0.54        53
weighted avg       0.55      0.55      0.55        53

--------------------------------------------------

Evaluation for 1D-CNN with Window Size: 100
Accuracy: 0.6038
Classificati

### Ensemble: 1D-CNN and LSTM

In [40]:
for window_size in window_sizes:
    predictions = []
    num_predictions_possible = len(X_test_seq) - TIME_STEPS
    loop_actuals = y_test_seq[TIME_STEPS:].tolist()

    for i in range(num_predictions_possible):
        end_index = split_point + i + TIME_STEPS
        start_index = end_index - window_size - TIME_STEPS

        window_features = features.iloc[start_index:end_index]
        window_target = target.iloc[start_index:end_index]

        scaler = StandardScaler()
        window_features_scaled = scaler.fit_transform(window_features)
        
        X_seq, y_seq = create_sequences(window_features_scaled, window_target.values, TIME_STEPS)
        X_train, y_train = X_seq[:-1], y_seq[:-1]
        X_test = X_seq[-1:]
        

        model = Sequential([
            Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(TIME_STEPS, X_train.shape[2])),
            MaxPooling1D(pool_size=2),
            LSTM(50, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit(X_train, y_train, epochs=10, verbose=0)
        
        prediction = (model.predict(X_test, verbose=0) > 0.5).astype("int32")[0][0]
        predictions.append(prediction)

    print(f"\nEvaluation for 1D-CNN-LSTM with Window Size: {window_size}")
    current_actuals = loop_actuals[len(loop_actuals) - len(predictions):]
    accuracy = accuracy_score(current_actuals, predictions)
    print(f"Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(current_actuals, predictions, target_names=['DOWN (0)', 'UP (1)']))
    print("-" * 50)


Evaluation for 1D-CNN-LSTM with Window Size: 297
Accuracy: 0.5094
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.44      0.52      0.48        23
      UP (1)       0.58      0.50      0.54        30

    accuracy                           0.51        53
   macro avg       0.51      0.51      0.51        53
weighted avg       0.52      0.51      0.51        53

--------------------------------------------------

Evaluation for 1D-CNN-LSTM with Window Size: 150
Accuracy: 0.5283
Classification Report:
              precision    recall  f1-score   support

    DOWN (0)       0.38      0.13      0.19        23
      UP (1)       0.56      0.83      0.67        30

    accuracy                           0.53        53
   macro avg       0.47      0.48      0.43        53
weighted avg       0.48      0.53      0.46        53

--------------------------------------------------

Evaluation for 1D-CNN-LSTM with Window Size: 100
Accuracy: 0.56