In [12]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

In [2]:
FEATURES_DIR = "data/features"
INPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_ml_ready.parquet")
OUTPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_labeled.parquet")

def add_binary_target(df, horizon=1, neutral_threshold=None):
    """
    Add classification target:
      1 = Up, 0 = Down (optionally, 2 = Neutral if threshold given)
    horizon: how many days ahead to predict (default = 1 day)
    neutral_threshold: % change threshold to classify as Neutral (optional)
    """
    # % change from today to horizon days ahead
    df["future_return"] = df["close"].shift(-horizon) / df["close"] - 1.0

    if neutral_threshold is None:
        # Binary classification
        df["target"] = (df["future_return"] > 0).astype(int)
    else:
        # Ternary classification: Up / Down / Neutral
        df["target"] = 0  # default Down
        df.loc[df["future_return"] > neutral_threshold, "target"] = 1  # Up
        df.loc[df["future_return"].between(-neutral_threshold, neutral_threshold), "target"] = 2  # Neutral

    return df

def make_labels():
    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError("Run data_pipeline first to generate ML-ready features")

    df = pd.read_parquet(INPUT_FILE)

    # Add labels
    df = add_binary_target(df, horizon=1)  # Predict next-day movement

    # Drop last row (no label possible at end)
    df = df.dropna(subset=["future_return", "target"]).reset_index(drop=True)

    # Save
    df.to_parquet(OUTPUT_FILE, index=False)
    print(f"Saved labeled dataset to {OUTPUT_FILE}")
    print(f"Shape: {df.shape}, Positive ratio: {df['target'].mean():.2f}")

make_labels()

Saved labeled dataset to data/features\BTC-USD_daily_labeled.parquet
Shape: (3709, 65), Positive ratio: 0.47


In [3]:
FEATURES_DIR = "data/features"
INPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_labeled.parquet")
SEQ_LEN = 60  # length of input sequence (days)
BATCH_SIZE = 32

def build_sequences(df, seq_len=20, target_col="target"):
    """
    Build X, y arrays from a dataframe for sequence models.
    X shape: (num_samples, seq_len, num_features)
    y shape: (num_samples,)
    """
    values = df.drop(columns=["timestamp", "future_return", target_col]).values
    targets = df[target_col].values

    X, y = [], []
    for i in range(len(df) - seq_len):
        X.append(values[i:i+seq_len])
        y.append(targets[i+seq_len])  # label corresponds to the day after the sequence
    return np.array(X), np.array(y)

def make_tf_dataset(X, y, batch_size=32, shuffle=True):
    """Convert numpy arrays to tf.data.Dataset."""
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(X))
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

def make_sequences():
    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError("Run make_labels first to generate labeled dataset")

    df = pd.read_parquet(INPUT_FILE)

    # Build sequences
    X, y = build_sequences(df, seq_len=SEQ_LEN)

    print(f"X shape: {X.shape}, y shape: {y.shape}, Positive ratio: {y.mean():.2f}")

    # Build TensorFlow Dataset
    ds = make_tf_dataset(X, y, batch_size=BATCH_SIZE)

    # Example: iterate a batch
    for xb, yb in ds.take(1):
        print(f"Batch X shape: {xb.shape}, Batch y shape: {yb.shape}")

make_sequences()

X shape: (3649, 60, 62), y shape: (3649,), Positive ratio: 0.47
Batch X shape: (32, 60, 62), Batch y shape: (32,)


In [4]:
FEATURES_DIR = "data/features"
INPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_labeled.parquet")

def baseline_log_reg():
    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError("Run make_labels first to generate labeled dataset")

    df = pd.read_parquet(INPUT_FILE)

    look_ahead = 3  # days
    threshold = 0.01  # +1% move

    df['future_return'] = df['close'].shift(-look_ahead) / df['close'] - 1
    df['target'] = (df['future_return'] > threshold).astype(int)


    # Drop non-feature columns
    X = df.drop(columns=["timestamp", "future_return", "target"]).values
    y = df["target"].values

    # Train/test split (80/20)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False  # no shuffle = respects time order
    )

    # Train Logistic Regression
    clf = LogisticRegression(max_iter=1000, class_weight="balanced")
    clf.fit(X_train, y_train)

    # Predict
    y_pred = clf.predict(X_test)
    y_prob = clf.predict_proba(X_test)[:, 1]

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)

    print("\nLogistic Regression Baseline Results")
    print(f"Accuracy   : {acc:.4f}")
    print(f"Precision  : {prec:.4f}")
    print(f"Recall     : {rec:.4f}")
    print(f"F1 Score   : {f1:.4f}")
    print(f"ROC-AUC    : {auc:.4f}")

baseline_log_reg()


Logistic Regression Baseline Results
Accuracy   : 0.4798
Precision  : 0.4798
Recall     : 1.0000
F1 Score   : 0.6485
ROC-AUC    : 0.5140


### Building Walk-Forward Validation

In [13]:
FEATURES_DIR = "data/features"
INPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_labeled.parquet")


# ----1. Build Sequences -----------
TOP_FEATURES = ['volatility_21d', 'volatility_10d', 'return_14d', 'return_3d', 'bollinger_down']

def build_sequences_filtered(df, seq_len=30, target_col="target"):
    """
    Build sequences but only with top selected features
    """
    values = df[TOP_FEATURES].values  # Use only top features
    targets = df[target_col].values

    X, y = [], []
    for i in range(len(df) - seq_len):
        X.append(values[i:i+seq_len])
        y.append(targets[i+seq_len])
    return np.array(X), np.array(y)

# ----2. Get Callbacks -----------
def get_callbacks(output_dir="experiments", model_name="model"):

    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(output_dir, f"{model_name}_{timestamp}")
    os.makedirs(run_dir, exist_ok=True)

    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=5,
            restore_best_weights=True,
            verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=3,
            min_lr=1e-6,
            verbose=1
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(run_dir, "best_model.h5"),
            monitor="val_loss",
            save_best_only=True,
            verbose=1
        ),
        tf.keras.callbacks.CSVLogger(
            filename=os.path.join(run_dir, "training_log.csv")
        )
    ]

    return callbacks, run_dir

# ----3. Walk Forward Validation -----------
def walk_forward_validation(build_model_fn, seq_len, 
                            train_size=0.7, val_size=0.1, test_size=0.2,
                            n_splits=3, batch_size=32, epochs=20, callbacks=None):
    """
    Perform walk-forward validation for time series models.
    
    Args:
        X, y: numpy arrays (sequences + labels)
        build_model_fn: function that returns a compiled tf.keras model
        seq_len: length of input sequences
        train_size, val_size, test_size: proportions of data
        n_splits: number of walk-forward splits
        batch_size, epochs: training params
        callbacks: list of Keras callbacks
    Returns:
        results: list of dicts with metrics for each split
    """

    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError("Run make_labels.py first to generate labeled dataset")

    df = pd.read_parquet(INPUT_FILE)
    look_ahead = 3  # days
    threshold = 0.01  # +1% move

    df['future_return'] = df['close'].shift(-look_ahead) / df['close'] - 1
    df['target'] = (df['future_return'] > threshold).astype(int)

    # Build sequences
    # X, y = build_sequences(df, seq_len)
    X, y = build_sequences_filtered(df, seq_len)

    results = []
    total_len = len(X)
    split_len = int(total_len * test_size)  # test size per split

    for i in range(n_splits):
        # Define rolling window indices
        end_test = total_len - i * split_len
        start_test = end_test - split_len
        end_train = start_test - 1

        X_train, y_train = X[:end_train], y[:end_train]
        X_test, y_test = X[start_test:end_test], y[start_test:end_test]

        # Further split train into train/val
        split_idx = int(len(X_train) * (1 - val_size))
        X_tr, X_val = X_train[:split_idx], X_train[split_idx:]
        y_tr, y_val = y_train[:split_idx], y_train[split_idx:]

        # Build datasets
        train_ds = tf.data.Dataset.from_tensor_slices((X_tr, y_tr)).shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
        test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        # Build model
        model = build_model_fn(seq_len, X.shape[2])
        
        # Compute class weights
        classes = np.unique(y_train)
        weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
        class_weights = dict(zip(classes, weights))

        # Train
        history = model.fit(train_ds, validation_data=val_ds, 
                            epochs=epochs, callbacks=callbacks, 
                            verbose=1, class_weight=class_weights)

        # Evaluate
        y_prob = model.predict(test_ds).ravel()
        y_pred = (y_prob > 0.5).astype(int)

        metrics = {
            "split": i+1,
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred),
            "recall": recall_score(y_test, y_pred),
            "f1": f1_score(y_test, y_pred),
            "roc_auc": roc_auc_score(y_test, y_prob)
        }
        val_loss, val_acc = model.evaluate(val_ds, verbose=0)
        metrics["val_loss"] = val_loss
        metrics["val_accuracy"] = val_acc
        metrics["model_path"] = callbacks[2].filepath

        results.append(metrics)

        print(f"\nSplit {i+1} Results: {metrics}")

    return results

### LSTM Model

In [15]:
def build_lstm(seq_len, num_features, config="small"):
    """
    Build LSTM model based on config size: 'small', 'medium', 'large'
    """
    if config == "small":
        lstm_units = [64]
        dense_units = [32]
        dropout_rate = 0.3

    elif config == "medium":
        lstm_units = [64, 32]
        dense_units = [32]
        dropout_rate = 0.3

    elif config == "large":
        lstm_units = [128, 64]
        dense_units = [64, 32]
        dropout_rate = 0.4

    # Build Sequential Model
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(seq_len, num_features)))

    # Add LSTM layers
    for i, units in enumerate(lstm_units):
        return_sequences = (i < len(lstm_units) - 1)  # True except last LSTM
        model.add(tf.keras.layers.LSTM(units, return_sequences=return_sequences))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Add Dense layers
    for units in dense_units:
        model.add(tf.keras.layers.Dense(units, activation="relu"))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output layer
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

    # Compile
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model

model_config = "medium"

callbacks, _ = get_callbacks(output_dir="experiments", model_name=f"lstm{model_config}")

results = walk_forward_validation(
    build_model_fn=lambda seq_len, num_features: build_lstm(seq_len, num_features, config=model_config),
    seq_len=30, 
    n_splits=3, 
    batch_size=32, 
    epochs=20, 
    callbacks=callbacks
)

# Convert list of dicts to DataFrame for easy analysis
results_df = pd.DataFrame(results)

# Find best split by lowest validation loss
best_idx = results_df["val_loss"].idxmin()
best_result = results_df.iloc[best_idx]

print(" BEST LSTM MODEL SUMMARY")
print("")
print(f"Best Split #: {best_result['split']}")
print(f"Validation Loss : {best_result['val_loss']:.4f}")
print(f"Validation Accuracy : {best_result['val_accuracy']:.4f}")
print(f"Test Accuracy : {best_result['accuracy']:.4f}")
print(f"Test F1 Score : {best_result['f1']:.4f}")
print(f"Test ROC-AUC : {best_result['roc_auc']:.4f}")
print(f"Saved Model Path : {best_result['model_path']}")

Epoch 1/20
[1m82/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - accuracy: 0.7370 - loss: 0.5802
Epoch 1: val_loss improved from inf to 0.70622, saving model to experiments\lstmmedium_20250923-185045\best_model.h5




[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 30ms/step - accuracy: 0.7357 - loss: 0.5815 - val_accuracy: 0.4746 - val_loss: 0.7062 - learning_rate: 0.0010
Epoch 2/20
[1m82/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.7419 - loss: 0.5329
Epoch 2: val_loss did not improve from 0.70622
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - accuracy: 0.7404 - loss: 0.5344 - val_accuracy: 0.4610 - val_loss: 0.7674 - learning_rate: 0.0010
Epoch 3/20
[1m81/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - accuracy: 0.7240 - loss: 0.4879
Epoch 3: val_loss did not improve from 0.70622
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.7215 - loss: 0.4909 - val_accuracy: 0.3932 - val_loss: 0.8244 - learning_rate: 0.0010
Epoch 4/20
[1m81/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - accuracy: 0.7487 - loss: 0.4691
Epoch 4: ReduceLROnPlateau 



[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 40ms/step - accuracy: 0.8006 - loss: 0.5740 - val_accuracy: 0.5113 - val_loss: 0.6937 - learning_rate: 0.0010
Epoch 2/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.6456 - loss: 0.5558
Epoch 2: val_loss did not improve from 0.69365
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - accuracy: 0.6456 - loss: 0.5566 - val_accuracy: 0.4977 - val_loss: 0.6991 - learning_rate: 0.0010
Epoch 3/20
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6405 - loss: 0.5385
Epoch 3: val_loss did not improve from 0.69365
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - accuracy: 0.6404 - loss: 0.5394 - val_accuracy: 0.4977 - val_loss: 0.7205 - learning_rate: 0.0010
Epoch 4/20
[1m62/63[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - accuracy: 0.7085 - loss: 0.5161
Epoch 4: ReduceLROnPlateau 



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 40ms/step - accuracy: 0.6423 - loss: 0.6230 - val_accuracy: 0.6014 - val_loss: 0.6454 - learning_rate: 0.0010
Epoch 2/20
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 37ms/step - accuracy: 0.6888 - loss: 0.5709
Epoch 2: val_loss did not improve from 0.64539
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - accuracy: 0.6883 - loss: 0.5720 - val_accuracy: 0.5000 - val_loss: 0.6821 - learning_rate: 0.0010
Epoch 3/20
[1m40/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - accuracy: 0.6972 - loss: 0.5633
Epoch 3: val_loss improved from 0.64539 to 0.56971, saving model to experiments\lstmmedium_20250923-185045\best_model.h5




[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.6983 - loss: 0.5625 - val_accuracy: 0.6757 - val_loss: 0.5697 - learning_rate: 0.0010
Epoch 4/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.7883 - loss: 0.5200
Epoch 4: val_loss did not improve from 0.56971
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.7870 - loss: 0.5206 - val_accuracy: 0.5270 - val_loss: 0.7757 - learning_rate: 0.0010
Epoch 5/20
[1m40/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - accuracy: 0.8000 - loss: 0.4697
Epoch 5: val_loss did not improve from 0.56971
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.7980 - loss: 0.4723 - val_accuracy: 0.5405 - val_loss: 0.7475 - learning_rate: 0.0010
Epoch 6/20
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 25ms/step - accuracy: 0.7795 - loss: 0.4352
Epoch 6: ReduceLROnPlateau 

### GRU Model

In [16]:
def build_gru(seq_len, num_features, config="medium"):
    """
    Build GRU model based on config size: 'small', 'medium', 'large'
    """
    if config == "small":
        gru_units = [64]
        dense_units = [32]
        dropout_rate = 0.3

    elif config == "medium":
        gru_units = [64, 32]
        dense_units = [32]
        dropout_rate = 0.3

    elif config == "large":
        gru_units = [128, 64]
        dense_units = [64, 32]
        dropout_rate = 0.4

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(seq_len, num_features)))

    # GRU layers
    for i, units in enumerate(gru_units):
        return_sequences = (i < len(gru_units) - 1)
        model.add(tf.keras.layers.GRU(units, return_sequences=return_sequences))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Dense layers
    for units in dense_units:
        model.add(tf.keras.layers.Dense(units, activation="relu"))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output layer
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

    # Compile
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model

model_config = "medium"

callbacks, _ = get_callbacks(output_dir="experiments", model_name=f"gru_{model_config}")

results_gru = walk_forward_validation(
    build_model_fn=lambda seq_len, num_features: build_gru(seq_len, num_features, config=model_config),
    seq_len=60, 
    n_splits=3, 
    batch_size=32, 
    epochs=20, 
    callbacks=callbacks
)

results_gru_df = pd.DataFrame(results_gru)
best_idx = results_gru_df["val_loss"].idxmin()
best_result = results_gru_df.iloc[best_idx]

print("\n BEST GRU MODEL SUMMARY")
print(f"Best Split #: {best_result['split']}")
print(f"Validation Loss : {best_result['val_loss']:.4f}")
print(f"Validation Accuracy : {best_result['val_accuracy']:.4f}")
print(f"Test Accuracy : {best_result['accuracy']:.4f}")
print(f"Test F1 Score : {best_result['f1']:.4f}")
print(f"Test ROC-AUC : {best_result['roc_auc']:.4f}")
print(f"Saved Model Path : {best_result['model_path']}")

Epoch 1/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.7643 - loss: 0.5702
Epoch 1: val_loss improved from inf to 0.68238, saving model to experiments\gru_medium_20250923-185219\best_model.h5




[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 80ms/step - accuracy: 0.7635 - loss: 0.5710 - val_accuracy: 0.5651 - val_loss: 0.6824 - learning_rate: 0.0010
Epoch 2/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - accuracy: 0.6913 - loss: 0.5564
Epoch 2: val_loss did not improve from 0.68238
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 91ms/step - accuracy: 0.6909 - loss: 0.5570 - val_accuracy: 0.4007 - val_loss: 0.7608 - learning_rate: 0.0010
Epoch 3/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.7621 - loss: 0.5103
Epoch 3: val_loss did not improve from 0.68238
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 96ms/step - accuracy: 0.7611 - loss: 0.5112 - val_accuracy: 0.3870 - val_loss: 0.7954 - learning_rate: 0.0010
Epoch 4/20
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.7338 - loss: 0.4717
Epoch 4: ReduceLROnPlateau



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 74ms/step - accuracy: 0.4394 - loss: 0.6555 - val_accuracy: 0.5646 - val_loss: 0.6670 - learning_rate: 0.0010
Epoch 2/20
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 0.6776 - loss: 0.6063
Epoch 2: val_loss did not improve from 0.66701
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 63ms/step - accuracy: 0.6776 - loss: 0.6059 - val_accuracy: 0.4762 - val_loss: 0.7000 - learning_rate: 0.0010
Epoch 3/20
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 46ms/step - accuracy: 0.6508 - loss: 0.5924
Epoch 3: val_loss did not improve from 0.66701
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 48ms/step - accuracy: 0.6521 - loss: 0.5917 - val_accuracy: 0.5306 - val_loss: 0.6782 - learning_rate: 0.0010
Epoch 4/20
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 50ms/step - accuracy: 0.6725 - loss: 0.5898
Epoch 4: ReduceLROnPlateau

### Conv1D Model

In [11]:
from sklearn.utils.class_weight import compute_class_weight

FEATURES_DIR = "data/features"
INPUT_FILE = os.path.join(FEATURES_DIR, "BTC-USD_daily_labeled.parquet")
seq_len = 20  # length of input sequence (days) 
test_size = 0.2
n_splits = 3

if not os.path.exists(INPUT_FILE):
    raise FileNotFoundError("Run make_labels.py first to generate labeled dataset")

df = pd.read_parquet(INPUT_FILE)
look_ahead = 3  # days
threshold = 0.01  # +1% move

df['future_return'] = df['close'].shift(-look_ahead) / df['close'] - 1
df['target'] = (df['future_return'] > threshold).astype(int)

# Build sequences
# X, y = build_sequences(df, seq_len)
X, y = build_sequences_filtered(df, seq_len)

results = []
total_len = len(X)
split_len = int(total_len * test_size)  # test size per split

for i in range(n_splits):
    # Define rolling window indices
    end_test = total_len - i * split_len
    start_test = end_test - split_len
    end_train = start_test - 1

    X_train, y_train = X[:end_train], y[:end_train]
    X_test, y_test = X[start_test:end_test], y[start_test:end_test]

# Compute class weights
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))

print("Class Weights:", class_weights)

Class Weights: {0: 0.5796703296703297, 1: 3.6379310344827585}


In [14]:
def build_conv1d(seq_len, num_features, config="medium"):
    """
    Improved Conv1D model for time series classification
    """
    if config == "small":
        filters = [32]
        kernel_size = 3
        dense_units = [32]
        dropout_rate = 0.2

    elif config == "medium":
        filters = [64, 32]
        kernel_size = 3
        dense_units = [64, 32]
        dropout_rate = 0.2

    elif config == "large":
        filters = [128, 64, 32]
        kernel_size = 5
        dense_units = [128, 64, 32]
        dropout_rate = 0.3

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(seq_len, num_features)))

    # === Conv1D Layers ===
    for f in filters:
        model.add(tf.keras.layers.Conv1D(filters=f, kernel_size=kernel_size, activation="relu", padding="causal"))
        model.add(tf.keras.layers.BatchNormalization())  # <-- helps stabilize training
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Replace Flatten with Global Pooling
    model.add(tf.keras.layers.GlobalAveragePooling1D())

    # === Dense Layers ===
    for units in dense_units:
        model.add(tf.keras.layers.Dense(units, activation="relu"))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Output Layer
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

    # Compile with lower learning rate
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),  # reduced LR
        loss="binary_crossentropy",
        metrics=["accuracy"]
    )
    return model


model_config = "medium"

callbacks, _ = get_callbacks(output_dir="experiments", model_name=f"conv1d_{model_config}")

results_conv = walk_forward_validation(
    build_model_fn=lambda seq_len, num_features: build_conv1d(seq_len, num_features, config=model_config),
    seq_len=20, 
    n_splits=3, 
    batch_size=32, 
    epochs=20, 
    callbacks=callbacks
)

results_conv_df = pd.DataFrame(results_conv)
best_idx = results_conv_df["val_loss"].idxmin()
best_result = results_conv_df.iloc[best_idx]

print("\n BEST CONV1D MODEL SUMMARY")
print(f"Best Split #: {best_result['split']}")
print(f"Validation Loss : {best_result['val_loss']:.4f}")
print(f"Validation Accuracy : {best_result['val_accuracy']:.4f}")
print(f"Test Accuracy : {best_result['accuracy']:.4f}")
print(f"Test F1 Score : {best_result['f1']:.4f}")
print(f"Test ROC-AUC : {best_result['roc_auc']:.4f}")
print(f"Saved Model Path : {best_result['model_path']}")

Epoch 1/20
[1m77/83[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 8ms/step - accuracy: 0.6296 - loss: 0.5973
Epoch 1: val_loss improved from inf to 0.68003, saving model to experiments\conv1d_medium_20250923-184550\best_model.h5




[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.6294 - loss: 0.6034 - val_accuracy: 0.6182 - val_loss: 0.6800 - learning_rate: 1.0000e-04
Epoch 2/20
[1m81/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.6334 - loss: 0.5774
Epoch 2: val_loss did not improve from 0.68003
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6331 - loss: 0.5796 - val_accuracy: 0.5811 - val_loss: 0.6804 - learning_rate: 1.0000e-04
Epoch 3/20
[1m82/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.6127 - loss: 0.5654
Epoch 3: val_loss did not improve from 0.68003
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6127 - loss: 0.5669 - val_accuracy: 0.5642 - val_loss: 0.6825 - learning_rate: 1.0000e-04
Epoch 4/20
[1m79/83[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.6154 - loss: 0.5544
Epoch 4: ReduceLRO



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5548 - loss: 0.5778 - val_accuracy: 0.5203 - val_loss: 0.6799 - learning_rate: 1.0000e-04
Epoch 9/20
[1m36/42[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 8ms/step - accuracy: 0.5678 - loss: 0.5361
Epoch 9: val_loss improved from 0.67993 to 0.67026, saving model to experiments\conv1d_medium_20250923-184550\best_model.h5




[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5733 - loss: 0.5378 - val_accuracy: 0.5270 - val_loss: 0.6703 - learning_rate: 1.0000e-04
Epoch 10/20
[1m36/42[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 8ms/step - accuracy: 0.6097 - loss: 0.5198
Epoch 10: val_loss did not improve from 0.67026
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6085 - loss: 0.5227 - val_accuracy: 0.5203 - val_loss: 0.6707 - learning_rate: 1.0000e-04
Epoch 11/20
[1m39/42[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 7ms/step - accuracy: 0.6079 - loss: 0.5159
Epoch 11: val_loss did not improve from 0.67026
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6080 - loss: 0.5195 - val_accuracy: 0.5203 - val_loss: 0.6729 - learning_rate: 1.0000e-04
Epoch 12/20
[1m39/42[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 10ms/step - accuracy: 0.6023 - loss: 0.5373
Epoch 12: val_



[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.6036 - loss: 0.5365 - val_accuracy: 0.5270 - val_loss: 0.6699 - learning_rate: 1.0000e-04
Epoch 13/20
[1m36/42[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 14ms/step - accuracy: 0.6389 - loss: 0.5515
Epoch 13: val_loss did not improve from 0.66993
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6385 - loss: 0.5466 - val_accuracy: 0.5203 - val_loss: 0.6746 - learning_rate: 1.0000e-04
Epoch 14/20
[1m37/42[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 8ms/step - accuracy: 0.6771 - loss: 0.5028
Epoch 14: val_loss did not improve from 0.66993
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6765 - loss: 0.5028 - val_accuracy: 0.5135 - val_loss: 0.6726 - learning_rate: 1.0000e-04
Epoch 15/20
[1m37/42[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 7ms/step - accuracy: 0.6644 - loss: 0.5009
Epoch 15: Red