In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers # type: ignore
import io
from typing import List, Tuple
import warnings

# Suppress FutureWarning from scikit-learn
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def create_training_testing_datasets(df, lookback_window=30, predict_horizon=1):
    """
    Creates training and testing datasets (X, Y) from a prepared DataFrame,
    applying scaling to both inputs and outputs.
    """
    if df is None or df.empty:
        print("Error: Input DataFrame is empty or None.")
        return None, None, None, None

    print(f"\n--- Creating Training/Testing Datasets ---")

    # Select features (inputs) and targets (outputs)
    input_features = [
        'RSI', 'MACD', 'MACD_Signal', 'Momentum', 'OBV', 'ATR',
        'EPS_Growth', 'Revenue_Growth', 'ROE'
    ]
    output_targets = ['RSI', 'MACD', 'close'] # 'close' here refers to the future close price

    # Ensure all required columns exist after calculations and merges
    missing_cols = [col for col in input_features + output_targets if col not in df.columns]
    if missing_cols:
        print(f"Error: Missing required columns in DataFrame: {missing_cols}")
        return None, None, None, None

    # Normalize indicators
    scaler_input = MinMaxScaler()
    scaler_output = MinMaxScaler()

    # Create input and output sequences
    X, Y = [], []
    # The range for `i` must ensure that `i + lookback_window + predict_horizon - 1`
    # does not exceed the length of the DataFrame.
    # The last valid index for `Y` will be `len(df) - 1`.
    # So, `i + lookback_window + predict_horizon - 1 <= len(df) - 1`
    # which simplifies to `i <= len(df) - lookback_window - predict_horizon`.
    for i in range(len(df) - lookback_window - predict_horizon + 1):
        # Input: `lookback_window` days of selected indicators
        X.append(df[input_features].iloc[i : i + lookback_window].values)
        # Output: `predict_horizon` day's RSI, MACD, and 'close' price
        Y.append(df[output_targets].iloc[i + lookback_window + predict_horizon - 1].values)

    X = np.array(X)
    Y = np.array(Y)

    if X.size == 0 or Y.size == 0:
        print(f"Not enough data after windowing with lookback_window={lookback_window} and predict_horizon={predict_horizon}.")
        print("Adjust lookback_window or predict_horizon, or ensure sufficient input data.")
        return None, None, None, None

    # Apply scaling to input features
    # Reshape X to (n_samples * lookback_window, n_features) for scaling
    original_shape_X = X.shape
    X_reshaped_for_scaling = X.reshape(-1, original_shape_X[-1])
    X_scaled_reshaped = scaler_input.fit_transform(X_reshaped_for_scaling)
    X_scaled = X_scaled_reshaped.reshape(original_shape_X)

    # Apply scaling to output targets
    Y_scaled = scaler_output.fit_transform(Y)

    print(f"Created X_scaled with shape: {X_scaled.shape}")
    print(f"Created Y_scaled with shape: {Y_scaled.shape}")

    return X_scaled, Y_scaled, scaler_input, scaler_output

In [3]:
# LOOK_BACK: This constant defines the size of the input sequence (or "look-back window").
# It specifies how many historical time steps the Transformer model will consider
# when making a prediction.
LOOK_BACK = 1300  # 5 year data
# PREDICT_AHEAD: This constant determines the number of future time steps the model
# will predict.
PREDICT_AHEAD = 30
# EMBED_DIM: This is the embedding dimension used throughout the Transformer model.
# It represents the size of the vector space into which input features are projected.
# A larger embedding dimension allows the model to capture more complex relationships
# but also increases computational cost. It's crucial that this is divisible by NUM_HEADS.
EMBED_DIM = 128
# NUM_HEADS: This specifies the number of "attention heads" in the MultiHeadSelfAttention
# layer. Multi-head attention allows the model to jointly attend to information from
# different representation subspaces at different positions. More heads can capture
# diverse patterns but also increase complexity.
NUM_HEADS = 8
# FF_DIM: This is the hidden dimension of the feed-forward network within each
# TransformerBlock. The feed-forward network processes the output of the attention
# mechanism. It typically expands the dimensionality before projecting it back
# to the EMBED_DIM.
FF_DIM = 256
# DROPOUT_RATE: This is the dropout rate applied for regularization within the
# TransformerBlocks. Dropout randomly sets a fraction of input units to 0 at each
# update during training, which helps prevent overfitting by forcing the network
# to learn more robust features.
DROPOUT_RATE = 0.2

In [4]:
# --- Data Loading and Preprocessing ---
def load_and_preprocess_data(csv_file_path): # Renamed argument to clarify it's a path
    """
    Loads data from a CSV file path, selects relevant features, and scales them.
    """
    # Load the CSV data into a pandas DataFrame
    print(f"Loading data from {csv_file_path}...")
    df = pd.read_csv(csv_file_path) # Now directly reads from the file path

    # Convert 'date' column to datetime objects and set as index
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date')

    # Define the features to be used as input and output
    # Based on the user's request: [RSI, MACD, MACD_Signal, Momentum, OBV, ATR, Revenue Growth, EPS Growth, ROE]
    features = ['RSI', 'MACD', 'MACD_Signal', 'Momentum', 'OBV', 'ATR', 'Revenue_Growth', 'EPS_Growth', 'ROE']

    # Select only the relevant features
    df_features = df[features]
    print(f"Initial DataFrame shape: {df_features.shape}")

    # Handle missing values: forward fill then backfill to ensure no NaNs remain
    df_features = df_features.fillna(method='ffill').fillna(method='bfill')

    # Initialize scalers for input and output features
    # For time series prediction where input and output features are the same,
    # we can use one scaler for simplicity, or separate if their scaling needs differ.
    # Here, we'll use one scaler for the features themselves.
    scaler_input = MinMaxScaler(feature_range=(0, 1))
    scaler_output = MinMaxScaler(feature_range=(0, 1))

    # Fit and transform the features
    scaled_data = scaler_input.fit_transform(df_features)
    # The output scaler will be fitted on the same data, but conceptually it's for inverse_transforming predictions
    scaler_output.fit(df_features)

    return scaled_data, scaler_input, scaler_output, features

In [5]:
def save_dataframe_to_csv(df: pd.DataFrame, ticker: str, filename: str = None) -> bool:
    """
    Saves a pandas DataFrame to a CSV file.
    """
    if not isinstance(df, pd.DataFrame) or df.empty:
        print("Error: Input is not a valid or non-empty DataFrame. Nothing to save.")
        return False

    if filename is None:
        filename = f"{ticker}_market_data.csv"

    try:
        # Save the DataFrame to a CSV file.
        # The index (which is the date) is crucial, so we ensure it's saved.
        df.to_csv(filename, index=True)
        print(f"\nDataFrame successfully saved to '{filename}'")
        return True
    except IOError as e:
        # Handle specific file system errors
        print(f"\nAn I/O error occurred while saving the file: {e}")
        return False
    except Exception as e:
        # Handle any other unexpected errors
        print(f"\nAn unexpected error occurred: {e}")
        return False

In [7]:
def create_sequences(data, look_back, predict_ahead):
    """
    Creates input (X) and output (Y) sequences for the Transformer model.

    Args:
        data (numpy.ndarray): The scaled time series data.
        look_back (int): The number of past time steps for each input sequence.
        predict_ahead (int): The number of future time steps for each output sequence.

    Returns:
        tuple: A tuple containing:
            - numpy.ndarray: Input sequences (X).
            - numpy.ndarray: Output sequences (Y).
    """
    X, Y = [], []
    for i in range(len(data) - look_back - predict_ahead + 1):
        # Input sequence: from current position 'i' up to 'look_back' steps
        X.append(data[i:(i + look_back)])
        # Output sequence: 'predict_ahead' steps starting right after the input sequence
        Y.append(data[(i + look_back):(i + look_back + predict_ahead)])
    return np.array(X), np.array(Y)


In [8]:
#  Transformer Model Architecture ---

class MultiHeadSelfAttention(layers.Layer):
    """
    Multi-Head Self-Attention layer as described in the Transformer paper.
    """
    def __init__(self, embed_dim, num_heads=8, **kwargs):
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

In [9]:
class PositionalEmbedding(layers.Layer):
    """
    Positional Embedding layer to inject sequence order information.
    """
    def __init__(self, sequence_length, embed_dim, **kwargs):
        super(PositionalEmbedding, self).__init__(**kwargs)
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embedding = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.scale = tf.math.sqrt(tf.cast(embed_dim, tf.float32))

    def call(self, inputs):
        length = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_positions = self.position_embedding(positions)
        return inputs * self.scale + embedded_positions

In [10]:
class TransformerBlock(layers.Layer):
    """
    A single Transformer block combining Multi-Head Attention and a Feed-Forward Network.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None): # <--- FIX IS HERE: Add `training=None`
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [11]:
def build_transformer_model(input_shape, output_sequence_length, output_features_count):
    """
    Builds the Transformer model for time series prediction.

    Args:
        input_shape (tuple): Shape of the input sequences (look_back, num_features).
        output_sequence_length (int): The length of the output sequence (predict_ahead).
        output_features_count (int): The number of features in the output.

    Returns:
        keras.Model: The compiled Transformer model.
    """
    inputs = layers.Input(shape=input_shape) # (None, LOOK_BACK, NUM_FEATURES)

    # Project input features to EMBED_DIM before positional embedding
    x = layers.TimeDistributed(layers.Dense(EMBED_DIM))(inputs) # Shape: (None, LOOK_BACK, EMBED_DIM)

    # Positional Embedding for input sequence
    x = PositionalEmbedding(input_shape[0], EMBED_DIM)(x) # Shape: (None, LOOK_BACK, EMBED_DIM)

    # Apply Transformer blocks
    transformer_block = TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM, DROPOUT_RATE)
    # Pass the `training` argument, which Keras automatically provides when
    # defining a model via the functional API.
    x = transformer_block(x, training=True) # <--- FIX: Pass `training=True` here, or better, pass `inputs.is_training` or infer from context

    # In Keras functional API, you typically chain layers, and Keras handles
    # the `training` argument propagation. When defining a custom Layer's `call`
    # method with `training` as a parameter, Keras expects it to be passed.

    # A more robust way in the functional API is to define `training` as a placeholder:
    # `training_arg = tf.keras.backend.learning_phase()`
    # And then pass `training=training_arg` to your custom layers.
    # However, for simple Dropout control within custom layers, Keras usually handles this automatically
    # if the `call` signature matches `(self, inputs, training=None)`.
    # Since your `call` does NOT have a default value for `training`, it's a required argument.

    # Let's directly pass it to the TransformerBlock.
    # When building the model, we implicitly define the computation graph.
    # Keras will handle `training` appropriately during .fit() and .predict().
    # The common practice is to pass it down.
    # x = transformer_block(x, training=inputs.is_training) # This is how it's often handled implicitly.
    # But since it's a direct Layer call, let's ensure it gets the argument.

    # The most common way to handle this in Keras Functional API
    # when a custom layer explicitly requires `training` is to ensure
    # your `call` method in `TransformerBlock` looks like:
    # `def call(self, inputs, training=None):`
    # (with a default `None`).
    # If you remove the default, then Keras doesn't know how to fill it automatically
    # in the functional API graph building unless you explicitly provide it in `Input`.

    # Let's fix your `TransformerBlock` call method first, as it's the more Keras-idiomatic way.
    # I will provide the updated TransformerBlock first.

    # Let's assume you have updated TransformerBlock's call method:
    # `def call(self, inputs, training=None):`
    # If so, then `x = transformer_block(x)` should work.
    # If not, you *must* pass it.

    # Given your current error, `training` is *missing*, meaning no default.
    # The most direct fix is to ensure the functional API correctly provides it.
    # Keras usually provides it automatically if the signature is `call(self, inputs, training=None)`

    # Temporary explicit fix:
    # x = transformer_block(x, training=True) # This forces training mode, not ideal for inference
    # Better: modify TransformerBlock's call signature.

    # Let's assume you've used the recommended signature `def call(self, inputs, training=None):`
    # then the following line is correct as is:
    x = transformer_block(x) # Call the block without explicitly passing 'training' if it has a default

    # Flatten the output from the TransformerBlock so that the Dense layer can operate on a single vector per sample.
    x = layers.Flatten()(x) # Shape: (None, LOOK_BACK * EMBED_DIM)

    # The Dense layer now outputs a flat vector of `predict_ahead * num_features`
    outputs = layers.Dense(output_sequence_length * output_features_count)(x) # Shape: (None, PREDICT_AHEAD * NUM_FEATURES)

    # Reshape to the desired output sequence shape
    outputs = layers.Reshape((output_sequence_length, output_features_count))(outputs) # Shape: (None, PREDICT_AHEAD, NUM_FEATURES)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [12]:
def build_transformer_model(input_shape, output_sequence_length, output_features_count):
    """
    Builds the Transformer model for time series prediction.

    Args:
        input_shape (tuple): Shape of the input sequences (look_back, num_features).
        output_sequence_length (int): The length of the output sequence (predict_ahead).
        output_features_count (int): The number of features in the output.

    Returns:
        keras.Model: The compiled Transformer model.
    """
    inputs = layers.Input(shape=input_shape) # (None, LOOK_BACK, NUM_FEATURES)

    # Project input features to EMBED_DIM before positional embedding
    x = layers.TimeDistributed(layers.Dense(EMBED_DIM))(inputs) # Shape: (None, LOOK_BACK, EMBED_DIM)

    # Positional Embedding for input sequence
    x = PositionalEmbedding(input_shape[0], EMBED_DIM)(x) # Shape: (None, LOOK_BACK, EMBED_DIM)

    # Apply Transformer blocks
    transformer_block = TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM, DROPOUT_RATE)
    # Pass the `training` argument, which Keras automatically provides when
    # defining a model via the functional API.
    x = transformer_block(x, training=True) # <--- FIX: Pass `training=True` here, or better, pass `inputs.is_training` or infer from context

    # In Keras functional API, you typically chain layers, and Keras handles
    # the `training` argument propagation. When defining a custom Layer's `call`
    # method with `training` as a parameter, Keras expects it to be passed.

    # A more robust way in the functional API is to define `training` as a placeholder:
    # `training_arg = tf.keras.backend.learning_phase()`
    # And then pass `training=training_arg` to your custom layers.
    # However, for simple Dropout control within custom layers, Keras usually handles this automatically
    # if the `call` signature matches `(self, inputs, training=None)`.
    # Since your `call` does NOT have a default value for `training`, it's a required argument.

    # Let's directly pass it to the TransformerBlock.
    # When building the model, we implicitly define the computation graph.
    # Keras will handle `training` appropriately during .fit() and .predict().
    # The common practice is to pass it down.
    # x = transformer_block(x, training=inputs.is_training) # This is how it's often handled implicitly.
    # But since it's a direct Layer call, let's ensure it gets the argument.

    # The most common way to handle this in Keras Functional API
    # when a custom layer explicitly requires `training` is to ensure
    # your `call` method in `TransformerBlock` looks like:
    # `def call(self, inputs, training=None):`
    # (with a default `None`).
    # If you remove the default, then Keras doesn't know how to fill it automatically
    # in the functional API graph building unless you explicitly provide it in `Input`.

    # Let's fix your `TransformerBlock` call method first, as it's the more Keras-idiomatic way.
    # I will provide the updated TransformerBlock first.

    # Let's assume you have updated TransformerBlock's call method:
    # `def call(self, inputs, training=None):`
    # If so, then `x = transformer_block(x)` should work.
    # If not, you *must* pass it.

    # Given your current error, `training` is *missing*, meaning no default.
    # The most direct fix is to ensure the functional API correctly provides it.
    # Keras usually provides it automatically if the signature is `call(self, inputs, training=None)`

    # Temporary explicit fix:
    # x = transformer_block(x, training=True) # This forces training mode, not ideal for inference
    # Better: modify TransformerBlock's call signature.

    # Let's assume you've used the recommended signature `def call(self, inputs, training=None):`
    # then the following line is correct as is:
    x = transformer_block(x) # Call the block without explicitly passing 'training' if it has a default

    # Flatten the output from the TransformerBlock so that the Dense layer can operate on a single vector per sample.
    x = layers.Flatten()(x) # Shape: (None, LOOK_BACK * EMBED_DIM)

    # The Dense layer now outputs a flat vector of `predict_ahead * num_features`
    outputs = layers.Dense(output_sequence_length * output_features_count)(x) # Shape: (None, PREDICT_AHEAD * NUM_FEATURES)

    # Reshape to the desired output sequence shape
    outputs = layers.Reshape((output_sequence_length, output_features_count))(outputs) # Shape: (None, PREDICT_AHEAD, NUM_FEATURES)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [13]:
# --- 5. Training Function ---
def train_transformer_ts(X_scaled, Y_scaled, input_features_count, output_features_count, look_back, predict_ahead, epochs=50, batch_size=32):
    """
    Trains the Transformer time series prediction model.

    Args:
        X_scaled (numpy.ndarray): Scaled input sequences.
        Y_scaled (numpy.ndarray): Scaled output sequences.
        input_features_count (int): Number of features in input.
        output_features_count (int): Number of features in output.
        look_back (int): Look-back window size.
        predict_ahead (int): Number of future steps to predict.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size for training.

    Returns:
        keras.Model: The trained Transformer model.
    """
    # Build the model
    model = build_transformer_model(
        input_shape=(look_back, input_features_count),
        output_sequence_length=predict_ahead,
        output_features_count=output_features_count
    )

    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")

    # Train the model
    print(f"Training Transformer model for {epochs} epochs...")
    model.fit(X_scaled, Y_scaled, epochs=epochs, batch_size=batch_size, verbose=0)
    print("Training complete.")
    return model

In [14]:
# --- 6. Prediction Function ---
def predict_future_values(model, last_input_sequence_scaled, scaler_output):
    """
    Predicts future values using the trained Transformer model.

    Args:
        model (keras.Model): The trained Transformer model.
        last_input_sequence_scaled (numpy.ndarray): The last sequence of input data, scaled.
                                                    Shape: (1, look_back, num_features).
        scaler_output (sklearn.preprocessing.MinMaxScaler): The scaler used for output features.

    Returns:
        numpy.ndarray: The inverse-transformed predicted future values.
                       Shape: (predict_ahead, num_features).
    """
    # Ensure the input sequence has the correct shape for prediction (batch_size, look_back, num_features)
    if last_input_sequence_scaled.ndim == 2:
        last_input_sequence_scaled = np.expand_dims(last_input_sequence_scaled, axis=0)

    # Make prediction
    predicted_scaled = model.predict(last_input_sequence_scaled, verbose=0)

    # Reshape the predicted output to 2D for inverse transformation
    # The model outputs (batch_size, predict_ahead, num_features)
    # We need (predict_ahead * num_features) for the scaler, then reshape back
    predicted_scaled_2d = predicted_scaled.reshape(-1, predicted_scaled.shape[-1])

    # Inverse transform the prediction
    predicted_original = scaler_output.inverse_transform(predicted_scaled_2d)

    # Reshape back to (predict_ahead, num_features)
    predicted_original = predicted_original.reshape(predicted_scaled.shape[1], predicted_scaled.shape[2])

    return predicted_original


# --- Example Usage (using the uploaded CSV content) ---

# This part demonstrates how to use the functions defined above
# In a real application, X_scaled, Y_scaled, scaler_input, scaler_output would be
# provided as function arguments based on external data.

# Placeholder for the actual content of 'AAPL_merged_combined_data_2025-07-11.csv'
# In a real scenario, this would be read from the file directly.
# For demonstration, I'll use a simplified string based on the fetched content.
# IMPORTANT: Replace this with the actual content fetched from the file for full functionality.
ticker = "AAPL"
input_data = "data/" + ticker + "_market_data.csv"
predicted_data = "data/" + ticker + "_predicted_data.csv"

# 1. Load and preprocess data
scaled_data, scaler_input, scaler_output, features = load_and_preprocess_data(input_data)

# Determine the number of features based on the preprocessed data
NUM_FEATURES = scaled_data.shape[1]

# 2. Create sequences
X, Y = create_sequences(scaled_data, LOOK_BACK, PREDICT_AHEAD)

# Ensure X and Y are reshaped correctly for the model
# X: (samples, look_back, num_features)
# Y: (samples, predict_ahead, num_features)

# For demonstration, let's take a subset if the data is too large for quick execution
# Or ensure LOOK_BACK and PREDICT_AHEAD are reasonable for the given data length.
# Assuming enough data exists for at least one sequence.

# Get the number of features from the scaled data
input_features_count = scaled_data.shape[1]
output_features_count = scaled_data.shape[1] # Assuming predicting all features

# 3. Train the model
# Note: For actual training, you would typically split data into training and validation sets.
# For this example, we'll train on all generated sequences.
if X.shape[0] > 0: # Check if sequences were created
    model = train_transformer_ts(
        X_scaled=X,
        Y_scaled=Y,
        input_features_count=input_features_count,
        output_features_count=output_features_count,
        look_back=LOOK_BACK,
        predict_ahead=PREDICT_AHEAD,
        epochs=1,  # Set a small number of epochs for demonstration
        batch_size=1
    )

    # 4. Make a prediction
    # Get the last sequence from the original scaled data to predict future values
    last_input_sequence_scaled = scaled_data[-LOOK_BACK:]

    if last_input_sequence_scaled.shape[0] == LOOK_BACK:
        predicted_future_values = predict_future_values(model, last_input_sequence_scaled, scaler_output)
        print("\nPredicted Future Values (original scale) for next", PREDICT_AHEAD, "steps across", NUM_FEATURES, "features:")

        # --- NEW CODE TO ADD DATES ---
        # 1. Load the original DataFrame to get the last date
        original_df = pd.read_csv(input_data) # Re-read for full dataframe
        original_df['date'] = pd.to_datetime(original_df['date'])
        last_known_date = original_df['date'].iloc[-1]

        # 2. Generate future dates
        future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1),
                                     periods=PREDICT_AHEAD,
                                     freq='B') # 'B' for business day frequency

        # 3. Create a DataFrame for better readability of predictions with dates
        predicted_df = pd.DataFrame(predicted_future_values, columns=features)
        predicted_df.insert(0, 'date', future_dates) # Insert 'date' column at the beginning
        save_dataframe_to_csv(predicted_df, ticker, predicted_data)
    else:
        print("Not enough data to create the last input sequence for prediction.")
else:
    print("Not enough data to create sequences for training and prediction. Adjust LOOK_BACK or PREDICT_AHEAD constants or provide more data.")


Loading data from data/AAPL_market_data.csv...
Initial DataFrame shape: (3776, 9)
Training Transformer model for 1 epochs...
Training complete.

Predicted Future Values (original scale) for next 30 steps across 9 features:

DataFrame successfully saved to 'data/AAPL_predicted_data.csv'
