## Imports

In [None]:
from typing import List, Tuple
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers # type: ignore
from datetime import datetime, timedelta
import io
import warnings

# Silence every FutureWarning for the rest of the process. The filter below is
# not scoped to a module, so it hides FutureWarnings raised by any library
# (pandas, TensorFlow, ...), not only the scikit-learn ones it was added for.
warnings.simplefilter(action='ignore', category=FutureWarning)

## Utility Functions

In [120]:
def save_dataframe_to_csv(df: pd.DataFrame, ticker: str, filename: str = None) -> bool:
    """
    Write a non-empty DataFrame (index included) to a CSV file.

    Args:
        df: The DataFrame to persist. Anything that is not a DataFrame, or a
            DataFrame with no data, is rejected.
        ticker: Symbol used to build the default file name.
        filename: Destination path. When None, "<ticker>_market_data.csv" is used.

    Returns:
        True when the file was written, False when the input was rejected or
        an I/O error occurred while writing.
    """
    has_data = isinstance(df, pd.DataFrame) and not df.empty
    if not has_data:
        print("Error: Input is not a valid or non-empty DataFrame. Nothing to save.")
        return False

    destination = filename if filename is not None else f"{ticker}_market_data.csv"

    try:
        # index=True keeps the index column in the file.
        df.to_csv(destination, index=True)
        print(f"\nDataFrame successfully saved to '{destination}'")
        return True
    except IOError as io_error:
        # File-system level failures are reported, not propagated.
        print(f"\nAn I/O error occurred while saving the file: {io_error}")
        return False

In [121]:
# 1. Global Constants
# Hyperparameters shared by the sequence-building, model-building and
# prediction code in this notebook.

LOOK_BACK = 130       # Number of past time steps fed to the model (roughly 6 months of trading days)
PREDICT_AHEAD = 10    # Number of future time steps the model predicts in one shot
EMBED_DIM = 256       # Width of the per-time-step embedding; MultiHeadSelfAttention raises ValueError unless divisible by NUM_HEADS
NUM_HEADS = 16        # Number of attention heads (EMBED_DIM // NUM_HEADS = 256 // 16 = 16 dimensions per head)
FF_DIM = 1024         # Hidden layer size of the feed-forward network inside each Transformer block
DROPOUT_RATE = 0.2    # Dropout rate used for regularization inside each Transformer block
NUM_TRANSFORMER_BLOCKS = 4 # Number of Transformer blocks to stack


In [122]:
# 2. Data Loading and Preprocessing
def load_and_preprocess_data(csv_file_path):
    """
    Load a CSV file, split it into input and target features, and scale both to [0, 1].

    The CSV must contain a 'date' column (used as the index), the nine input
    feature columns listed below, and a 'Daily_Return' column. Gaps are filled
    forward first and then backward, so leading NaNs are filled too.

    Args:
        csv_file_path (str): The path to the CSV file.

    Returns:
        tuple: A tuple containing:
            - numpy.ndarray: The scaled input features data (X).
            - numpy.ndarray: The scaled target feature data (Y).
            - sklearn.preprocessing.MinMaxScaler: Scaler fitted on input features.
            - sklearn.preprocessing.MinMaxScaler: Scaler fitted on target feature.
            - list: List of input feature names.
            - list: List of target feature names.

    Raises:
        KeyError: If 'date' or any required feature column is missing from the CSV.
    """
    print(f"Loading data from {csv_file_path}...")
    df = pd.read_csv(csv_file_path)

    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date')

    # Features the model consumes.
    input_features = ['RSI', 'MACD', 'MACD_Signal', 'Momentum', 'OBV', 'ATR', 'Revenue_Growth', 'EPS_Growth', 'ROE']

    # Single target the model predicts (kept as a list so selection stays 2D).
    target_feature = ['Daily_Return']

    # Fail early with a readable message instead of pandas' generic KeyError.
    missing_columns = [name for name in input_features + target_feature if name not in df.columns]
    if missing_columns:
        raise KeyError(f"CSV file is missing required column(s): {missing_columns}")

    # DataFrame.ffill()/bfill() replace fillna(method=...), which pandas has
    # deprecated; the global FutureWarning filter in this file would hide that
    # warning until the keyword stops working.
    df_input_features = df[input_features].ffill().bfill()
    print(f"Input DataFrame shape: {df_input_features.shape}")

    df_target_feature = df[target_feature].ffill().bfill()
    print(f"Target DataFrame shape: {df_target_feature.shape}")

    # Separate scalers so the target can be inverse-transformed on its own.
    # NOTE(review): both scalers are fitted on the full series; if part of it is
    # later held out for validation, the scaling has already seen it — confirm
    # that this is acceptable.
    scaler_input = MinMaxScaler(feature_range=(0, 1))
    scaler_output = MinMaxScaler(feature_range=(0, 1))

    scaled_input_data = scaler_input.fit_transform(df_input_features)
    scaled_target_data = scaler_output.fit_transform(df_target_feature)

    return scaled_input_data, scaled_target_data, scaler_input, scaler_output, input_features, target_feature

In [123]:
# 3. Sequence Creation
def create_sequences(input_data, target_data, look_back, predict_ahead):
    """
    Slice aligned input/target arrays into sliding training windows.

    Window ``s`` pairs ``input_data[s : s + look_back]`` with the
    ``predict_ahead`` target rows that immediately follow it.

    Args:
        input_data (numpy.ndarray): The scaled input features data.
        target_data (numpy.ndarray): The scaled target feature data.
        look_back (int): Number of past time steps in each input window.
        predict_ahead (int): Number of future time steps in each target window.

    Returns:
        tuple: A tuple containing:
            - numpy.ndarray: Input sequences (X).
            - numpy.ndarray: Target sequences (Y).
        Both arrays are empty when the data is too short for a single window.
    """
    # Number of start positions for which both the input window and the
    # following target window still fit; a non-positive count yields no windows.
    window_count = len(input_data) - look_back - predict_ahead + 1
    window_starts = range(window_count)

    input_windows = [input_data[start:start + look_back] for start in window_starts]
    target_windows = [
        target_data[start + look_back:start + look_back + predict_ahead]
        for start in window_starts
    ]
    return np.array(input_windows), np.array(target_windows)

In [124]:
#  4.1 Transformer Model Architecture

class MultiHeadSelfAttention(layers.Layer):
    """
    Multi-head scaled dot-product self-attention.

    Queries, keys and values are all dense projections of the same input.
    Each projection is ``embed_dim`` wide and is split evenly across
    ``num_heads`` heads, so ``embed_dim`` must be divisible by ``num_heads``.
    """
    def __init__(self, embed_dim, num_heads=8, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        per_head_dim, leftover = divmod(embed_dim, num_heads)
        if leftover != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = per_head_dim
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return (attended values, attention weights) for already-split heads."""
        logits = tf.matmul(query, key, transpose_b=True)
        # Scale by sqrt of the key's last dimension before the softmax.
        key_depth = tf.cast(tf.shape(key)[-1], tf.float32)
        probabilities = tf.nn.softmax(logits / tf.math.sqrt(key_depth), axis=-1)
        return tf.matmul(probabilities, value), probabilities

    def separate_heads(self, x, batch_size):
        """Reshape to (batch, seq, heads, projection_dim), then move heads ahead of seq."""
        split_shape = (batch_size, -1, self.num_heads, self.projection_dim)
        return tf.transpose(tf.reshape(x, split_shape), perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        heads_q = self.separate_heads(self.query_dense(inputs), batch_size)
        heads_k = self.separate_heads(self.key_dense(inputs), batch_size)
        heads_v = self.separate_heads(self.value_dense(inputs), batch_size)
        attended, _ = self.attention(heads_q, heads_k, heads_v)
        # Undo the head/sequence swap and merge the heads back into embed_dim.
        attended = tf.transpose(attended, perm=[0, 2, 1, 3])
        merged = tf.reshape(attended, (batch_size, -1, self.embed_dim))
        return self.combine_heads(merged)

In [125]:
#  4.2 Transformer Model Architecture

class TransformerBlock(layers.Layer):
    """
    One Transformer encoder block: self-attention followed by a two-layer
    feed-forward network, each wrapped in dropout, a residual connection and
    layer normalization (normalization is applied after the residual sum).
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        feed_forward_layers = [
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.ffn = keras.Sequential(feed_forward_layers)
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        # `training` is forwarded to both dropout layers explicitly.
        attended = self.dropout1(self.att(inputs), training=training)
        normed_attention = self.layernorm1(inputs + attended)
        fed_forward = self.dropout2(self.ffn(normed_attention), training=training)
        return self.layernorm2(normed_attention + fed_forward)

In [126]:
#  4.3 Transformer Model Architecture

class PositionalEmbedding(layers.Layer):
    """
    Adds a learned position embedding to an already-embedded sequence.

    The input is multiplied by sqrt(embed_dim) before the position vectors
    (one per index in ``[0, sequence_length)``) are added.
    """
    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        self.position_embedding = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.scale = tf.math.sqrt(tf.cast(embed_dim, tf.float32))

    def call(self, inputs):
        # Positions are derived from the runtime length of axis 1 of the input.
        step_count = tf.shape(inputs)[1]
        position_ids = tf.range(start=0, limit=step_count, delta=1)
        return inputs * self.scale + self.position_embedding(position_ids)

In [127]:
# 5. Model Construction
def build_transformer_model(input_shape, output_sequence_length, output_features_count, num_transformer_blocks=1):
    """
    Assemble the Transformer forecasting network (the model is NOT compiled here).

    Pipeline: per-time-step Dense projection to EMBED_DIM -> positional
    embedding -> ``num_transformer_blocks`` stacked TransformerBlocks ->
    Flatten -> Dense -> Reshape to the forecast shape.

    Args:
        input_shape (tuple): Shape of the input sequences (look_back, num_input_features).
        output_sequence_length (int): The length of the output sequence (predict_ahead).
        output_features_count (int): The number of features predicted per future step.
        num_transformer_blocks (int): The number of TransformerBlock layers to stack.

    Returns:
        keras.Model: The uncompiled model mapping (None, look_back, num_input_features)
        to (None, output_sequence_length, output_features_count).
    """
    sequence_length = input_shape[0]
    model_input = layers.Input(shape=input_shape)

    # Project each time step's features to the embedding width.
    hidden = layers.TimeDistributed(layers.Dense(EMBED_DIM))(model_input)
    hidden = PositionalEmbedding(sequence_length, EMBED_DIM)(hidden)

    # Each iteration creates a fresh block, so weights are not shared.
    for _ in range(num_transformer_blocks):
        hidden = TransformerBlock(EMBED_DIM, NUM_HEADS, FF_DIM, DROPOUT_RATE)(hidden)

    flattened = layers.Flatten()(hidden)

    # A single linear head emits all forecast values, then is reshaped to
    # (output_sequence_length, output_features_count).
    head_width = output_sequence_length * output_features_count
    flat_forecast = layers.Dense(head_width)(flattened)
    forecast = layers.Reshape((output_sequence_length, output_features_count))(flat_forecast)

    return keras.Model(inputs=model_input, outputs=forecast)

In [128]:
# --- 5. Training Function ---
def train_transformer_ts(X_scaled, Y_scaled, input_features_count, output_features_count, look_back, predict_ahead, epochs=50, batch_size=32, num_transformer_blocks=1):
    """
    Build, compile and fit the Transformer time series model.

    The model is compiled with Adam (learning rate 0.001) and mean squared
    error, and fitted with 20% of the samples passed as ``validation_split``.

    Args:
        X_scaled (numpy.ndarray): The scaled input sequences.
        Y_scaled (numpy.ndarray): The scaled target sequences.
        input_features_count (int): Number of features in the input data.
        output_features_count (int): Number of features in the output (target) data.
        look_back (int): The sequence length for inputs.
        predict_ahead (int): The sequence length for outputs (predictions).
        epochs (int): Number of training epochs.
        batch_size (int): Batch size for training.
        num_transformer_blocks (int): Number of Transformer blocks to use in the model.

    Returns:
        keras.Model: The trained Transformer model.
    """
    network = build_transformer_model(
        input_shape=(look_back, input_features_count),
        output_sequence_length=predict_ahead,
        output_features_count=output_features_count,
        num_transformer_blocks=num_transformer_blocks,
    )

    optimizer = keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=optimizer, loss="mse")

    print(f"Training Transformer model for {epochs} epochs with {num_transformer_blocks} blocks...")
    # The validation split lets over-fitting show up in the training log.
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "verbose": 1,
        "validation_split": 0.2,
    }
    network.fit(X_scaled, Y_scaled, **fit_options)
    print("Training complete.")
    return network


In [129]:
# --- 6. Prediction Function ---
def predict_future_values(model, last_input_sequence_scaled, scaler_output):
    """
    Run the model on one scaled input window and return the forecast in original units.

    Note: a later cell in this file defines a function with the same name and
    the same behavior; whichever definition runs last is the one in effect.

    Args:
        model (keras.Model): The trained Transformer model.
        last_input_sequence_scaled (numpy.ndarray): The last sequence of input data, scaled.
            A 2D (look_back, num_input_features) array gets a leading batch axis added.
        scaler_output (sklearn.preprocessing.MinMaxScaler): The scaler used for the single target feature.

    Returns:
        numpy.ndarray: The inverse-transformed predicted future values,
        shaped (predict_ahead, number_of_target_features).
    """
    needs_batch_axis = last_input_sequence_scaled.ndim == 2
    if needs_batch_axis:
        model_input = np.expand_dims(last_input_sequence_scaled, axis=0)
    else:
        model_input = last_input_sequence_scaled

    forecast_scaled = model.predict(model_input, verbose=0)

    # The scaler works on 2D (rows, features) data, so the batch axis is
    # dropped by reshaping to the prediction's last two dimensions. This
    # only works for a batch of exactly one window.
    steps = forecast_scaled.shape[1]
    features = forecast_scaled.shape[2]
    forecast_2d = forecast_scaled.reshape(steps, features)

    return scaler_output.inverse_transform(forecast_2d)

In [130]:
# --- 6. Prediction Function (redefines predict_future_values from the previous cell) ---
def predict_future_values(model, last_input_sequence_scaled, scaler_output):
    """
    Predict future daily returns for one scaled input window, in original units.

    Args:
        model (keras.Model): The trained Transformer model.
        last_input_sequence_scaled (numpy.ndarray): The last sequence of input data, scaled.
            A 2D (look_back, num_input_features) array gets a leading batch axis added.
        scaler_output (sklearn.preprocessing.MinMaxScaler): The scaler used for the 'Daily_Return' target feature.

    Returns:
        numpy.ndarray: The inverse-transformed predicted future daily returns,
        shaped (predict_ahead, number_of_target_features).
    """
    window = last_input_sequence_scaled
    if window.ndim == 2:
        # The model expects a batch axis in front of (look_back, features).
        window = np.expand_dims(window, axis=0)

    scaled_returns = model.predict(window, verbose=0)

    # Collapse to the 2D layout the scaler expects; valid for a batch of one.
    horizon = scaled_returns.shape[1]
    target_count = scaled_returns.shape[2]
    return scaler_output.inverse_transform(scaled_returns.reshape(horizon, target_count))

In [131]:
def reconstruct_prices_dataframe(last_known_close_price, future_dates, predicted_daily_returns):
    """
    Compound predicted daily returns into a price path and tabulate the result.

    Each price is the previous price times (1 + return), starting from
    ``last_known_close_price``. A NaN return leaves the price unchanged for
    that step, while the returned 'predicted daily return' column still shows
    the NaN.

    Args:
        last_known_close_price (float): The actual closing price of the last day in the input sequence.
        future_dates (pd.DatetimeIndex or list of datetime): The dates for the future predictions.
        predicted_daily_returns (numpy.ndarray): The predicted daily returns (unscaled);
            flattened before use.

    Returns:
        pd.DataFrame: A DataFrame with 'date', 'predicted daily return', and 'predicted price'.
    """
    returns_1d = predicted_daily_returns.flatten()

    price_path = []
    running_price = last_known_close_price
    for step_return in returns_1d:
        # Treat a missing prediction as "no change" for compounding purposes.
        growth = 0.0 if np.isnan(step_return) else step_return
        running_price = running_price * (1 + growth)
        price_path.append(running_price)

    return pd.DataFrame({
        'date': future_dates,
        'predicted daily return': returns_1d,
        'predicted price': price_path,
    })

In [132]:
# --- Example Usage ---
# End-to-end walkthrough: prepare data -> scale -> build sequences -> train ->
# predict returns -> reconstruct prices. Each stage is guarded, and the matching
# `else` branch at the bottom prints why the pipeline stopped.
#
# NOTE(review): prepare_dataframe, normalize_prepared_data,
# create_training_testing_datasets, INPUT_FEATURES and OUTPUT_TARGETS are not
# defined in the cells visible here (which define load_and_preprocess_data and
# create_sequences instead) — presumably they come from another cell or module;
# confirm before running.
if __name__ == '__main__':
    symbol_to_process = "AAPL"
    fixed_start_date = datetime(2010, 4, 27)
    fixed_end_date = datetime(2024, 12, 31)
    fmp_api_key = "YOUR_FMP_API_KEY" # Placeholder: replace with a real key before running

    # Step 1: Prepare the DataFrame (per the original note, this fetches data and
    # calculates indicators and returns UNSCALED data — helper not visible here)
    prepared_df = prepare_dataframe(symbol_to_process, fixed_start_date, fixed_end_date, fmp_api_key)

    if prepared_df is not None and not prepared_df.empty:
        print("\n--- First few rows of Prepared DataFrame (UNSCALED) ---")
        print(prepared_df.head())
        print("\n--- Last few rows of Prepared DataFrame (UNSCALED) ---")
        print(prepared_df.tail())
        print(f"\nPrepared DataFrame shape: {prepared_df.shape}")
        print(f"Date range of the final prepared_df: {prepared_df.index.min()} to {prepared_df.index.max()}")

        # Step 2: Normalize the prepared data (returns scaled arrays plus the two fitted scalers)
        scaled_input_data, scaled_target_data, scaler_input, scaler_output = normalize_prepared_data(
            prepared_df, INPUT_FEATURES, OUTPUT_TARGETS
        )

        if scaled_input_data is not None and scaled_input_data.size > 0:
            # Step 3: Create training and testing datasets (uses already scaled data)
            X_sequences, Y_sequences = create_training_testing_datasets(
                scaled_input_data, scaled_target_data, LOOK_BACK, PREDICT_AHEAD
            )

            if X_sequences is not None and X_sequences.size > 0:
                # Feature counts are read from axis 2 of the sequence arrays,
                # so both arrays must be 3D at this point.
                NUM_INPUT_FEATURES = X_sequences.shape[2]
                NUM_TARGET_FEATURES = Y_sequences.shape[2]

                # Step 4: Train the model
                model = train_transformer_ts(
                    X_scaled=X_sequences,
                    Y_scaled=Y_sequences,
                    input_features_count=NUM_INPUT_FEATURES,
                    output_features_count=NUM_TARGET_FEATURES,
                    look_back=LOOK_BACK,
                    predict_ahead=PREDICT_AHEAD,
                    epochs=100,
                    batch_size=32,
                    num_transformer_blocks=NUM_TRANSFORMER_BLOCKS
                )

                # Step 5: Make a prediction from the most recent LOOK_BACK rows
                last_input_raw = prepared_df[INPUT_FEATURES].iloc[-LOOK_BACK:]

                # iloc[-LOOK_BACK:] silently returns fewer rows when the frame is
                # shorter than LOOK_BACK, hence the explicit length check.
                if last_input_raw.shape[0] == LOOK_BACK:
                    last_input_sequence_scaled = scaler_input.transform(last_input_raw)
                    
                    # Get the last known close price from the prepared_df for reconstruction
                    last_known_close_price = prepared_df['close'].iloc[-1]

                    # Predict daily returns, already inverse-transformed to original units
                    predicted_original_returns = predict_future_values(
                        model, last_input_sequence_scaled, scaler_output
                    )
                    
                    last_known_date = prepared_df.index[-1]

                    # Forecast dates: the next PREDICT_AHEAD business days (freq='B')
                    # starting the day after the last known date.
                    future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1),
                                                 periods=PREDICT_AHEAD,
                                                 freq='B')

                    # Compound the predicted returns into prices starting from the last close
                    predicted_df = reconstruct_prices_dataframe(
                        last_known_close_price, future_dates, predicted_original_returns
                    )
                    
                    print(f"\nPredicted Future Values (Daily Returns and Reconstructed Prices) for next {PREDICT_AHEAD} steps:")
                    print(predicted_df)
                else:
                    print(f"Not enough data in prepared_df ({last_input_raw.shape[0]} rows) to create the last input sequence for prediction (requires {LOOK_BACK} rows).")
            else:
                print("Not enough data to create sequences for training and prediction. Adjust LOOK_BACK or PREDICT_AHEAD constants or provide more data.")
        else:
            print("Normalization failed or resulted in empty scaled data. Cannot proceed with sequence creation.")
    else:
        print("Prepared DataFrame is empty or None. Cannot proceed with model training and prediction.")



NameError: name 'datetime' is not defined