In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import random
import os  # For file path handling

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split # Used in Model 4
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    mean_absolute_percentage_error
)

# Keras / TensorFlow specific imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    LSTM, Dense, Dropout, BatchNormalization, LeakyReLU
)
from tensorflow.keras.callbacks import (
    EarlyStopping, ReduceLROnPlateau
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber

# Optional dependency: the TCN layer is only needed by Model 2.
# When the import fails the script keeps running and Model 2 is skipped
# (install with: pip install tensorflow-tcn).
try:
    from tcn import TCN
except ImportError:
    tcn_available = False
    print("Warning: TCN library not found. Model 2 (TCN) will be skipped.")
    print("Install using: pip install tensorflow-tcn")
else:
    tcn_available = True

# --- Global Settings ---
SEED = 44

# Seed every random number source used by the script so runs are repeatable.
# The three calls are independent of each other; all use the same SEED.
random.seed(SEED)
np.random.seed(SEED)
tf.keras.utils.set_random_seed(SEED)

# Input file and forecasting configuration.
FILE_PATH = "starbucks_open_7year - starbucks_open_7year.csv.csv"  # Adjust if needed
SEQ_LENGTH = 60  # Look-back window length shared by Models 1, 2 and 4
FUTURE_DAYS_TO_PREDICT = 30  # Number of future steps to forecast


# --- Data Loading and Initial Preparation ---
def load_and_prepare_data(file_path):
    """Load the Starbucks price CSV and derive the log-price columns.

    The CSV must provide a ``Date`` column and an ``Open`` column. Rows whose
    date is missing or cannot be parsed are dropped, the frame is indexed and
    sorted by date, and only then are remaining gaps forward-filled, so every
    gap is filled from the chronologically preceding row.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A DataFrame indexed by ``Date`` in ascending order, holding the
        original columns plus ``Log_Open`` (natural log of ``Open``) and
        ``Smoothed_Log_Open`` (3-row rolling mean of ``Log_Open``; rows
        without a full window fall back to ``Log_Open``).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: Data file not found at {file_path}")

    df = pd.read_csv(file_path)
    print("--- Initial Data Info ---")
    print(df.head())
    print(f"\nMissing values before fill:\n{df.isna().sum()}")

    # Parse and order the dates BEFORE filling gaps. Filling first would
    # (a) copy the previous row's date into rows that have no date at all and
    # (b) fill prices from whatever row happens to precede in file order,
    # which for an unsorted file can be a later trading day.
    df["Date"] = pd.to_datetime(df["Date"], errors='coerce')
    df = df.dropna(subset=['Date'])  # Drop rows where the date is missing/invalid
    df = df.set_index("Date").sort_index()

    # Forward fill now runs in chronological order.
    df = df.ffill()
    print(f"\nMissing values after ffill:\n{df.isna().sum()}")

    # Log transform (common for stock prices).
    # A small epsilon avoids log(0) if 'Open' could be 0.
    epsilon = 1e-9
    df["Log_Open"] = np.log(df["Open"] + epsilon)

    # Smoothed version (used in Model 4). The rolling mean is NaN until three
    # rows are available, so those rows reuse the unsmoothed value.
    df['Smoothed_Log_Open'] = df['Log_Open'].rolling(window=3).mean().fillna(df['Log_Open'])

    print("\n--- Data After Initial Prep ---")
    print(df.head())
    print(f"\nData shape: {df.shape}")
    return df

# --- Data Preparation Utilities ---
def create_sequences(data, seq_length):
    """Slice a 2-D series into sliding input windows and next-step targets.

    Only column 0 of ``data`` is used. Window ``k`` holds rows
    ``k .. k + seq_length - 1`` and its target is row ``k + seq_length``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        seq_length: Number of consecutive rows per input window.

    Returns:
        Tuple ``(X, Y)`` of arrays; ``X`` has one row per window and ``Y`` one
        value per window. Two empty arrays are returned when ``data`` has no
        more than ``seq_length`` rows.
    """
    window_count = len(data) - seq_length
    if window_count <= 0:
        # Series too short to form even a single (window, target) pair.
        return np.array([]), np.array([])

    starts = range(window_count)
    inputs = [data[start:start + seq_length, 0] for start in starts]
    targets = [data[start + seq_length, 0] for start in starts]
    return np.array(inputs), np.array(targets)

def create_lag_features(data, window):
    """Pair every row from index ``window`` onward with the ``window`` rows before it.

    Unlike ``create_sequences`` the slices keep all trailing dimensions of
    ``data``, so a ``(rows, 1)`` input yields features of shape
    ``(samples, window, 1)`` and targets of shape ``(samples, 1)``.

    Args:
        data: Array-like sliced along its first axis.
        window: Number of preceding rows used as features for each target.

    Returns:
        Tuple ``(X, y)`` of arrays with one entry per target row.
    """
    target_positions = range(window, len(data))
    lagged = [data[pos - window:pos] for pos in target_positions]
    targets = [data[pos] for pos in target_positions]
    return np.array(lagged), np.array(targets)

# --- Model Definitions ---

# Model 1: LSTM
def build_lstm_model(seq_length):
    """Create and compile the stacked LSTM regressor (Model 1).

    Architecture: LSTM(100, returning sequences) -> Dropout -> LSTM(80) ->
    Dropout -> Dense(50, L2-regularised) -> Dense(30) -> Dense(15) -> Dense(1).

    Args:
        seq_length: Number of time steps per input window; the model input
            shape is ``(seq_length, 1)``.

    Returns:
        A compiled ``Sequential`` model named ``"LSTM_Model"`` using the Adam
        optimizer and mean squared error loss.
    """
    net = Sequential(name="LSTM_Model")

    # Recurrent stack: the first LSTM emits the full sequence so that the
    # second LSTM can consume it; the second emits only its final state.
    net.add(LSTM(100, activation="tanh", recurrent_activation='sigmoid',
                 return_sequences=True, recurrent_dropout=0.2,
                 input_shape=(seq_length, 1)))
    net.add(Dropout(0.2))
    net.add(LSTM(80, activation="tanh", recurrent_activation='sigmoid',
                 recurrent_dropout=0.3))
    net.add(Dropout(0.3))

    # Dense head: only the first layer carries the L2 weight penalty.
    net.add(Dense(50, activation="relu", kernel_regularizer=l2(0.01)))
    for units in (30, 15):
        net.add(Dense(units, activation="relu"))

    # Single unit without activation for the regression output.
    net.add(Dense(1))

    net.compile(optimizer="adam", loss="mean_squared_error")
    return net

# Model 2: TCN
def build_tcn_model(seq_length):
    """Create and compile the TCN regressor (Model 2).

    Args:
        seq_length: Number of time steps per input window; the model input
            shape is ``(seq_length, 1)``.

    Returns:
        A compiled ``Sequential`` model named ``"TCN_Model"`` (Adam with
        learning rate 0.001, Huber loss, MAE metric), or ``None`` when the
        optional ``tcn`` package could not be imported.
    """
    if not tcn_available:
        return None

    # Settings for the single TCN layer. Causal padding keeps each output
    # step from seeing later inputs; dropout is applied inside the layer.
    tcn_settings = {
        "input_shape": (seq_length, 1),
        "nb_filters": 128,
        "kernel_size": 3,
        "nb_stacks": 1,
        "dilations": [1, 2, 4, 8, 16],
        "padding": 'causal',
        "use_skip_connections": True,
        "dropout_rate": 0.1,
        "activation": 'relu',
    }

    net = Sequential(name="TCN_Model")
    net.add(TCN(**tcn_settings))
    net.add(Dense(1))  # Single unit without activation for the regression output

    net.compile(optimizer=Adam(learning_rate=0.001), loss=Huber(), metrics=['mae'])
    return net

# Model 3: Polynomial Regression DNN
def build_poly_dnn_model(input_shape):
    """Create and compile the dense network fed with polynomial features (Model 3).

    Architecture: Dense(512) -> Dropout -> Dense(256) -> Dropout ->
    Dense(128) + LeakyReLU -> Dense(64) -> Dense(1).

    Args:
        input_shape: Number of input features (passed as ``input_dim``).

    Returns:
        A compiled ``Sequential`` model named ``"Polynomial_DNN_Model"`` using
        the Adam optimizer and MSE loss.
    """
    net = Sequential(name="Polynomial_DNN_Model")

    # Two wide ReLU layers, each followed by 20% dropout.
    net.add(Dense(512, activation='relu', input_dim=input_shape))
    net.add(Dropout(0.2))
    net.add(Dense(256, activation='relu'))
    net.add(Dropout(0.2))

    # This layer has no built-in activation; LeakyReLU is applied as a
    # separate layer right after it.
    net.add(Dense(128))
    net.add(LeakyReLU(alpha=0.1))

    net.add(Dense(64, activation='relu'))
    net.add(Dense(1))  # Single unit without activation for the regression output

    net.compile(optimizer='adam', loss='mse')
    return net

# Model 4: Lag Feature DNN
def build_lag_dnn_model(input_shape):
    """Create and compile the dense network fed with flattened lag windows (Model 4).

    Architecture: two L2-regularised blocks (Dense -> BatchNormalization ->
    Dropout) of width 256 and 128, then Dense(64) -> Dense(32) -> Dense(1).

    Args:
        input_shape: Number of lag features per sample.

    Returns:
        A compiled ``Sequential`` model named ``"Lag_Feature_DNN_Model"``
        (Adam with learning rate 0.0005, MSE loss, MAE metric).
    """
    net = Sequential(name="Lag_Feature_DNN_Model")

    # Regularised blocks; only the first Dense layer declares the input shape.
    regularised_blocks = (
        (256, {"input_shape": (input_shape,)}),
        (128, {}),
    )
    for units, first_layer_kwargs in regularised_blocks:
        net.add(Dense(units, activation='relu', kernel_regularizer=l2(0.001),
                      **first_layer_kwargs))
        net.add(BatchNormalization())
        net.add(Dropout(0.2))

    # Plain ReLU layers narrowing towards the output.
    for units in (64, 32):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(1))  # Single unit without activation for the regression output

    net.compile(optimizer=Adam(learning_rate=0.0005), loss='mse', metrics=['mae'])
    return net

# --- Training and Evaluation ---
def evaluate_model(y_true, y_pred, model_name, data_type="Test"):
    """Score predictions against ground truth, print a report, and return the scores.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values aligned with ``y_true``.
        model_name: Model label used in the printed header.
        data_type: Label of the evaluated data split used in the header.

    Returns:
        Dict with the keys ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"R2"`` and
        ``"MAPE"`` (in that order).
    """
    scores = {"MAE": mean_absolute_error(y_true, y_pred)}
    scores["MSE"] = mean_squared_error(y_true, y_pred)
    scores["RMSE"] = np.sqrt(scores["MSE"])  # RMSE is derived from the MSE
    scores["R2"] = r2_score(y_true, y_pred)
    scores["MAPE"] = mean_absolute_percentage_error(y_true, y_pred)

    print(f"\n--- {model_name} - {data_type} Set Evaluation ---")
    print(f"MAE:  {scores['MAE']:.4f}")
    print(f"MSE:  {scores['MSE']:.4f}")
    print(f"RMSE: {scores['RMSE']:.4f}")
    print(f"R2:   {scores['R2']:.4f}")
    print(f"MAPE: {scores['MAPE']:.4f}")
    return scores

def plot_training_history(history, model_name):
    """Draw the training and validation loss curves of one training run.

    Args:
        history: Object exposing a ``history`` mapping with ``'loss'`` and
            ``'val_loss'`` entries (e.g. the value returned by ``model.fit``).
        model_name: Model label used in the plot title and in the message
            printed when the curves are unavailable.

    Returns:
        None. A message is printed instead of a plot when ``history`` is
        falsy or lacks either loss entry.
    """
    if not (history and 'loss' in history.history and 'val_loss' in history.history):
        print(f"No training history found or history object incomplete for {model_name}.")
        return

    curves = history.history
    plt.figure(figsize=(10, 5))
    for key, legend_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
        plt.plot(curves[key], label=legend_label)
    plt.title(f'{model_name} - Training History')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()


def plot_predictions(dates, y_actual, y_pred, model_name, title_suffix="Prediction"):
    """Plot the actual and the predicted series over the same dates.

    Args:
        dates: X-axis values shared by both series.
        y_actual: Observed values, drawn as a solid blue line.
        y_pred: Predicted values, drawn as a dashed red line.
        model_name: Model label used in the plot title.
        title_suffix: Text appended to the title after the model name.
    """
    # (values, legend label, colour, line style) for each curve, in draw order.
    curves = (
        (y_actual, 'Actual Price', 'blue', '-'),
        (y_pred, 'Predicted Price', 'red', '--'),
    )

    plt.figure(figsize=(12, 6))
    for values, legend_label, line_color, line_style in curves:
        plt.plot(dates, values, label=legend_label, color=line_color,
                 marker='.', linestyle=line_style)
    plt.title(f'Starbucks Stock Price - {model_name} {title_suffix}')
    plt.xlabel('Date')
    plt.ylabel('Stock Price (Log or Actual)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# --- Future Prediction Function ---
def predict_future(model, last_sequence, seq_length, future_days, scaler):
    """Forecast ``future_days`` steps ahead by feeding predictions back in.

    Starting from ``last_sequence``, the model predicts one step, the oldest
    value of the window is discarded, the prediction is appended, and the
    process repeats. Errors therefore compound over the horizon.

    Args:
        model: Object with ``predict(x, verbose=0)`` that accepts an array of
            shape ``(1, seq_length, 1)`` and returns an array indexable as
            ``[0, 0]``.
        last_sequence: Array-like holding exactly ``seq_length`` scaled
            values; it is copied, never modified.
        seq_length: Window length the model expects.
        future_days: Number of steps to forecast.
        scaler: Object with ``inverse_transform`` used to map the scaled
            predictions back to the scale it was fitted on.

    Returns:
        Array of shape ``(future_days, 1)`` with the inverse-transformed
        predictions; an empty ``(0, 1)`` array when ``future_days <= 0``.

    Raises:
        ValueError: If ``last_sequence`` does not contain ``seq_length`` values.
    """
    if future_days <= 0:
        # Nothing to forecast. Return early rather than handing a zero-row
        # array to the scaler, which scikit-learn scalers refuse to process.
        return np.empty((0, 1))

    # np.array copies, so the caller's sequence is left untouched.
    window = np.array(last_sequence).reshape(1, seq_length, 1)

    scaled_predictions = []
    for _ in range(future_days):
        next_scaled = model.predict(window, verbose=0)[0, 0]
        scaled_predictions.append(next_scaled)

        # Slide the window: drop the oldest value, append the new prediction.
        window = np.append(window[0, 1:, 0], next_scaled).reshape(1, seq_length, 1)

    return scaler.inverse_transform(np.array(scaled_predictions).reshape(-1, 1))

# --- Main Execution ---
if __name__ == "__main__":
    # End-to-end experiment: load the data, build chronological
    # train/validation/test splits, train the four models, compare their
    # test metrics in price space and plot a 30-step LSTM forecast.

    # 1. Load Data
    try:
        sb_df = load_and_prepare_data(FILE_PATH)
    except FileNotFoundError as e:
        print(e)
        # `exit()` is an interactive helper that is not guaranteed to exist;
        # SystemExit always is, and the non-zero status marks the failure.
        raise SystemExit(1) from e

    # --- Preparation for Time Series Models (1, 2, 4) ---
    print("\n--- Preparing Data for Time Series Models (LSTM, TCN, Lag DNN) ---")
    # Using Log_Open for Models 1 & 2, Smoothed_Log_Open for Model 4
    data_log_open = sb_df[["Log_Open"]].values
    data_smoothed_log_open = sb_df[["Smoothed_Log_Open"]].values

    # Every sample consumes SEQ_LENGTH input rows plus one target row.
    n_samples = len(data_log_open) - SEQ_LENGTH
    if n_samples <= 0:
        print("Error: Not enough data to create sequences/lags with the specified length.")
        raise SystemExit(1)

    # Split boundaries (TimeSeries Split - chronological), in sample indices.
    # Using common split ratios from Model 1 (70/15/15) for consistency.
    train_size_idx = int(n_samples * 0.7)
    val_size_idx = int(n_samples * (0.7 + 0.15))

    # Scale data. The scalers are fitted ONLY on rows that training samples
    # can see (inputs and targets of samples [0, train_size_idx)), so the
    # min/max of the validation and test periods do not leak into training.
    # Later rows may therefore scale outside [0, 1].
    train_rows_end = train_size_idx + SEQ_LENGTH

    scaler_log = MinMaxScaler(feature_range=(0, 1))
    scaler_log.fit(data_log_open[:train_rows_end])
    scaled_log_data = scaler_log.transform(data_log_open)

    scaler_smoothed = MinMaxScaler(feature_range=(0, 1))
    scaler_smoothed.fit(data_smoothed_log_open[:train_rows_end])
    scaled_smoothed_data = scaler_smoothed.transform(data_smoothed_log_open)

    # Create sequences for LSTM/TCN (using Log_Open)
    X_seq, Y_seq = create_sequences(scaled_log_data, SEQ_LENGTH)

    # Create lag features for Lag DNN (using Smoothed_Log_Open);
    # window = SEQ_LENGTH so both sample sets line up index by index.
    X_lag, y_lag = create_lag_features(scaled_smoothed_data, window=SEQ_LENGTH)

    X_train_seq, Y_train_seq = X_seq[:train_size_idx], Y_seq[:train_size_idx]
    X_val_seq, Y_val_seq = X_seq[train_size_idx:val_size_idx], Y_seq[train_size_idx:val_size_idx]
    X_test_seq, Y_test_seq = X_seq[val_size_idx:], Y_seq[val_size_idx:]

    # Split for Lag DNN (using same indices for comparable test set size)
    X_train_lag, y_train_lag = X_lag[:train_size_idx], y_lag[:train_size_idx]
    X_val_lag, y_val_lag = X_lag[train_size_idx:val_size_idx], y_lag[train_size_idx:val_size_idx]
    X_test_lag, y_test_lag = X_lag[val_size_idx:], y_lag[val_size_idx:]

    # Reshape/Flatten inputs: LSTM/TCN take (samples, steps, 1),
    # the Lag DNN takes (samples, steps).
    X_train_lstm_tcn = np.reshape(X_train_seq, (X_train_seq.shape[0], X_train_seq.shape[1], 1))
    X_val_lstm_tcn = np.reshape(X_val_seq, (X_val_seq.shape[0], X_val_seq.shape[1], 1))
    X_test_lstm_tcn = np.reshape(X_test_seq, (X_test_seq.shape[0], X_test_seq.shape[1], 1))

    X_train_lag_flat = X_train_lag.reshape(X_train_lag.shape[0], -1)
    X_val_lag_flat = X_val_lag.reshape(X_val_lag.shape[0], -1)
    X_test_lag_flat = X_test_lag.reshape(X_test_lag.shape[0], -1)

    print(f"LSTM/TCN Train shape: X-{X_train_lstm_tcn.shape}, Y-{Y_train_seq.shape}")
    print(f"LSTM/TCN Val shape:   X-{X_val_lstm_tcn.shape}, Y-{Y_val_seq.shape}")
    print(f"LSTM/TCN Test shape:  X-{X_test_lstm_tcn.shape}, Y-{Y_test_seq.shape}")
    print(f"Lag DNN Train shape: X-{X_train_lag_flat.shape}, Y-{y_train_lag.shape}")
    print(f"Lag DNN Val shape:   X-{X_val_lag_flat.shape}, Y-{y_val_lag.shape}")
    print(f"Lag DNN Test shape:  X-{X_test_lag_flat.shape}, Y-{y_test_lag.shape}")

    # --- Preparation for Polynomial DNN Model (Model 3) ---
    print("\n--- Preparing Data for Polynomial DNN Model ---")
    df_poly = sb_df.copy()  # Use a copy to avoid modifying the original df

    # Feature Engineering (Days, Days^2, Days^3); the target is the raw 'Open' price
    df_poly['Days'] = (df_poly.index - df_poly.index.min()).days
    df_poly['Days2'] = df_poly['Days'] ** 2
    df_poly['Days3'] = df_poly['Days'] ** 3

    X_poly_feat = df_poly[['Days', 'Days2', 'Days3']].values.astype(float)
    y_poly_raw = df_poly['Open'].values.astype(float).reshape(-1, 1)

    # Split first, scale afterwards. shuffle=False keeps the rows in date
    # order, so this split is chronological as well: the last 20% is the test
    # set and the last 18.75% of the remainder (0.15 / 0.8) is validation.
    X_train_poly_raw, X_test_poly_raw, y_train_poly_raw, y_test_poly_raw = train_test_split(
        X_poly_feat, y_poly_raw, test_size=0.20, random_state=SEED, shuffle=False
    )
    X_train_poly_raw, X_val_poly_raw, y_train_poly_raw, y_val_poly_raw = train_test_split(
        X_train_poly_raw, y_train_poly_raw, test_size=0.1875, random_state=SEED, shuffle=False
    )

    # Scale features and target separately, fitting on the training rows only
    # so that validation/test statistics do not leak into training.
    scaler_X_poly = MinMaxScaler()
    X_train_poly = scaler_X_poly.fit_transform(X_train_poly_raw)
    X_val_poly = scaler_X_poly.transform(X_val_poly_raw)
    X_test_poly = scaler_X_poly.transform(X_test_poly_raw)

    scaler_y_poly = MinMaxScaler()
    y_train_poly = scaler_y_poly.fit_transform(y_train_poly_raw)
    y_val_poly = scaler_y_poly.transform(y_val_poly_raw)
    y_test_poly = scaler_y_poly.transform(y_test_poly_raw)

    print(f"Poly DNN Train shape: X-{X_train_poly.shape}, Y-{y_train_poly.shape}")
    print(f"Poly DNN Val shape:   X-{X_val_poly.shape}, Y-{y_val_poly.shape}")
    print(f"Poly DNN Test shape:  X-{X_test_poly.shape}, Y-{y_test_poly.shape}")

    # --- Define Callbacks ---
    early_stop_mse = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True, verbose=1)
    reduce_lr_mse = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

    # Slightly different patience / min_lr for the Huber-loss model (TCN)
    early_stop_huber = EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True, verbose=1)
    reduce_lr_huber = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5, verbose=1)

    # --- Model Training and Evaluation ---
    # Maps model name -> test metrics in PRICE space, so all rows of the final
    # comparison table are on the same scale.
    models_results = {}

    # === Model 1: LSTM ===
    print("\n=== Training Model 1: LSTM ===")
    model_lstm = build_lstm_model(SEQ_LENGTH)
    model_lstm.summary()
    history_lstm = model_lstm.fit(X_train_lstm_tcn, Y_train_seq, epochs=100,
                                  batch_size=32, validation_data=(X_val_lstm_tcn, Y_val_seq),
                                  callbacks=[early_stop_mse, reduce_lr_mse], verbose=1)
    plot_training_history(history_lstm, "LSTM")

    # Evaluate LSTM
    Y_pred_lstm_scaled = model_lstm.predict(X_test_lstm_tcn)
    evaluate_model(Y_test_seq, Y_pred_lstm_scaled, "LSTM", "Test (Scaled)")
    # Undoing the scaler yields LOG prices (the scaler was fitted on Log_Open).
    Y_test_lstm_actual = scaler_log.inverse_transform(Y_test_seq.reshape(-1, 1))
    Y_pred_lstm_actual = scaler_log.inverse_transform(Y_pred_lstm_scaled)
    # exp() converts log prices back to prices for the "Actual Price" metrics
    # (the 1e-9 epsilon added before the log is negligible).
    lstm_metrics = evaluate_model(np.exp(Y_test_lstm_actual), np.exp(Y_pred_lstm_actual),
                                  "LSTM", "Test (Actual Price)")
    models_results["LSTM"] = lstm_metrics

    # Plot LSTM Predictions (log scale). Test targets start SEQ_LENGTH rows
    # after the first test sample index.
    test_dates_lstm = sb_df.index[val_size_idx + SEQ_LENGTH:]
    if len(test_dates_lstm) == len(Y_test_lstm_actual):
        plot_predictions(test_dates_lstm, Y_test_lstm_actual, Y_pred_lstm_actual, "LSTM", "Actual vs Predicted (Log_Open)")
    else:
        print(f"Warning: Date length mismatch for LSTM plot ({len(test_dates_lstm)} vs {len(Y_test_lstm_actual)})")

    # === Model 2: TCN ===
    if tcn_available:
        print("\n=== Training Model 2: TCN ===")
        model_tcn = build_tcn_model(SEQ_LENGTH)
        if model_tcn:
            model_tcn.summary()
            history_tcn = model_tcn.fit(X_train_lstm_tcn, Y_train_seq, epochs=100,
                                        batch_size=32, validation_data=(X_val_lstm_tcn, Y_val_seq),
                                        callbacks=[early_stop_huber, reduce_lr_huber], verbose=1)
            plot_training_history(history_tcn, "TCN")

            # Evaluate TCN (same scaler and test targets as the LSTM)
            Y_pred_tcn_scaled = model_tcn.predict(X_test_lstm_tcn)
            evaluate_model(Y_test_seq, Y_pred_tcn_scaled, "TCN", "Test (Scaled)")
            Y_test_tcn_actual = scaler_log.inverse_transform(Y_test_seq.reshape(-1, 1))
            Y_pred_tcn_actual = scaler_log.inverse_transform(Y_pred_tcn_scaled)
            tcn_metrics = evaluate_model(np.exp(Y_test_tcn_actual), np.exp(Y_pred_tcn_actual),
                                         "TCN", "Test (Actual Price)")
            models_results["TCN"] = tcn_metrics

            # Plot TCN Predictions (using same dates as LSTM test set)
            if len(test_dates_lstm) == len(Y_test_tcn_actual):
                plot_predictions(test_dates_lstm, Y_test_tcn_actual, Y_pred_tcn_actual, "TCN", "Actual vs Predicted (Log_Open)")
            else:
                print(f"Warning: Date length mismatch for TCN plot ({len(test_dates_lstm)} vs {len(Y_test_tcn_actual)})")
        else:
            print("Skipping TCN model building.")
    else:
        print("\nSkipping Model 2: TCN (library not installed)")

    # === Model 3: Polynomial DNN ===
    print("\n=== Training Model 3: Polynomial DNN ===")
    model_poly_dnn = build_poly_dnn_model(X_train_poly.shape[1])
    model_poly_dnn.summary()
    history_poly = model_poly_dnn.fit(X_train_poly, y_train_poly, epochs=500,
                                      batch_size=16, validation_data=(X_val_poly, y_val_poly),
                                      callbacks=[early_stop_mse, reduce_lr_mse], verbose=1)
    plot_training_history(history_poly, "Polynomial DNN")

    # Evaluate Polynomial DNN (its target is already the raw price)
    y_pred_poly_scaled = model_poly_dnn.predict(X_test_poly)
    evaluate_model(y_test_poly, y_pred_poly_scaled, "Polynomial DNN", "Test (Scaled)")
    y_test_poly_actual = scaler_y_poly.inverse_transform(y_test_poly)
    y_pred_poly_actual = scaler_y_poly.inverse_transform(y_pred_poly_scaled)
    poly_dnn_metrics = evaluate_model(y_test_poly_actual, y_pred_poly_actual, "Polynomial DNN", "Test (Actual Price)")
    models_results["Polynomial DNN"] = poly_dnn_metrics

    # Plot Polynomial DNN Predictions. The test rows are the tail of the
    # frame, right after the train and validation rows.
    test_dates_poly = df_poly.index[len(X_train_poly) + len(X_val_poly):]
    if len(test_dates_poly) == len(y_test_poly_actual):
        plot_predictions(test_dates_poly, y_test_poly_actual, y_pred_poly_actual, "Polynomial DNN", "Actual vs Predicted (Raw Open)")
    else:
        print(f"Warning: Date length mismatch for Poly DNN plot ({len(test_dates_poly)} vs {len(y_test_poly_actual)})")

    # === Model 4: Lag Feature DNN ===
    print("\n=== Training Model 4: Lag Feature DNN ===")
    model_lag_dnn = build_lag_dnn_model(X_train_lag_flat.shape[1])
    model_lag_dnn.summary()
    history_lag = model_lag_dnn.fit(X_train_lag_flat, y_train_lag, epochs=200,
                                    batch_size=32, validation_data=(X_val_lag_flat, y_val_lag),
                                    callbacks=[early_stop_mse, reduce_lr_mse], verbose=1)
    plot_training_history(history_lag, "Lag Feature DNN")

    # Evaluate Lag Feature DNN
    y_pred_lag_scaled = model_lag_dnn.predict(X_test_lag_flat)
    evaluate_model(y_test_lag, y_pred_lag_scaled, "Lag Feature DNN", "Test (Scaled - Smoothed Log)")
    # Smoothed LOG prices after undoing the smoothed scaler.
    y_test_lag_actual = scaler_smoothed.inverse_transform(y_test_lag)
    y_pred_lag_actual = scaler_smoothed.inverse_transform(y_pred_lag_scaled)
    # exp() again brings the values to price scale for the comparison table.
    lag_dnn_metrics = evaluate_model(np.exp(y_test_lag_actual), np.exp(y_pred_lag_actual),
                                     "Lag Feature DNN", "Test (Actual Price - based on Smoothed Log)")
    models_results["Lag Feature DNN"] = lag_dnn_metrics

    # Plot Lag Feature DNN Predictions (using same dates as LSTM/TCN test set).
    # Note: the unsmoothed Log_Open actuals are drawn against the
    # Smoothed_Log_Open predictions.
    if len(test_dates_lstm) == len(y_test_lag_actual):
        plot_predictions(test_dates_lstm, Y_test_lstm_actual, y_pred_lag_actual, "Lag Feature DNN", "Actual (Log Open) vs Predicted (Smoothed Log Open)")
    else:
        print(f"Warning: Date length mismatch for Lag DNN plot ({len(test_dates_lstm)} vs {len(y_test_lag_actual)})")

    # --- Compare Model Results ---
    print("\n--- Final Model Comparison (Actual Price Metrics on Test Set) ---")
    results_df = pd.DataFrame(models_results).T  # One row per model
    print(results_df.sort_values(by='MAPE'))  # Best (lowest) MAPE first

    # --- Future Predictions (Example using LSTM model) ---
    print(f"\n--- Predicting Next {FUTURE_DAYS_TO_PREDICT} Days using LSTM Model ---")
    last_sequence_scaled = scaled_log_data[-SEQ_LENGTH:]  # Most recent window of scaled log data
    future_preds_log = predict_future(model_lstm, last_sequence_scaled, SEQ_LENGTH, FUTURE_DAYS_TO_PREDICT, scaler_log)

    # Create future dates.
    # NOTE(review): freq="D" produces calendar days, weekends included, while
    # the series holds trading days only - confirm whether business days
    # (freq="B") are wanted here.
    last_date = sb_df.index[-1]
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=FUTURE_DAYS_TO_PREDICT, freq="D")

    # Plot Future Predictions
    plt.figure(figsize=(15, 7))
    # Historical Log_Open for context. data_log_open is already unscaled, so
    # it is plotted as is; running it through inverse_transform would put it
    # on a different scale than the forecasts.
    plt.plot(sb_df.index, data_log_open, label="Historical Actual (Log Open)", color='blue')
    # Forecasts returned by predict_future are on the same log scale.
    plt.plot(future_dates, future_preds_log, "r-", label=f"Future {FUTURE_DAYS_TO_PREDICT}-Day Predictions (LSTM)", marker='o')
    plt.title("Starbucks Stock Price: Historical Data and Future Predictions (LSTM)")
    plt.xlabel("Date")
    plt.ylabel("Stock Price (Log Open)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    print("\n--- Script Finished ---")