In [10]:
import random

from sklearn.metrics import mean_squared_error
# Imports
import matplotlib.pyplot as plt
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
from tensorflow.python.ops.gen_nn_ops import LeakyRelu

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed, Input, LeakyReLU
from keras.optimizers import Adam


In [11]:
# Functions
def plot_training_history(history):
    """Plot training and validation loss per epoch on one figure.

    Parameters:
        history: Object exposing a ``history`` mapping with ``'loss'`` and
            ``'val_loss'`` entries (presumably a Keras ``History``; only the
            mapping is used here).
    """
    curves = (
        ('loss', 'Training Loss'),
        ('val_loss', 'Validation Loss'),
    )

    plt.figure(figsize=(12, 6))
    for key, legend_label in curves:
        plt.plot(history.history[key], label=legend_label)

    plt.title('Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.grid(True)
    plt.show()


def check_stationarity(series):
    """Run ``adfuller`` on ``series`` and print the outcome.

    Prints the ADF statistic, the p-value, and a verdict: the series is
    reported as stationary when the p-value is at or below 0.05, otherwise
    as not stationary. Nothing is returned.
    """
    statistic, p_value = adfuller(series)[:2]
    print("ADF Statistic:", statistic)
    print("p-value:", p_value)

    verdict = "stationary" if p_value <= 0.05 else "not stationary"
    print(f"The series is {verdict}.")

from sklearn.preprocessing import MinMaxScaler
import numpy as np

def window_generator(data, feature_columns, target_column, input_size, output_size, stride):
    """Scale a frame and cut it into sliding input/target windows.

    The frame is sorted by its index, features and targets are scaled with
    two freshly fitted ``MinMaxScaler`` instances, and a window is taken
    every ``stride`` rows. Each input window holds ``input_size``
    consecutive rows of the scaled features; its target holds the
    ``output_size`` rows of the scaled targets that immediately follow.

    Parameters:
        data: pandas DataFrame holding the feature and target columns.
        feature_columns: Column name, or list of column names, used as inputs.
        target_column: Column name, or list of column names, to predict.
        input_size: Number of rows per input window (must be >= 1).
        output_size: Number of rows per target window (must be >= 1).
        stride: Row offset between consecutive window starts (must be >= 1).

    Returns:
        ``(X, y, feature_scaler, target_scaler)``. ``X`` has shape
        ``(n_windows, input_size, n_features)`` and ``y`` has shape
        ``(n_windows, output_size, n_targets)``; both keep those trailing
        dimensions even when ``data`` is too short to yield any window.
        The two scalers are the ones fitted on ``data``.

    Raises:
        ValueError: If ``input_size``, ``output_size`` or ``stride`` is
            smaller than 1.
    """
    if min(input_size, output_size, stride) < 1:
        raise ValueError(
            "input_size, output_size and stride must all be >= 1 "
            f"(got {input_size}, {output_size}, {stride})"
        )

    # A bare column name would select a 1-D Series, which the scalers reject;
    # wrapping it keeps the selection two-dimensional.
    if isinstance(feature_columns, str):
        feature_columns = [feature_columns]
    if isinstance(target_column, str):
        target_column = [target_column]

    data = data.sort_index()

    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()

    scaled_features = feature_scaler.fit_transform(data[feature_columns])
    scaled_target = target_scaler.fit_transform(data[target_column])

    window_starts = range(0, len(data) - input_size - output_size + 1, stride)

    if len(window_starts) == 0:
        # np.array([]) would lose the window dimensions; keep them explicit
        # so downstream shape checks still see (0, steps, columns).
        X = np.empty((0, input_size, scaled_features.shape[1]), dtype=scaled_features.dtype)
        y = np.empty((0, output_size, scaled_target.shape[1]), dtype=scaled_target.dtype)
    else:
        X = np.stack([scaled_features[start:start + input_size] for start in window_starts])
        y = np.stack([
            scaled_target[start + input_size:start + input_size + output_size]
            for start in window_starts
        ])

    return X, y, feature_scaler, target_scaler



def test_window_generator(test_data, feature_columns, target_column, input_size, output_size, stride, feature_scaler, target_scaler):
    X_test = []
    y_test = []

    test_data = test_data.sort_index()

    scaled_features = feature_scaler.transform(test_data[feature_columns])
    scaled_target = target_scaler.transform(test_data[target_column])

    for start in range(0, len(test_data) - input_size - output_size + 1, stride):
        end_input = start + input_size
        end_output = end_input + output_size

        X_test.append(scaled_features[start:end_input])
        y_test.append(scaled_target[end_input:end_output])

    return np.array(X_test), np.array(y_test)



In [12]:


# Stacked-LSTM regressor: three recurrent layers of decreasing width, each
# followed by 20% dropout, feeding a 2-unit linear head.
# NOTE(review): the input is declared as 14 time steps x 882 features, while
# the windows built elsewhere in this notebook use input_size=60 — confirm
# which window length this model is meant for before fitting it.
model = Sequential()
model.add(Input(shape=(14, 882)))

# Recurrent layer 1 — returns the full sequence for the next LSTM.
model.add(LSTM(50, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))

# Recurrent layer 2 — still sequence-to-sequence.
model.add(LSTM(25, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))

# Recurrent layer 3 — collapses the sequence to its final state.
model.add(LSTM(10, activation='tanh', return_sequences=False))
model.add(Dropout(0.2))

# Linear head producing two values per sample.
model.add(Dense(2, activation='linear'))

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['MAE'])
model.summary()

In [31]:
# --- Load the per-ticker table ---------------------------------------------
# The first (unnamed) CSV column holds the timestamps. It is parsed once,
# directly to UTC, and becomes the 'Date' index; sorting makes the
# label-based date slicing below well defined.
ticker_data = pd.read_csv('ticker_data.csv', index_col=0)
ticker_data.index = pd.to_datetime(ticker_data.index, utc=True)
ticker_data.index.name = 'Date'
ticker_data = ticker_data.sort_index()

columns = ticker_data.columns.to_list()

# Column naming convention used throughout this cell.
VOLUME_PREFIX = "Volume_"
TARGET_PREFIX = "Price_Change_"
SEED = 42  # fixes which tickers are sampled as prediction targets

# Recover tickers by stripping the known prefix. Splitting on '_' and taking
# element 1 would return "Change" for every "Price_Change_<ticker>" column.
# Deriving them from the target prefix also guarantees that every sampled
# ticker has a matching target column.
tickers = sorted({
    col[len(TARGET_PREFIX):]
    for col in columns
    if col.startswith(TARGET_PREFIX)
})

# Sorted input + seeded RNG: the same two tickers are chosen on every run.
random_tickers = random.Random(SEED).sample(tickers, 2)

# Feature columns following the naming convention (kept in file order).
X_columns = [col for col in columns if col.startswith((VOLUME_PREFIX, TARGET_PREFIX))]
y_columns = [f"{TARGET_PREFIX}{ticker}" for ticker in random_tickers]

# Label slicing is inclusive on both ends, so the training range stops the
# day before the test range starts; otherwise 2018-01-01 would be in both.
train_data = ticker_data.loc['2010-01-01':'2017-12-31']
test_data = ticker_data.loc['2018-01-01':'2020-01-01']

# Every column of the file is used as a model input, exactly as before;
# X_columns is kept for callers that want only the convention-named subset.
X_train, y_train, feature_scaler, target_scaler = window_generator(train_data, columns, y_columns, 60, 1, 1)
X_test, y_test = test_window_generator(test_data, columns, y_columns, 60, 1, 1, feature_scaler, target_scaler)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1718, 60, 882), (1718, 1, 2), (443, 60, 882), (443, 1, 2))

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, TimeDistributed
from tensorflow.keras.optimizers import Adam
from keras_tuner import RandomSearch
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Reshape

def build_model(hp):
    """
    Assemble and compile a two-layer LSTM forecaster from tuner choices.

    Five hyperparameters are sampled from ``hp``: the unit count and dropout
    rate of each LSTM stage and the Adam learning rate. The network takes
    inputs of shape (60, 882) and emits a (7, 2) tensor per sample.

    Parameters:
        hp: Hyperparameter object providing ``Int``, ``Float`` and ``Choice``.

    Returns:
        Compiled Keras Sequential model (MSE loss, MAE metric).
    """
    # Sample in a fixed order so the search space is registered consistently.
    first_units = hp.Int('lstm_units1', min_value=50, max_value=150, step=50)
    first_dropout = hp.Float('dropout_rate1', min_value=0.1, max_value=0.5, step=0.1)
    second_units = hp.Int('lstm_units2', min_value=25, max_value=75, step=25)
    second_dropout = hp.Float('dropout_rate2', min_value=0.1, max_value=0.5, step=0.1)
    step_size = hp.Choice('learning_rate', [0.001, 0.005, 0.01])

    model = Sequential()
    model.add(Input(shape=(60, 882)))

    # Stage 1 keeps the time axis so stage 2 can consume the sequence.
    model.add(LSTM(units=first_units, activation='tanh', return_sequences=True))
    model.add(Dropout(rate=first_dropout))

    # Stage 2 reduces the sequence to a single vector.
    model.add(LSTM(units=second_units, activation='relu', return_sequences=False))
    model.add(Dropout(rate=second_dropout))

    # 14 linear outputs, viewed as 7 rows of 2 values.
    model.add(Dense(7 * 2, activation='linear'))
    model.add(Reshape((7, 2)))

    model.compile(
        optimizer=Adam(learning_rate=step_size),
        loss='mean_squared_error',
        metrics=['MAE']
    )
    return model


# Window geometry for this experiment. These must agree with the shapes
# hard-coded in build_model: Input(shape=(60, 882)) and Reshape((7, 2)).
INPUT_STEPS = 60
FORECAST_STEPS = 7
WINDOW_STRIDE = 1

# Seed for the random hyperparameter search so the sequence of sampled
# trials is the same on every run (weight initialisation is still random).
TUNER_SEED = 42

# Initialize Keras Tuner RandomSearch
tuner = RandomSearch(
    build_model,
    objective='val_MAE',
    max_trials=20,  # Increase for more comprehensive search
    executions_per_trial=1,
    seed=TUNER_SEED,
    directory='tuner_results',
    project_name='lstm_tuning_multi_stock',
    overwrite=True
)

# Rebuild the windows for a 7-step forecast; the scalers are fitted on the
# training frame and reused for the held-out frame.
X_train, y_train, feature_scaler, target_scaler = window_generator(
    train_data, columns, y_columns, INPUT_STEPS, FORECAST_STEPS, WINDOW_STRIDE
)
X_test, y_test = test_window_generator(
    test_data, columns, y_columns, INPUT_STEPS, FORECAST_STEPS, WINDOW_STRIDE,
    feature_scaler, target_scaler
)

# EarlyStopping callback: stop after 2 epochs without a lower validation MAE
# and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_MAE',
    patience=2,
    restore_best_weights=True,
    mode='min'
)

# Search for the best hyperparameters
tuner.search(
    X_train, y_train,
    epochs=20,
    validation_split=0.2,
    verbose=2,
    callbacks=[early_stopping]
)

# Retrieve the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("\nBest Hyperparameters:")
for param, value in best_hps.values.items():
    print(f"{param}: {value}")

# Build a fresh, untrained model from the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)

# Train the best model
history = best_model.fit(
    X_train, y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=1
)

# Plot training history
def plot_history(history):
    """Draw two figures from a training history: loss first, then MAE.

    Parameters:
        history: Object exposing a ``history`` mapping with the keys
            ``'loss'``, ``'val_loss'``, ``'MAE'`` and ``'val_MAE'``.
    """
    # One entry per figure; each series is (history key, legend label, color).
    figures = (
        {
            'title': 'Model Loss Over Epochs',
            'ylabel': 'Loss (MSE)',
            'series': (
                ('loss', 'Training Loss', 'blue'),
                ('val_loss', 'Validation Loss', 'orange'),
            ),
        },
        {
            'title': 'Model MAE Over Epochs',
            'ylabel': 'Mean Absolute Error',
            'series': (
                ('MAE', 'Training MAE', 'green'),
                ('val_MAE', 'Validation MAE', 'red'),
            ),
        },
    )

    for spec in figures:
        plt.figure(figsize=(12, 6))
        for key, legend_label, line_color in spec['series']:
            plt.plot(history.history[key], label=legend_label, color=line_color)
        plt.title(spec['title'])
        plt.xlabel('Epochs')
        plt.ylabel(spec['ylabel'])
        plt.legend()
        plt.grid()
        plt.show()

plot_history(history)

# Debugging: list every completed trial, best score first.
# get_best_trials() defaults to num_trials=1, so an explicit count is needed;
# otherwise only the single best trial appears under "All Trials".
print("\nAll Trials:")
for trial in tuner.oracle.get_best_trials(num_trials=len(tuner.oracle.trials)):
    print(f"Trial ID: {trial.trial_id}")
    print(f"Hyperparameters: {trial.hyperparameters.values}")
    print(f"Score: {trial.score}")

Trial 10 Complete [00h 01m 23s]
val_MAE: 0.03738289698958397

Best val_MAE So Far: 0.03496492654085159
Total elapsed time: 00h 11m 16s

Search: Running Trial #11

Value             |Best Value So Far |Hyperparameter
50                |150               |lstm_units1
0.4               |0.1               |dropout_rate1
75                |25                |lstm_units2
0.4               |0.4               |dropout_rate2
0.001             |0.005             |learning_rate
Epoch 1/20
43/43 - 5s - 116ms/step - MAE: 0.1955 - loss: 0.0628 - val_MAE: 0.0972 - val_loss: 0.0121
Epoch 2/20
43/43 - 3s - 65ms/step - MAE: 0.1233 - loss: 0.0245 - val_MAE: 0.0772 - val_loss: 0.0080
Epoch 3/20


In [46]:
# Print id, hyperparameters and score for every trial the oracle holds.
for trial_id in tuner.oracle.trials:
    trial = tuner.oracle.trials[trial_id]
    summary_lines = (
        f"Trial ID: {trial_id}",
        f"Hyperparameters: {trial.hyperparameters.values}",
        f"Score: {trial.score}",
    )
    print("\n".join(summary_lines))

Trial ID: 00
Hyperparameters: {'lstm_units1': 150, 'dropout_rate1': 0.5, 'lstm_units2': 75, 'dropout_rate2': 0.5, 'lstm_units3': 40, 'learning_rate': 0.01}
Score: 0.2517491281032562
Trial ID: 01
Hyperparameters: {'lstm_units1': 50, 'dropout_rate1': 0.30000000000000004, 'lstm_units2': 50, 'dropout_rate2': 0.1, 'lstm_units3': 10, 'learning_rate': 0.005}
Score: 0.2748723030090332
Trial ID: 02
Hyperparameters: {'lstm_units1': 50, 'dropout_rate1': 0.4, 'lstm_units2': 75, 'dropout_rate2': 0.1, 'lstm_units3': 50, 'learning_rate': 0.01}
Score: 0.25560158491134644
Trial ID: 03
Hyperparameters: {'lstm_units1': 150, 'dropout_rate1': 0.1, 'lstm_units2': 50, 'dropout_rate2': 0.1, 'lstm_units3': 50, 'learning_rate': 0.005}
Score: 0.2594822645187378
Trial ID: 04
Hyperparameters: {'lstm_units1': 100, 'dropout_rate1': 0.4, 'lstm_units2': 25, 'dropout_rate2': 0.30000000000000004, 'lstm_units3': 10, 'learning_rate': 0.01}
Score: 0.2589743733406067
Trial ID: 05
Hyperparameters: {'lstm_units1': 50, 'dropou