In [None]:
!pip install ta
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Add, GlobalAveragePooling1D, Conv1D
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
import yfinance as yf
import random
import os
from google.colab import drive
from tensorflow.keras.losses import MeanSquaredLogarithmicError
import matplotlib.pyplot as plt
from numpy.lib.stride_tricks import sliding_window_view
import ta
from keras import regularizers

In [None]:
def get_s_and_p_tickers():
  """Return the purely alphabetic S&P 500 ticker symbols listed on Wikipedia.

  Reads the first HTML table of the Wikipedia "List of S&P 500 companies"
  page and keeps the entries of its ``Symbol`` column for which
  ``str.isalpha`` is true (symbols containing characters such as '.' or '-'
  are therefore dropped).

  Returns:
    list: The filtered ticker symbols, in table order.
  """
  constituents_table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
  all_symbols = constituents_table.Symbol.to_list()
  return list(filter(str.isalpha, all_symbols))

In [None]:
# Alphabetic-only S&P 500 symbols; this list is later mutated in place by the
# download helpers, which remove tickers that returned no data.
tickers = get_s_and_p_tickers()

In [None]:
def get_s_and_p(tickers):
  """Download 5 years of daily prices per ticker and build a small feature frame.

  For every ticker a DataFrame with the columns ``close``, ``high``, ``low``,
  ``time_index``, ``volatility``, ``rolling_average_close``,
  ``percent_change_close``, ``volume`` and ``diff`` is built; rows containing
  NaN or +/-inf are dropped.

  Args:
    tickers: List of ticker symbols. Tickers whose download comes back empty
      are removed from this list IN PLACE before returning.

  Returns:
    dict: ticker symbol -> cleaned feature DataFrame.
  """
  frames_by_ticker = {}
  empty_tickers = []

  for symbol in tickers:
    history = yf.download(symbol, period="5y", interval="1d")

    if history.empty:
      # Remember the ticker for removal and pause 15 s before the next request.
      empty_tickers.append(symbol)
      import time
      time.sleep(15)
      continue

    close_raw = history['Close']
    daily_return = close_raw.pct_change()
    close = close_raw.squeeze()

    features = pd.DataFrame({
        'close': close,
        'high': history['High'].squeeze(),
        'low': history['Low'].squeeze(),
        # Counts down from the number of rows to 1 (most recent row == 1).
        'time_index': np.arange(close.shape[0], 0, -1),
        # A single annualised figure for the whole history, repeated on every row.
        'volatility': (daily_return.std() * (252**0.5)).squeeze(),
        'rolling_average_close': close_raw.rolling(window=20).mean().squeeze(),
        'percent_change_close': (daily_return * 100).squeeze(),
        'volume': history['Volume'].squeeze(),
        'diff': close_raw.diff(1).squeeze(),
    })

    # Treat infinities as missing, then drop every incomplete row.
    features = features.replace([np.inf, -np.inf], np.nan).dropna()

    frames_by_ticker[symbol] = features

  for symbol in empty_tickers:
    tickers.remove(symbol)

  return frames_by_ticker

In [None]:
def get_s_and_p_additonal(tickers):
    """Download prices per ticker and build an extended technical/fundamental frame.

    On top of the basic price features, this adds volume, volatility, trend
    and momentum indicators from the ``ta`` package plus a set of fundamental
    metrics read from ``yf.Ticker(ticker).info``. Each fundamental metric is a
    single value that is repeated on every row of the ticker's frame.

    After feature construction, +/-inf is replaced by NaN, then NaNs are
    filled with 0 in non-object columns and with '' in object columns.

    Args:
        tickers: List of ticker symbols. Tickers that return no data or raise
            during processing are removed from this list IN PLACE.

    Returns:
        dict: ticker symbol -> feature DataFrame.
    """
    ticker_data_frames = {}
    remove = []

    for ticker in tickers:
        try:
            data = yf.download(ticker, period="5y", interval="1d")

            if data.empty:
                print(f"No data found for {ticker}")
                remove.append(ticker)
                continue

            close = data['Close'].squeeze()
            volume = data['Volume'].squeeze()
            high = data['High'].squeeze()
            low = data['Low'].squeeze()
            diff = data['Close'].diff(1).squeeze()
            percent_change_close = (data['Close'].pct_change() * 100).squeeze()
            # One annualised value for the whole history (repeated on every row).
            volatility = (data['Close'].pct_change().std() * (252**0.5)).squeeze()
            rolling_average_close = data['Close'].rolling(window=20).mean().squeeze()
            # Counts down from the number of rows to 1.
            time_index = np.arange(close.shape[0], 0, -1)

            ticker_df = pd.DataFrame({
                'close': close,
                'high': high,
                'low': low,
                'time_index': time_index,
                'volatility': volatility,
                'rolling_average_close': rolling_average_close,
                'percent_change_close': percent_change_close,
                'volume': volume,
                'diff': diff,
            })

            # Volume Indicators
            ticker_df['volume_adi'] = ta.volume.acc_dist_index(high, low, close, volume)
            ticker_df['volume_obv'] = ta.volume.on_balance_volume(close, volume)
            ticker_df['volume_cmf'] = ta.volume.chaikin_money_flow(high, low, close, volume)
            ticker_df['volume_fi'] = ta.volume.force_index(close, volume)
            ticker_df['volume_em'] = ta.volume.ease_of_movement(high, low, volume)
            ticker_df['volume_sma_em'] = ta.volume.sma_ease_of_movement(high, low, volume)
            ticker_df['volume_vpt'] = ta.volume.volume_price_trend(close, volume)
            ticker_df['volume_nvi'] = ta.volume.negative_volume_index(close, volume)
            ticker_df['volume_vwap'] = ta.volume.volume_weighted_average_price(high, low, close, volume)
            ticker_df['volume_mfi'] = ta.volume.money_flow_index(high, low, close, volume)

            # Volatility Indicators
            ticker_df['volatility_atr'] = ta.volatility.average_true_range(high, low, close)
            ticker_df['volatility_bbm'] = ta.volatility.bollinger_mavg(close)
            ticker_df['volatility_bbh'] = ta.volatility.bollinger_hband(close)
            ticker_df['volatility_bbl'] = ta.volatility.bollinger_lband(close)
            ticker_df['volatility_bbhi'] = ta.volatility.bollinger_hband_indicator(close)
            ticker_df['volatility_bbli'] = ta.volatility.bollinger_lband_indicator(close)
            ticker_df['volatility_bbw'] = ta.volatility.bollinger_wband(close)
            ticker_df['volatility_bbp'] = ta.volatility.bollinger_pband(close)
            # Middle (centre) band gets its own key; it previously shared
            # 'volatility_kcp' with the percentage band below and was
            # overwritten, so the middle band never reached the frame.
            ticker_df['volatility_kcc'] = ta.volatility.keltner_channel_mband(high, low, close)
            ticker_df['volatility_kch'] = ta.volatility.keltner_channel_hband(high, low, close)
            ticker_df['volatility_kcl'] = ta.volatility.keltner_channel_lband(high, low, close)
            ticker_df['volatility_kchi'] = ta.volatility.keltner_channel_hband_indicator(high, low, close)
            ticker_df['volatility_kcli'] = ta.volatility.keltner_channel_lband_indicator(high, low, close)
            ticker_df['volatility_kcw'] = ta.volatility.keltner_channel_wband(high, low, close)
            ticker_df['volatility_kcp'] = ta.volatility.keltner_channel_pband(high, low, close)
            ticker_df['volatility_dcl'] = ta.volatility.donchian_channel_lband(high, low, close)
            ticker_df['volatility_dch'] = ta.volatility.donchian_channel_hband(high, low, close)
            ticker_df['volatility_dcm'] = ta.volatility.donchian_channel_mband(high, low, close)
            ticker_df['volatility_dcw'] = ta.volatility.donchian_channel_wband(high, low, close)
            ticker_df['volatility_dcp'] = ta.volatility.donchian_channel_pband(high, low, close)
            ticker_df['volatility_ui'] = ta.volatility.ulcer_index(close)

            # Trend Indicators
            ticker_df['trend_macd'] = ta.trend.macd(close)
            ticker_df['trend_macd_signal'] = ta.trend.macd_signal(close)
            ticker_df['trend_macd_diff'] = ta.trend.macd_diff(close)
            ticker_df['trend_adx'] = ta.trend.adx(high, low, close)
            ticker_df['trend_adx_pos'] = ta.trend.adx_pos(high, low, close)
            ticker_df['trend_adx_neg'] = ta.trend.adx_neg(high, low, close)
            ticker_df['trend_vortex_ind_pos'] = ta.trend.vortex_indicator_pos(high, low, close)
            ticker_df['trend_vortex_ind_neg'] = ta.trend.vortex_indicator_neg(high, low, close)
            ticker_df['trend_trix'] = ta.trend.trix(close)
            ticker_df['trend_mass_index'] = ta.trend.mass_index(high, low)
            ticker_df['trend_cci'] = ta.trend.cci(high, low, close)
            ticker_df['trend_dpo'] = ta.trend.dpo(close)
            ticker_df['trend_kst'] = ta.trend.kst(close)
            ticker_df['trend_kst_sig'] = ta.trend.kst_sig(close)
            ticker_df['trend_ichimoku_a'] = ta.trend.ichimoku_a(high, low)
            ticker_df['trend_ichimoku_b'] = ta.trend.ichimoku_b(high, low)
            ticker_df['trend_ichimoku_base'] = ta.trend.ichimoku_base_line(high, low)
            ticker_df['trend_ichimoku_conv'] = ta.trend.ichimoku_conversion_line(high, low)
            ticker_df['trend_sma'] = ta.trend.sma_indicator(close)
            ticker_df['trend_ema'] = ta.trend.ema_indicator(close)
            ticker_df['trend_wma'] = ta.trend.wma_indicator(close)
            ticker_df['trend_aroon_up'] = ta.trend.aroon_up(high, low)
            ticker_df['trend_aroon_down'] = ta.trend.aroon_down(high, low)

            # Momentum Indicators
            ticker_df['momentum_rsi'] = ta.momentum.rsi(close)
            ticker_df['momentum_stoch_rsi'] = ta.momentum.stochrsi(close)
            ticker_df['momentum_stoch_rsi_k'] = ta.momentum.stochrsi_k(close)
            ticker_df['momentum_stoch_rsi_d'] = ta.momentum.stochrsi_d(close)
            ticker_df['momentum_stoch'] = ta.momentum.stoch(high, low, close)
            ticker_df['momentum_stoch_signal'] = ta.momentum.stoch_signal(high, low, close)
            ticker_df['momentum_tsi'] = ta.momentum.tsi(close)
            ticker_df['momentum_ultimate_oscillator'] = ta.momentum.ultimate_oscillator(high, low, close)
            ticker_df['momentum_ao'] = ta.momentum.awesome_oscillator(high, low)
            ticker_df['momentum_kama'] = ta.momentum.kama(close)
            ticker_df['momentum_roc'] = ta.momentum.roc(close)
            ticker_df['momentum_ppo'] = ta.momentum.ppo(close)
            ticker_df['momentum_ppo_signal'] = ta.momentum.ppo_signal(close)
            ticker_df['momentum_ppo_hist'] = ta.momentum.ppo_hist(close)
            ticker_df['momentum_pvo'] = ta.momentum.pvo(volume)
            ticker_df['momentum_pvo_signal'] = ta.momentum.pvo_signal(volume)
            ticker_df['momentum_pvo_hist'] = ta.momentum.pvo_hist(volume)
            ticker_df['momentum_wr'] = ta.momentum.williams_r(high, low, close)

            # Get fundamental data
            financial_data = yf.Ticker(ticker).info

            # Add fundamental metrics to DataFrame (missing keys become NaN)
            metrics = {
                'forward_pe': financial_data.get('forwardPE', np.nan),
                'target_median_price': financial_data.get('targetMedianPrice', np.nan),
                'target_mean_price': financial_data.get('targetMeanPrice', np.nan),
                'current_ratio': financial_data.get('currentRatio', np.nan),
                #'trailing_peg_ratio': financial_data.get('trailingPegRatio', np.nan),
                'short_date': financial_data.get('dateShortInterest', np.nan),
                'price_to_book': financial_data.get('priceToBook', np.nan),
                'enterprise_to_revenue': financial_data.get('enterpriseToRevenue', np.nan),
                'enterprise_to_ebitda': financial_data.get('enterpriseToEbitda', np.nan),
                'year_change': financial_data.get('52WeekChange', np.nan),
                'roa': financial_data.get('returnOnAssets', np.nan),
                'roe': financial_data.get('returnOnEquity', np.nan),
                'revenue_growth': financial_data.get('revenueGrowth', np.nan),
                'gross_margins': financial_data.get('grossMargins', np.nan),
                'ebitda_margins': financial_data.get('ebitdaMargins', np.nan),
                'operating_margins': financial_data.get('operatingMargins', np.nan),
                #'trailing_pe': financial_data.get('trailingPE', np.nan),
                'beta': financial_data.get('beta', np.nan),
                'audit_risk': financial_data.get('auditRisk', np.nan),
                'board_risk': financial_data.get('boardRisk', np.nan),
                'overall_risk': financial_data.get('overallRisk', np.nan),
                'average_volume': financial_data.get('averageVolume', np.nan),
                'average_volume_10d': financial_data.get('averageVolume10days', np.nan),
                'average_daily_volume_10d': financial_data.get('averageDailyVolume10Day', np.nan),
                'price_to_sales': financial_data.get('priceToSalesTrailing12Months', np.nan),
                'short_ratio': financial_data.get('shortRatio', np.nan),
                'short_percent_float': financial_data.get('shortPercentOfFloat', np.nan)
            }

            # Fill DataFrame with constant values for all rows
            for metric, value in metrics.items():
                ticker_df[metric] = value

            # Clean up the data: infinities -> NaN, then fill NaN by dtype
            ticker_df.replace([np.inf, -np.inf], np.nan, inplace=True)
            ticker_df = ticker_df.infer_objects()
            ticker_df = ticker_df.apply(lambda col: col.fillna(0) if col.dtype != 'object' else col.fillna(''))

            # Add to dictionary
            ticker_data_frames[ticker] = ticker_df

        except Exception as e:
            # Best effort: report the failure, drop the ticker, keep going.
            print(f"Error processing {ticker}: {str(e)}")
            remove.append(ticker)
            continue

    for ticker in remove:
        tickers.remove(ticker)

    return ticker_data_frames

In [None]:
# Download price history and basic features for every ticker. get_s_and_p also
# removes tickers with empty downloads from `tickers` in place.
ticker_data_frames = get_s_and_p(tickers)

In [None]:
from typing import Sequence
def zeroMaxScaler(data, train_percent):
  """Divide `data` by the per-last-axis maximum of its leading training slice.

  The maximum is taken over axes 0 and 1 of the first
  ``floor(len(data) * train_percent)`` entries, floored at 0.001 so the
  division never uses a smaller divisor, and broadcast as shape (1, 1, -1).

  Args:
    data: 3-D array.
    train_percent: Fraction of leading entries used to compute the maximum.

  Returns:
    Array of the same shape as `data`.
  """
  floor_value = 0.001
  train_rows = math.floor(data.shape[0] * train_percent)
  per_feature_peak = np.max(data[0:train_rows], axis=(0, 1)).reshape(1, 1, -1)
  divisor = np.maximum(per_feature_peak, floor_value)
  return data / divisor


def create_sequences(
    dataframe_dict: dict,
    tickers: list,
    train_percent: float = .8,
    sequence_length: int = 20,
  ):
    """Build per-window standardised sequences and labels for each ticker.

    For every ticker the numeric columns are cut into sliding windows of
    `sequence_length` consecutive rows. Each window is standardised with its
    own per-feature mean and standard deviation. The final window is dropped
    because it has no following row to use as a target.

    Args:
      dataframe_dict: ticker -> DataFrame containing at least a numeric
        'close' column.
      tickers: Tickers to process (keys of `dataframe_dict`).
      train_percent: Accepted for interface compatibility; not used here.
      sequence_length: Number of rows per window.

    Returns:
      (sequence_dict, sequence_label_dict):
        sequence_dict[ticker] has shape (windows, features, sequence_length).
        sequence_label_dict[ticker] has shape (windows, 4) with columns
        [standardised last close in window, standardised next-day close,
         window mean of close, window std of close].
      Tickers with fewer rows than `sequence_length` cannot form a window and
      are omitted from both dictionaries.
    """
    epsilon = 1e-12
    L = sequence_length

    sequence_dict = {}
    sequence_label_dict = {}
    for ticker in tickers:
      dataframe = dataframe_dict[ticker]
      numerical_columns = dataframe.select_dtypes(include=np.number).columns
      data = dataframe[numerical_columns].to_numpy()
      N = data.shape[0]

      # sliding_window_view raises when the window is longer than the data.
      if N < L:
        continue

      # Shape (N - L, features, L): the window axis is appended last.
      sequences = sliding_window_view(data, window_shape=L, axis=0)[:-1]

      mean = sequences.mean(axis=2, keepdims=True)
      std = sequences.std(axis=2, keepdims=True)

      normalized_sequences = (sequences - mean) / (std + epsilon)
      sequence_dict[ticker] = normalized_sequences

      # Look 'close' up among the numeric columns: `data` only contains those,
      # so a position taken from the full frame would be shifted whenever a
      # non-numeric column precedes 'close'.
      close_index = numerical_columns.get_loc('close')

      last_day_indices = np.arange(L - 1, N - 1)
      next_day_indices = last_day_indices + 1
      last_day_close_values = data[last_day_indices, close_index]

      next_day_close_values = data[next_day_indices, close_index]

      mean_close = mean[:, close_index, 0]
      std_close = std[:, close_index, 0]

      # Targets are expressed in the same per-window scale as the inputs.
      last_day_close = (last_day_close_values - mean_close) / (std_close + epsilon)
      next_day_close = (next_day_close_values - mean_close) / (std_close + epsilon)

      sequence_labels = np.column_stack((last_day_close, next_day_close, mean_close, std_close))
      sequence_label_dict[ticker] = sequence_labels

    return sequence_dict, sequence_label_dict


def train_test_split(
    ticker_data_frames: dict,
    tickers: list,
    train_percent: float = .8,
    sequence_length: int = 20,
):
  """Split every ticker's sequences chronologically into train / dev / test.

  The first `train_percent` of each ticker's sequences go to train. The
  remaining hold-out part is halved: its first half becomes the test split,
  its second half the dev split. Per-ticker pieces are concatenated along
  axis 0.

  Returns:
    (train_data, train_labels, dev_data, dev_labels, test_data, test_labels)
  """
  sequence_dict, label_dict = create_sequences(ticker_data_frames, tickers, train_percent, sequence_length)

  split_names = ("train_data", "train_labels", "dev_data", "dev_labels", "test_data", "test_labels")
  pieces = {name: [] for name in split_names}

  for ticker in sequence_dict.keys():
    ticker_sequences = sequence_dict[ticker]
    ticker_labels = label_dict[ticker]
    train_end = math.floor(len(ticker_sequences) * train_percent)

    held_out_sequences = ticker_sequences[train_end:]
    held_out_labels = ticker_labels[train_end:]
    # The two midpoints coincide whenever sequences and labels have equal length.
    sequence_mid = len(held_out_sequences) // 2
    label_mid = len(held_out_labels) // 2

    pieces["train_data"].append(ticker_sequences[:train_end])
    pieces["train_labels"].append(ticker_labels[:train_end])
    pieces["dev_data"].append(held_out_sequences[sequence_mid:])
    pieces["dev_labels"].append(held_out_labels[sequence_mid:])
    pieces["test_data"].append(held_out_sequences[:sequence_mid])
    pieces["test_labels"].append(held_out_labels[:label_mid])

  return tuple(np.concatenate(pieces[name], axis=0) for name in split_names)


In [None]:
# Build the splits with the function defaults (train_percent=.8, sequence_length=20).
train_data, train_labels, dev_data, dev_labels, test_data, test_labels = train_test_split(ticker_data_frames, tickers)

In [None]:
def mae_loss(y_true, y_pred):
    """Mean absolute error between label column 1 and prediction column 0.

    Both columns are cast to float64 before the difference is taken.
    """
    target = tf.cast(y_true[:, 1], tf.float64)
    estimate = tf.cast(y_pred[:, 0], tf.float64)
    return tf.reduce_mean(tf.abs(target - estimate))

def mse_loss(y_true, y_pred):
    """Mean squared error between label column 1 and prediction column 0.

    Both columns are cast to float64 before the difference is taken.
    """
    target = tf.cast(y_true[:, 1], tf.float64)
    estimate = tf.cast(y_pred[:, 0], tf.float64)
    residual = target - estimate
    return tf.reduce_mean(residual ** 2)

def dir_acc(y_true, y_pred):
    """Fraction of samples whose predicted move has the same sign as the true move.

    Moves are measured from label column 0 (previous value): the true move
    goes to label column 1, the predicted move to prediction column 0.
    """
    previous = tf.cast(y_true[:, 0], tf.float64)
    actual_move = tf.cast(y_true[:, 1], tf.float64) - previous
    predicted_move = tf.cast(y_pred[:, 0], tf.float64) - previous

    same_sign = tf.equal(tf.sign(actual_move), tf.sign(predicted_move))
    hits = tf.cast(same_sign, tf.float64)
    return tf.reduce_mean(hits)


def percent_error_true(y_true, y_pred):
    """Mean absolute relative error of prediction column 0 vs. label column 1.

    The denominator is ``|y_true| + epsilon`` so the result is always
    non-negative and the division is well defined for every target. With a
    signed denominator, negative targets produce negative "errors" that cancel
    positive ones in the mean, and ``y_true == -epsilon`` divides by zero.
    """
    epsilon = 1e-6
    y_true_next = tf.cast(y_true[:, 1], tf.float64)
    y_pred_next = tf.cast(y_pred[:, 0], tf.float64)
    percent_error = tf.abs(y_true_next - y_pred_next) / (tf.abs(y_true_next) + epsilon)
    return tf.reduce_mean(percent_error)

def percent_error_adjusted(y_true, y_pred): #Use this one for 0 - 1 scaler
    """Mean absolute relative error, counting samples with target <= 1e-3 as zero error.

    Compares label column 1 with prediction column 0 (both cast to float64).
    """
    min_target = 1e-3
    target = tf.cast(y_true[:, 1], tf.float64)
    estimate = tf.cast(y_pred[:, 0], tf.float64)

    relative_error = tf.abs(target - estimate) / target
    no_error = tf.zeros_like(target)
    masked_error = tf.where(target > min_target, relative_error, no_error)
    return tf.reduce_mean(tf.cast(masked_error, tf.float64))

def percent_error_squared_adjusted(y_true, y_pred):
    """Mean squared relative error, ignoring samples with target <= 1e-3.

    Compares label column 1 with prediction column 0 (both cast to float64).
    Samples whose target is at or below the threshold contribute zero.

    The relative error is squared before averaging; without the square this
    function is identical to the non-squared variant.
    """
    threshold = 1e-3
    y_true_next = tf.cast(y_true[:, 1], tf.float64)
    y_pred_next = tf.cast(y_pred[:, 0], tf.float64)
    valid = y_true_next > threshold
    # tf.where evaluates both branches, so a (near-)zero denominator in the
    # masked-out branch would still produce NaN gradients. Divide by 1 there.
    safe_true = tf.where(valid, y_true_next, tf.ones_like(y_true_next))
    percent_error = tf.where(
        valid,
        (y_true_next - y_pred_next) / safe_true,
        tf.zeros_like(y_true_next)
    )
    return tf.reduce_mean(tf.square(percent_error))

def percent_error_unnormalized(y_true, y_pred): #Use this one for std scaler
    """Mean absolute relative error measured on de-standardised values.

    Label column 1 and prediction column 0 are mapped back to the original
    scale with ``value * y_true[:, 3] + y_true[:, 2]`` (std and mean columns).
    Samples whose de-standardised target has magnitude <= 1e-3 contribute zero.

    The division uses ``|y_true|`` to match the magnitude-based mask, so a
    negative target cannot yield a negative error.
    """
    threshold = 1e-3
    scale = tf.cast(y_true[:, 3], tf.float64)
    offset = tf.cast(y_true[:, 2], tf.float64)
    y_true_next = tf.cast(y_true[:, 1], tf.float64) * scale + offset
    y_pred_next = tf.cast(y_pred[:, 0], tf.float64) * scale + offset
    percent_error = tf.where(
        tf.math.abs(y_true_next) > threshold,
        tf.abs(y_true_next - y_pred_next) / tf.abs(y_true_next),
        tf.zeros_like(y_true_next),
    )
    return tf.reduce_mean(percent_error)

def percent_error_squared_unnormalized(y_true, y_pred):
    """Mean squared relative error measured on de-standardised values.

    Label column 1 and prediction column 0 are mapped back with
    ``value * y_true[:, 3] + y_true[:, 2]``. Samples whose de-standardised
    target has magnitude <= 1e-3 contribute zero.
    """
    min_magnitude = 1e-3
    scale = tf.cast(y_true[:, 3], tf.float64)
    offset = tf.cast(y_true[:, 2], tf.float64)

    target = tf.cast(y_true[:, 1], tf.float64) * scale + offset
    estimate = tf.cast(y_pred[:, 0], tf.float64) * scale + offset

    usable = tf.math.abs(target) > min_magnitude
    relative_error = tf.where(usable, (target - estimate) / target, tf.zeros_like(target))
    return tf.reduce_mean(tf.square(relative_error))


def error_direction(y_true, y_pred):
    """Mean sign of (label column 1 - prediction column 0).

    Positive values mean the predictions are, on balance, below the targets;
    negative values mean they are above.
    """
    target = tf.cast(y_true[:, 1], tf.float64)
    estimate = tf.cast(y_pred[:, 0], tf.float64)
    residual_sign = tf.sign(target - estimate)
    return tf.reduce_mean(tf.cast(residual_sign, tf.float64))

# Instance of Keras' built-in mean-squared-logarithmic-error loss.
# NOTE(review): unlike the custom losses above, this does not select a label
# column itself; the labels built in this notebook have four columns while the
# model outputs one value, so confirm the shapes line up before using it.
meanSquaredLogarithmicError = MeanSquaredLogarithmicError()


In [None]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0, num_filters = 64, kernel_size = 3):
    """One encoder block: conv stack -> self-attention -> conv projection -> feed-forward.

    Both sub-blocks are wrapped in residual connections, so the block's output
    has the same shape as `inputs`.

    Args:
        inputs: Keras tensor; its last-axis size defines the residual width.
        head_size: `key_dim` of the multi-head attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-block.
        dropout: Dropout rate used in attention and feed-forward.
        num_filters: Width of the last Conv1D before attention; the three
            preceding convolutions use 1/8, 1/4 and 1/2 of it.
        kernel_size: Kernel size shared by all Conv1D layers.

    Returns:
        Keras tensor with the same shape as `inputs`.
    """
    # The residual Add below requires the attention branch to end with the
    # same last-axis size as `inputs`. Deriving it here replaces a hard-coded
    # width of 20, which only works for inputs whose last axis has size 20.
    residual_width = inputs.shape[-1]

    x = LayerNormalization(epsilon=1e-6)(inputs)
    # Progressively widening convolution stack.
    for filters in (num_filters // 8, num_filters // 4, num_filters // 2, num_filters):
        x = Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            padding="same",
            activation="relu",
        )(x)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    # Project back to the input width so the residual connection is valid.
    x = Conv1D(
        filters=residual_width,
        kernel_size=kernel_size,
        padding="same",
        activation="relu",
    )(x)
    x = Add()([x, inputs])

    # Position-wise feed-forward sub-block with L2-regularised kernels.
    y = LayerNormalization(epsilon=1e-6)(x)
    y = Dense(ff_dim, activation="relu",  kernel_regularizer=regularizers.L2(l2=1e-4))(y)
    y = Dropout(dropout)(y)
    y = Dense(residual_width, kernel_regularizer=regularizers.L2(l2=1e-4))(y)
    return Add()([y, x])

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout=0):
    """Stack `num_layers` encoder blocks and attach a single-unit linear head.

    The encoder output is average-pooled (GlobalAveragePooling1D),
    layer-normalised and mapped to one linear output.

    Returns:
        An uncompiled Keras `Model`.
    """
    model_input = Input(shape=input_shape)

    hidden = model_input
    for _ in range(num_layers):
        hidden = transformer_encoder(hidden, head_size, num_heads, ff_dim, dropout)

    pooled = GlobalAveragePooling1D()(hidden)
    normalized = LayerNormalization(epsilon=1e-6)(pooled)
    prediction = Dense(1, activation="linear")(normalized)

    return Model(inputs=model_input, outputs=prediction)

def compile_model(
  input_shape,
  head_size,
  num_heads,
  ff_dim,
  num_layers,
  dropout,
  optimizer=None,
  loss=percent_error_squared_adjusted,
  metrics=[dir_acc, percent_error_true, error_direction, mse_loss, mae_loss],
):
  """Build and compile a fresh transformer model.

  Any checkpoint files left over from a previous run
  ("transformer_train_model.keras", "transformer_val_model.keras") are
  deleted first.

  Args:
    input_shape, head_size, num_heads, ff_dim, num_layers, dropout:
      Forwarded to `build_transformer_model`.
    optimizer: Keras optimizer. When omitted, a new `Adam()` is created for
      this call. A default instance in the signature would be built once at
      definition time and shared by every model compiled with the default.
    loss: Loss function passed to `model.compile`.
    metrics: Metric functions passed to `model.compile`.

  Returns:
    The compiled Keras model.
  """
  if optimizer is None:
    optimizer = tf.keras.optimizers.Adam()

  # Remove stale checkpoints.
  for stale_checkpoint in ("transformer_train_model.keras", "transformer_val_model.keras"):
    if os.path.exists(stale_checkpoint):
      os.remove(stale_checkpoint)

  model = build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout)
  model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
  #model.summary()
  return model

def train_model(
  model,
  train_sequences,
  train_labels,
  validation_sequences,
  validation_labels,
  epochs: int,
  batch_size: int,
):
  """Fit `model` in place, checkpointing the epoch with the lowest val_loss.

  The best model is written to "transformer_val_model.keras". Training data
  is shuffled each epoch. Nothing is returned.
  """
  best_val_checkpoint = ModelCheckpoint(
      "transformer_val_model.keras",
      monitor="val_loss",
      mode="min",
      save_best_only=True,
      verbose=1,
  )
  validation_pair = (validation_sequences, validation_labels)

  model.fit(
    x=train_sequences,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    validation_data=validation_pair,
    callbacks=[best_val_checkpoint],
  )

def evaluate_model(
  model,
  test_sequences,
  test_labels,
):
  """Load the best validation checkpoint and score the model on the test data.

  Args:
    model: Compiled Keras model with the same architecture as the checkpoint.
    test_sequences: Test inputs.
    test_labels: Test labels; column 1 is used as the regression target.

  Returns:
    (predictions, accuracy, r2) where `accuracy` is element 1 of
    `model.evaluate(...)` (the first compiled metric after the loss) and `r2`
    is the R-squared of prediction column 0 against label column 1.

  Raises:
    FileNotFoundError: If "transformer_val_model.keras" does not exist. This
      replaces a bare `return`, whose None result made callers that unpack
      three values fail with an unrelated TypeError.
  """
  checkpoint_path = "transformer_val_model.keras"
  if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(f"No model found: {checkpoint_path}")
  model.load_weights(checkpoint_path)

  accuracy = model.evaluate(test_sequences, test_labels)[1]
  predictions = model.predict(test_sequences)
  r2 = r2_score(test_labels[:, 1], predictions[:, 0])

  return predictions, accuracy, r2

# TODO future implementation
# def iterative_evaluation(
#   model,
#   test_sequences,
#   test_labels,
# ):
# # Lists to store predictions and actual values
# predictions = []
# actuals = []

# # Loop over each time step starting from n_steps
# for i in range(len(X)):
#     X_input = X[i].reshape((1, n_steps, 1))

#     # Predict on the current batch
#     y_pred = model.predict(X_input, verbose=0)
#     predictions.append(y_pred[0][0])
#     actuals.append(y[i][0])

#     # Train on the current batch
#     model.fit(X_input, y[i].reshape(1, 1), epochs=1, batch_size=batch_size, verbose=0)



In [None]:
# MSE on the standardised next-day close (label column 1).
# The labels carry four columns (last close, next close, window mean, window
# std). mse_loss selects the target column itself, whereas the built-in Keras
# MSLE loss would broadcast the single model output against all four columns.
optimizer=tf.keras.optimizers.Adam()
model = compile_model(train_data.shape[1:], 16, 8, 128, 6, 0.20, optimizer=optimizer, loss=mse_loss)
train_model(model, train_data, train_labels, dev_data, dev_labels, epochs=100, batch_size=8192)
predictions, accuracy, r2 = evaluate_model(model, test_data, test_labels)

In [None]:
i = 1000
size = 10

# Rows i .. i+size-1 of the test split; label columns 2 and 3 hold the
# per-window mean and std needed to undo the standardisation.
window = slice(i, i + size)
mean = test_labels[window, 2]
std = test_labels[window, 3]

print("First 10 predictions:")
print(predictions[window, 0] * std + mean)

print("Corresponding actual values:")
print(test_labels[window, 1] * std + mean)

In [None]:
# Epochs per trial. hyperparam_search reads this as a module-level global
# (it is not one of its parameters), so it must be defined before the search runs.
epochs = 10
# Number of random hyperparameter configurations to try.
num_models=10

def hyperparam_search(
    num_models: int,
    train_sequences,
    train_labels,
    val_sequences,
    val_labels,
    test_sequences,
    test_labels,
    optimizer=tf.keras.optimizers.Adam(),
    loss=mse_loss,
    metrics=[dir_acc],
):
  """Random search over transformer hyperparameters.

  Each trial samples batch size, head size, head count, feed-forward width,
  depth and dropout, trains for the module-level `epochs`, and evaluates on
  the test data. A summary is printed after every trial; nothing is returned.

  Args:
    num_models: Number of random configurations to try.
    train_sequences, train_labels: Training data.
    val_sequences, val_labels: Validation data used for checkpointing.
    test_sequences, test_labels: Data used to score each trial.
    optimizer: Template optimizer. It is cloned for every trial because each
      trial builds a new model, and an optimizer instance that has already
      been used with one model's variables cannot be reused for another.
    loss: Loss function for `compile_model`.
    metrics: Metrics for `compile_model`; the first one is reported as
      "accuracy".

  NOTE(review): the best configuration is chosen by its score on the test
  data, so `best accuracy` is not an unbiased estimate. Confirm whether a
  separate selection split is wanted.
  """
  input_shape = train_sequences.shape[1:]
  head_size_arr = [64, 128, 256, 512] #256
  num_heads_arr = [8, 16, 32, 64] #16
  ff_dim_arr = [256, 512, 1024]#, 2048] # 1024
  num_layers_arr = [8, 10, 12, 14, 16, 18] # 12
  dropout_arr = [0, 0.05, 0.10, 0.20, 0.3, 0.4, 0.5, 0.6] # 0.20
  batch_size_arr = [32, 64, 128]

  best_accuracy = 0
  best_params = None

  # Capture the template once; every trial gets its own instance from it.
  optimizer_class = type(optimizer)
  optimizer_config = optimizer.get_config()

  for _ in range(num_models):
    try:
      batch_size = random.choice(batch_size_arr)
      head_size = random.choice(head_size_arr)
      num_heads = random.choice(num_heads_arr)
      ff_dim = random.choice(ff_dim_arr)
      num_layers = random.choice(num_layers_arr)
      dropout = random.choice(dropout_arr)

      trial_optimizer = optimizer_class.from_config(optimizer_config)

      model = compile_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout, trial_optimizer, loss, metrics)
      train_model(model, train_sequences, train_labels, val_sequences, val_labels, epochs, batch_size)
      predictions, accuracy, r2 = evaluate_model(model, test_sequences, test_labels)

      if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = (input_shape, head_size, num_heads, ff_dim, num_layers, dropout)

      write = f"R-squared: {r2}, accuracy: {accuracy}, batch_size: {batch_size}, head_size: {head_size}, num_heads: {num_heads}, ff_dim: {ff_dim}, num_layers: {num_layers}, dropout: {dropout}"
      print(write)

    except Exception as e:
      # Best effort: a failing configuration must not abort the whole search.
      print(f"Exception: {e}, continuing")

    best = f"best params: {best_params}, best accuracy: {best_accuracy}"
    print(best)


In [None]:
# Run the random search with a lower learning rate than the Adam default,
# optimising MSE and reporting directional accuracy.
hyperparam_search(
    num_models,
    train_data,
    train_labels,
    dev_data,
    dev_labels,
    test_data,
    test_labels,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=mse_loss,
    metrics=[dir_acc],
)