In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import layers
from keras.models import Model
import keras
from datetime import datetime
import matplotlib.pyplot as plt
from scripts.modelling import transform, invert_transform, root_mean_squared_error, mean_absolute_error, model_wrap
from scipy.signal import correlate
from scipy.fft import fft

def split_data(data, window_size=15, train_frac=0.7):
    """Cut a table into non-overlapping windows and split them in order.

    Each sample is `window_size` consecutive rows of the first four columns.
    Its target is columns 0 and 3 of the row immediately after the window.
    Windows do not overlap: the next window starts on the row that served as
    the previous window's target.

    Args:
        data: pandas DataFrame with at least four columns. Columns 0 and 3
            become the targets (presumably open and close -- confirm against
            the CSV layout).
        window_size: Number of rows per input window. Must be positive.
        train_frac: Fraction of the windows, taken from the start, that go
            into the training split. Must lie in [0, 1].

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. The x arrays have shape
        ``(n, window_size, 4)`` and the y arrays have shape ``(n, 2)``. When
        `data` is too short to yield a single window, all four arrays are
        empty but keep their trailing dimensions.

    Raises:
        ValueError: If `window_size` is not positive or `train_frac` is
            outside [0, 1].
    """
    # window_size == 0 would never advance the window and loop forever.
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    if not 0 <= train_frac <= 1:
        raise ValueError("train_frac must be between 0 and 1")

    n_rows = data.shape[0]
    x = []
    y = []

    # A window is only usable when the row right after it (the target) exists,
    # hence the stop at n_rows - window_size.
    for start in range(0, n_rows - window_size, window_size):
        end = start + window_size
        x.append(data.iloc[start:end, 0:4].to_numpy())
        y.append(data.iloc[end, [0, 3]].to_numpy())

    if not x:
        # np.array([]) would be 1-D and break the 3-D slicing below, so build
        # correctly shaped empty outputs instead.
        n_features = min(4, data.shape[1])
        empty_x = np.empty((0, window_size, n_features))
        empty_y = np.empty((0, 2))
        return empty_x, empty_y, empty_x.copy(), empty_y.copy()

    x = np.array(x)
    y = np.array(y)

    # Chronological split: no shuffling, the earliest windows are for training.
    split = int(x.shape[0] * train_frac)

    return x[0:split, :, :], y[0:split], x[split:, :, :], y[split:]

def feature_extractor(data, window_seconds=15*60):
    """Derive two summary features from column 3 of every window.

    Args:
        data: NumPy array indexed as ``data[window, step, column]`` with at
            least four columns; only column 3 is read (presumably the close
            price -- confirm against the data layout).
        window_seconds: Time span used as the denominator of the velocity
            feature. Defaults to ``15*60``, the value that used to be
            hard-coded.

    Returns:
        Float array of shape ``(n_windows, 2)``:
          * column 0 -- where the last value sits inside the window's
            ``[min, max]`` range, as a percentage (0 when the window is flat);
          * column 1 -- ``(last - first) / window_seconds``.
    """
    res = np.zeros(shape=(data.shape[0], 2))
    if data.shape[0] == 0:
        return res

    series = data[:, :, 3]
    low = np.min(series, axis=1)
    high = np.max(series, axis=1)
    last = series[:, -1]
    span = high - low

    # A flat window has high == low. Dividing there would yield NaN (0/0),
    # which would poison anything trained on these features, so those rows
    # keep the 0 they were initialised with.
    position = np.divide(
        last - low,
        span,
        out=np.zeros(span.shape, dtype=float),
        where=span != 0,
    )

    res[:, 0] = position * 100
    res[:, 1] = (last - series[:, 0]) / window_seconds

    return res

# Sinusoidal positional encoding: every position gets its own pattern of sines
# and cosines, and positions that are close together get similar patterns.
def positional_encoding(seq_len, d_model):
    """Build the fixed sinusoidal positional-encoding table.

    For position ``pos`` and pair index ``i``::

        PE[pos, 2i]   = sin(pos / 10000**(2i / d_model))
        PE[pos, 2i+1] = cos(pos / 10000**(2i / d_model))

    Args:
        seq_len: Number of positions (rows) in the table.
        d_model: Number of encoding dimensions (columns). Odd values are
            supported; the last sine column then has no cosine partner.

    Returns:
        ``tf.float32`` tensor of shape ``(seq_len, d_model)``.
    """
    position = np.arange(seq_len)[:, np.newaxis]  # Shape: (seq_len, 1)
    div_term = 10000 ** (np.arange(0, d_model, 2) / d_model)

    # The position is DIVIDED by the term, so wavelengths grow with the column
    # index. Multiplying would make the angles explode and alias.
    angles = position / div_term  # Shape: (seq_len, ceil(d_model / 2))

    pos_enc = np.zeros((seq_len, d_model))
    pos_enc[:, 0::2] = np.sin(angles)  # Sine on even indices
    # With an odd d_model there is one fewer odd column than even column.
    pos_enc[:, 1::2] = np.cos(angles[:, :d_model // 2])  # Cosine on odd indices

    return tf.convert_to_tensor(pos_enc, dtype=tf.float32)

# Positional embedding layer
class PositionalEmbedding(tf.keras.layers.Layer):
    """Adds a fixed sinusoidal positional encoding to its input.

    Args:
        d_model: Size of the last input dimension (encoding depth).
        seq_len: Number of time steps the encoding is built for. Defaults to
            15, the value that used to be hard-coded.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, d_model, seq_len=15, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.seq_len = seq_len
        # positional_encoding takes (seq_len, d_model); pass them positionally
        # rather than by keywords it does not define.
        self.pos_encoding = positional_encoding(seq_len, d_model)

    # Add encodings to values
    def call(self, x):
        """Return ``x`` plus the encoding table.

        Relies on broadcasting, so the trailing dimensions of ``x`` are
        assumed to be ``(seq_len, d_model)`` -- TODO confirm against callers.
        """
        return x + self.pos_encoding

    def get_config(self):
        """Expose constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update({"d_model": self.d_model, "seq_len": self.seq_len})
        return config
    
# Self-attention block: attention over the input itself, residual add, layer norm
class SelfAttention(tf.keras.layers.Layer):
    """Multi-head self-attention followed by a residual connection and layer norm.

    All keyword arguments are handed to ``tf.keras.layers.MultiHeadAttention``;
    none of them reach the base ``Layer`` constructor.
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

    def call(self, x):
        """Attend ``x`` to itself, add the result back onto ``x``, then normalise."""
        # The same tensor serves as query, key and value.
        return self.layernorm(self.add([x, self.mha(query=x, key=x, value=x)]))




def autocorr(x):
    """Normalised autocorrelation of a 1-D series for lags ``0 .. len(x) - 1``.

    The series is mean-centred and correlated with itself. Each lag's sum is
    then divided by the series variance times the number of overlapping
    samples at that lag (``len(x) - lag``), so lag 0 evaluates to 1.

    Args:
        x: 1-D NumPy array.

    Returns:
        Array of length ``len(x)``, one coefficient per non-negative lag.
    """
    n_samples = len(x)
    variance = np.var(x)
    centered = x - np.mean(x)

    # 'full' mode returns lags -(n-1) .. (n-1); keep the non-negative half.
    all_lags = correlate(centered, centered, mode='full', method='auto')
    non_negative = all_lags[all_lags.size // 2:]

    # n, n-1, ..., 1 overlapping samples for lags 0, 1, ..., n-1.
    overlap = np.arange(n_samples, 0, -1)
    return non_negative / (variance * overlap)

In [3]:
from polygon import RESTClient
from dotenv import load_dotenv
import os

# Fetch minute-level aggregates for one ticker from the Polygon REST client.
# Presumably load_dotenv() reads a local .env file so API_KEY is available via
# os.getenv below -- confirm the .env location.
load_dotenv()

# Access the API key
api_key = os.getenv("API_KEY")
client = RESTClient(api_key=api_key)

# Request date range as 'YYYY-MM-DD' strings.
# NOTE(review): despite its name, `last_year` is TWO years before today
# (DateOffset(years=2)).
today = datetime.today().strftime('%Y-%m-%d')
last_year = (datetime.today() - pd.DateOffset(years=2)).strftime('%Y-%m-%d')

ticker = 'AMD'
# One-minute aggregates over [last_year, today], loaded into a DataFrame.
# NOTE(review): confirm that limit=120000 is within the API's per-request
# maximum; if the response is capped, this range may come back truncated.
resp = pd.DataFrame(client.get_aggs(ticker, multiplier=1, timespan='minute', from_=last_year, to=today, limit=120000))
# Drop the 'timestamp' and 'otc' columns from the response.
resp = resp.drop(columns=['timestamp', 'otc'])
# Disabled: would write the frame to '<ticker>_<today>.csv'.
#resp.to_csv(f'{ticker}_{today}.csv', index=False)

In [None]:
# Disabled snippet: it is wrapped in a string literal, so nothing here runs.
# If enabled it would request news for "AAPL" published on or after `today`
# (newest first, limit 1000) and print each article's title and insights.
'''
news_articles = client.list_ticker_news(
	"AAPL", 
	params={"published_utc.gte": today}, 
	order="desc", 
	limit=1000
	)

for article in news_articles:
    print(f"{article.title} [Insights: {article.insights}]")
'''

In [9]:
# Load the saved price table and cut it into train/test windows.
# (Plain string: the f-prefix had no placeholders to fill.)
data = pd.read_csv('AAPL_2024-12-17.csv')
x_train, y_train, x_test, y_test = split_data(data)

# Size the positional encoding from the windows themselves (steps x features)
# so it cannot drift out of sync with split_data's window size.
encoding = positional_encoding(x_train.shape[1], x_train.shape[2])

Trend forecasting

- 1. Autocorrelation
> Determine the general direction of the stock's open price over the time period
> Potential extra model input

- 2. Velocity and Acceleration(?) of stock
> Calculate total window velocity (price delta/time delta)
> Calculate point-to-point velocity

- 3. Model Changes
> Swap to transformer
> Use more inputs
> Batch predict stocks of the same general industry (semiconductors: AAPL, AMD, NVDA, SSNLF, TSMC)
> Incorporate sentiment analysis into model

In [None]:
# 1: Autocorrelation of open and close
fig, ax = plt.subplots(1,2,figsize=(10,5))
i = 2  # index of the training window to inspect

# Column 0 is the open and column 3 is the close (the same columns split_data
# uses as targets). The close previously reused column 0 by mistake.
autocorr_open = autocorr(x_train[i, :, 0])
autocorr_close = autocorr(x_train[i, :, 3])

# Left: the raw series of the selected window.
ax[0].plot(x_train[i, :, 0], label='open')
ax[0].plot(x_train[i, :, 3], label='close')
ax[0].set(title=f'Training window {i}', xlabel='step in window', ylabel='value')
ax[0].legend()

# Right: their autocorrelation as a function of lag.
ax[1].plot(autocorr_open, label='open')
ax[1].plot(autocorr_close, label='close')
ax[1].set(title='Autocorrelation', xlabel='lag', ylabel='autocorrelation')
ax[1].legend();

In [None]:
# Build, train and save the model.
# NOTE(review): `NN` is neither defined nor imported in the cells visible here;
# on a fresh kernel this line raises NameError unless it is defined elsewhere.
# model_wrap is given the network plus the transform / invert_transform helpers
# imported from scripts.modelling.
model = model_wrap(model=NN(x_train.shape[1]), transform=transform, invert_transform=invert_transform)
model.model.summary()

# Mean-squared-error loss, Adam optimizer with learning rate 0.001.
loss = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.Adam(learning_rate=0.001)

# Train for 100 epochs on the training windows, then save the underlying model.
# NOTE(review): the file name says 'LSTM' -- confirm it matches the architecture.
model.fit(x_train, y_train, loss=loss, optimizer=optimizer, epochs=100)
model.model.save('LSTM_feedend.keras')

In [None]:
# Evaluate on the TRAINING windows: error table plus three diagnostic figures.
y_pred = model.predict(x_train, y_train)

# Target column 0 is the open, column 1 is the close.
res_df = pd.DataFrame({'Open MAE': mean_absolute_error(y_train[:, 0], y_pred[:, 0]),
                       'Close MAE': mean_absolute_error(y_train[:, 1], y_pred[:, 1])}, index=[0])

display(res_df)

target_names = ('Open', 'Close')

# Predicted vs actual; the red diagonal marks a perfect prediction.
fig, ax = plt.subplots(1,2, figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].scatter(y_train[:, target_idx], y_pred[:, target_idx])
    ax[target_idx].plot(y_train[:, target_idx], y_train[:, target_idx], c='r', label='perfect prediction')
    ax[target_idx].set(title=f'{target_name}: predicted vs actual (train)', xlabel='actual', ylabel='predicted')
    ax[target_idx].legend()

# Residual (actual - predicted) on the x-axis against the actual value.
fig, ax = plt.subplots(1,2,figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].scatter(y_train[:, target_idx]-y_pred[:, target_idx], y_train[:, target_idx])
    ax[target_idx].set(title=f'{target_name}: residuals (train)', xlabel='actual - predicted', ylabel='actual')

# Actual and predicted series in sample order.
fig, ax = plt.subplots(1,2,figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].plot(y_train[:, target_idx], label='actual')
    ax[target_idx].plot(y_pred[:, target_idx], label='predicted')
    ax[target_idx].set(title=f'{target_name}: series (train)', xlabel='sample', ylabel='value')
    ax[target_idx].legend()

In [None]:
# Evaluate on the held-out TEST windows: error table plus three diagnostic figures.
y_pred = model.predict(x_test, y_test)
display(y_test.shape, y_pred.shape)

# Target column 0 is the open, column 1 is the close.
res_df = pd.DataFrame({'Open MAE': mean_absolute_error(y_test[:, 0], y_pred[:, 0]),
                       'Close MAE': mean_absolute_error(y_test[:, 1], y_pred[:, 1])}, index=[0])

display(res_df)

target_names = ('Open', 'Close')

# Predicted vs actual; the red diagonal marks a perfect prediction.
fig, ax = plt.subplots(1,2, figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].scatter(y_test[:, target_idx], y_pred[:, target_idx])
    ax[target_idx].plot(y_test[:, target_idx], y_test[:, target_idx], c='r', label='perfect prediction')
    ax[target_idx].set(title=f'{target_name}: predicted vs actual (test)', xlabel='actual', ylabel='predicted')
    ax[target_idx].legend()

# Residual (actual - predicted) on the x-axis against the actual value.
fig, ax = plt.subplots(1,2,figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].scatter(y_test[:, target_idx]-y_pred[:, target_idx], y_test[:, target_idx])
    ax[target_idx].set(title=f'{target_name}: residuals (test)', xlabel='actual - predicted', ylabel='actual')

# Actual and predicted series in sample order.
fig, ax = plt.subplots(1,2,figsize=(10,5))
for target_idx, target_name in enumerate(target_names):
    ax[target_idx].plot(y_test[:, target_idx], label='actual')
    ax[target_idx].plot(y_pred[:, target_idx], label='predicted')
    ax[target_idx].set(title=f'{target_name}: series (test)', xlabel='sample', ylabel='value')
    ax[target_idx].legend()