In [7]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler

In [None]:
# Avoid shenanigans with backend settings from Keras 3: select the backend before importing keras
os.environ["KERAS_BACKEND"] = "tensorflow"
from keras import layers
from keras.models import Sequential, load_model

In [None]:
# Load the per-symbol price frames (a mapping of symbol -> DataFrame, as used by
# the preprocessing loop). The context manager closes the file handle once loaded.
# NOTE(review): pickle can execute arbitrary code while loading; only open
# pickles that come from a trusted source.
with open("./sp500.pickle", "rb") as sp500_file:
    sp500 = pickle.load(sp500_file)

In [8]:
def create_windows(data, win_size):
    """Slice a 2-D series into overlapping windows of ``win_size`` rows.

    Window ``i`` holds rows ``i`` to ``i + win_size - 1``. Exactly
    ``len(data) - win_size`` windows are produced, so the last ``win_size``
    rows never start a window; callers that align windows with dates rely on
    that count (they drop the last ``win_size`` rows of the date index).

    Parameters
    ----------
    data : array-like of shape (n_rows, n_features)
        Rows in time order.
    win_size : int
        Number of consecutive rows per window.

    Returns
    -------
    windows : np.ndarray of shape (n_windows, win_size, n_features)
    labels : np.ndarray of shape (n_windows, win_size)
        Feature column 0 of every window, returned as an independent copy.

    When ``data`` has ``win_size`` rows or fewer, both arrays are returned
    empty (``n_windows == 0``) with the shapes above instead of raising.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - win_size, 0)

    if n_windows == 0:
        # Keep the trailing dimensions so the result can still be concatenated
        # with, or fed alongside, non-empty window arrays.
        empty_windows = np.empty((0, win_size) + data.shape[1:], dtype=data.dtype)
        empty_labels = np.empty((0, win_size), dtype=data.dtype)
        return empty_windows, empty_labels

    windows = np.stack([data[start : start + win_size] for start in range(n_windows)])
    return windows, windows[:, :, 0].copy()

In [9]:
training_windows = []
training_labels = []
testing_windows = {}
testing_labels = {}
scalars = {}
WIN_SIZE = 7
# Rows dated before SPLIT_DATE are used for training; rows on/after it are held out.
SPLIT_DATE = "2021-01-01"

for symbol, stock in sp500.items():
    # The frames stored in sp500 are modified in place on purpose: later cells
    # read the datetime index and the "log_return" column from sp500[symbol].
    # Re-index only once, so re-running this cell does not promote the next
    # remaining column to the index.
    if not isinstance(stock.index, pd.DatetimeIndex):
        stock.set_index(stock.columns[0], inplace=True)
        stock.index = pd.to_datetime(stock.index)

    # Daily log return; the first row has no predecessor and is set to 0.
    prices = stock["adjusted_close"]
    stock["log_return"] = np.log(prices / prices.shift(1)).fillna(0)

    # Rolling volatility over one window; the warm-up rows are set to 0.
    stock["rolling_std"] = stock["log_return"].rolling(WIN_SIZE).std().fillna(0)

    features = stock[["log_return", "volume", "rolling_std"]].astype(np.float32)
    train = features.loc[features.index < SPLIT_DATE]
    test = features.loc[features.index >= SPLIT_DATE]

    # create_windows yields len(frame) - WIN_SIZE windows, so a split needs
    # strictly more than WIN_SIZE rows to contribute at least one window.
    train_usable = len(train) > WIN_SIZE
    test_usable = len(test) > WIN_SIZE

    if train.empty and not test_usable:
        # Nothing to fit the scaler on and nothing to score: skip the symbol.
        continue

    scaler = StandardScaler()
    if train.empty:
        # No rows before SPLIT_DATE: scale the symbol by its own test-period
        # statistics so it can still be scored.
        scaler.fit(test)
    else:
        scaler.fit(train)

    if train_usable:
        train_windows, train_labels = create_windows(
            scaler.transform(train), WIN_SIZE
        )
        training_windows.append(train_windows)
        training_labels.append(train_labels)

    if test_usable:
        test_windows, test_labels = create_windows(scaler.transform(test), WIN_SIZE)
        testing_windows[symbol] = test_windows
        testing_labels[symbol] = test_labels

    scalars[symbol] = scaler

training_windows = np.concatenate(training_windows)
training_labels = np.concatenate(training_labels)

In [None]:
# Sanity check: shapes of the stacked training arrays, then the number of
# symbols that have test windows / test labels.
dataset_summary = (
    training_windows.shape,
    training_labels.shape,
    len(testing_windows),
    len(testing_labels),
)
print(*dataset_summary)

# Model definition and training

## Autoencoder (AE)

In [None]:
# (rows per window, features per row) of the training windows.
tuple(training_windows.shape[axis] for axis in (1, 2))

In [None]:
# LSTM autoencoder. Input: windows of WIN_SIZE rows with 3 features (the three
# columns selected during preprocessing). Output: WIN_SIZE values per window,
# matching the labels (feature 0 of every row in the window).
# An earlier 2-feature input and Dropout(rate=0.2) after each LSTM were tried
# and are left disabled.
ae = Sequential(
    [
        layers.InputLayer(shape=(WIN_SIZE, 3)),
        # Encoder: returns only the final LSTM output for the window.
        layers.LSTM(32),
        # Repeat that output once per time step to feed the decoder.
        layers.RepeatVector(WIN_SIZE),
        # Decoder: one output per time step.
        layers.LSTM(16, return_sequences=True),
        layers.TimeDistributed(layers.Dense(1)),
        # Drop the trailing size-1 axis: (WIN_SIZE, 1) -> (WIN_SIZE,).
        layers.Reshape((WIN_SIZE,)),
    ],
    name="autoencoder",
)
ae.compile(loss="mae", optimizer="adam")
ae.summary()

In [None]:
# Training is disabled in this notebook: the following cell loads a saved
# model ("autoencoder_vol.keras") and its loss history from disk instead.
# The code below is kept as a record of the training setup.
# NOTE(review): EarlyStopping is not imported in the cells shown here; it
# would need to be imported (presumably from keras.callbacks) before
# re-enabling this cell.

# early_stopping = EarlyStopping(
#     monitor="val_loss",
#     patience=5,
#     restore_best_weights=True,
# )

# history = ae.fit(
#     training_windows,
#     training_labels,
#     epochs=30,
#     batch_size=32,
#     validation_split=0.2,
#     shuffle=False,
#     callbacks=[early_stopping],
# )

In [11]:
# |exporti

# How the artifacts loaded below were written (disabled; run after training):
# ae.save("autoencoder_vol.keras")
# with open("history_vol.pickle", "wb") as f:
#     pickle.dump(history.history, f)

# Alternative artifact pair, left disabled
# (NOTE(review): presumably an earlier model variant - confirm before use):
# with open("history.pickle", "rb") as f:
#     history = pickle.load(f)
# ae = load_model("autoencoder.keras")

# Restore the saved loss history (indexed by "loss" / "val_loss" when plotted)
# and the trained model; `ae` replaces the freshly built, untrained model.
with open("history_vol.pickle", "rb") as history_file:
    history = pickle.load(history_file)

ae = load_model("autoencoder_vol.keras")

In [None]:
# Training vs. validation loss from the saved history dict (one value per
# epoch). `history` is the plain dict loaded from disk, so it is indexed
# directly rather than through a Keras History object.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history["loss"], label="Training Loss")
loss_ax.plot(history["val_loss"], label="Validation Loss")
# Label the figure so it can be read on its own when the notebook is skimmed.
loss_ax.set(title="Autoencoder Training History", xlabel="Epoch", ylabel="Loss")
loss_ax.legend()
plt.show()

In [None]:
# Reconstruction error of every training window: mean absolute difference
# between the model output and the window's labels, one value per window.
train_predictions = ae.predict(training_windows)
train_abs_error = np.abs(train_predictions - training_labels)
train_mae_loss = train_abs_error.mean(axis=1)

In [None]:
# Anomaly cut-off: the 90th percentile of the training reconstruction errors,
# i.e. roughly one training window in ten lies above it.
ANOMALY_PERCENTILE = 90
THRESHOLD = np.percentile(train_mae_loss, ANOMALY_PERCENTILE)
THRESHOLD

In [None]:
# Distribution of the training errors; only values below THRESHOLD are shown.
errors_below_threshold = train_mae_loss[train_mae_loss < THRESHOLD]
fig = px.histogram(
    errors_below_threshold,
    title="Training MAE Loss Distribution",
    nbins=100,
)
fig.show()

In [None]:
# Demonstrate the threshold with AAPL

# Test-period rows of the AAPL frame (datetime-indexed by the preprocessing loop).
aapl_history = sp500["AAPL"]
plot_data = aapl_history.loc[aapl_history.index >= "2021-01-01"]

# Reconstruction error of every AAPL test window.
test_predictions = ae.predict(testing_windows["AAPL"])
test_abs_error = np.abs(test_predictions - testing_labels["AAPL"])
test_mae_loss = test_abs_error.mean(axis=1)

# There is one error per window and WIN_SIZE fewer windows than rows, so the
# last WIN_SIZE dates are dropped to line each error up with its window's
# first date.
window_start_dates = plot_data.index[:-WIN_SIZE]

fig = px.line(
    x=window_start_dates,
    y=test_mae_loss,
    labels={"x": "Date", "y": "MAE Loss"},
    title="Anomalies Detected on AAPL",
)
fig.add_hline(
    y=THRESHOLD,
    line_color="red",
    line_dash="dash",
    annotation_text="Threshold",
    annotation_position="top left",
)
fig.show()

In [18]:
import ipywidgets as widgets
from IPython.display import HTML

In [19]:
# Live widgets that on_click redraws: the price chart, the log-return chart,
# and the output area that holds the anomaly table.
fig, fig_returns = go.FigureWidget(), go.FigureWidget()
anomaly_list = widgets.Output()


def _draw_series_with_anomalies(figure, plot_data, anomalies, column):
    """Replace ``figure``'s traces with a line of ``column`` plus red anomaly markers."""
    figure.data = []
    # Only the trace is taken from the px figure, so no layout arguments are
    # passed here; titles are set on the widget by the caller.
    figure.add_trace(
        px.line(data_frame=plot_data, x=plot_data.index, y=column).data[0]
    )
    figure.add_trace(
        go.Scatter(
            x=anomalies.index,
            y=plot_data.loc[anomalies.index, column],
            mode="markers",
            marker=dict(color="red", size=3),
            name="Anomaly",
        )
    )


def on_click(stock):
    """Score one stock's test windows and refresh the dashboard widgets.

    Parameters
    ----------
    stock : str
        Symbol to display; must be a key of ``testing_windows``,
        ``testing_labels`` and ``sp500``.

    Side effects
    ------------
    Rewrites the anomaly table inside ``anomaly_list`` and redraws ``fig``
    (adjusted close) and ``fig_returns`` (log return), marking every window
    whose reconstruction error exceeds the global ``THRESHOLD``.
    """
    test_pred = ae.predict(testing_windows[stock], verbose=0)
    test_mae_loss = np.mean(np.abs(test_pred - testing_labels[stock]), axis=1)

    # The global THRESHOLD (from the training errors) is used for every stock;
    # a per-stock percentile of test_mae_loss was tried and left disabled.
    is_anomaly = test_mae_loss > THRESHOLD

    plot_data = sp500[stock].loc[sp500[stock].index >= "2021-01-01"]
    # There are WIN_SIZE fewer windows than rows; dropping the last WIN_SIZE
    # rows pairs each remaining row with the window that starts on it.
    anomalies = plot_data[:-WIN_SIZE][is_anomaly]

    with anomaly_list:
        anomaly_list.clear_output()
        # Explicit copy: columns are added below and must not write through
        # to (or warn about) the frame this was sliced from.
        anomalies_data = anomalies.loc[:, ["adjusted_close", "log_return"]].copy()
        # Convert the log return back to a simple percentage return.
        anomalies_data["return%"] = (np.exp(anomalies_data["log_return"]) - 1) * 100
        anomalies_data["period_mae"] = test_mae_loss[is_anomaly]
        anomalies_data["threshold"] = THRESHOLD
        anomalies_data.index.name = "start_of_period"
        display(
            HTML(
                "<div style='overflow: auto; height: 500px; width: fit-content'>"
                + anomalies_data.to_html()
                + "</div>"
            )
        )

    _draw_series_with_anomalies(fig, plot_data, anomalies, "adjusted_close")
    _draw_series_with_anomalies(fig_returns, plot_data, anomalies, "log_return")

    fig.update_layout(
        title=f"Stock Price and Anomalies for {stock}",
        xaxis_title="Time",
        yaxis_title="Price",
    )
    fig_returns.update_layout(
        title=f"Stock Returns and Anomalies for {stock}",
        xaxis_title="Time",
        yaxis_title="log Returns",
    )

In [None]:
# --- Controls: pick a symbol, then press Submit to redraw everything ---
stock_dropdown = widgets.Dropdown(
    description="Stock:",
    options=testing_windows.keys(),
    disabled=False,
)


def _handle_submit(_button):
    """Redraw the dashboard for the symbol currently selected in the dropdown."""
    on_click(stock_dropdown.value)


submit_button = widgets.Button(description="Submit")
submit_button.on_click(_handle_submit)
toolbar = widgets.HBox([stock_dropdown, submit_button])

# --- Results: the two charts on the left, the anomaly table on the right ---
plots_display = widgets.VBox(
    [fig, fig_returns],
    layout=widgets.Layout(width="70%"),
)
anomalies_header = widgets.HTML("<h2>List of Anomalies</h2>")
anomalies_display = widgets.VBox([anomalies_header, anomaly_list])
results_display = widgets.HBox(
    [plots_display, anomalies_display],
    layout=widgets.Layout(display="flex"),
)

ui = widgets.VBox([toolbar, results_display])

display(ui)