In [1]:
import os
import pandas as pd
from contextlib import nullcontext
import matplotlib.pyplot as plt
from datetime import datetime, date, timedelta, timezone
import pytz

import ipywidgets as widgets
from IPython.display import display

import numpy as np
import torch
import random

from model import GPTConfig, GPT, load_model
from data import data_columns, get_data_for_eval, decode_data, encode_data, get_ticker_data
from stockdata import StockData


In [2]:
# Base directory that the data folders used below are resolved against.
currentDir = "."

In [3]:
# -----------------------------------------------------------------------------
# configs
# I/O
out_dir = 'out'  # directory the checkpoint files are loaded from

# system
# Pick the first backend that is actually available instead of hard-coding
# 'mps', so the notebook also runs on machines without Apple's Metal backend.
# Preference order keeps the previous behaviour on macbooks: mps, cuda, cpu.
# To force a device, assign it after this block, e.g. 'cpu', 'cuda:0', 'cuda:1'.
if getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# one of 'float32', 'bfloat16', or 'float16'; bfloat16 only when CUDA supports it
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16'

# various inits, derived attributes, I/O setup
seed_offset = 0

torch.manual_seed(1337 + seed_offset)
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
device_type = 'cuda' if 'cuda' in device else 'cpu' # for later use in torch.autocast
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
# autocast is only configured for CUDA; every other device (cpu, mps) gets a no-op context
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)


In [4]:
def _add_std_columns(frame):
    """Decode the ``*_bucket`` columns of ``frame`` into value columns, in place.

    For close/open/high/low/volume the bucket label is shifted by the minimum of
    the matching ``StockData.*_LABELS`` and used as an index into
    ``StockData.BIN_VALUES``. ``vix_bucket`` is decoded the same way through
    ``StockData.VIX_BINS``. Returns the same ``frame`` so calls can be chained.
    """
    bucket_specs = (
        ('close_std', 'close_bucket', StockData.CLOSE_LABELS),
        ('open_std', 'open_bucket', StockData.OPEN_LABELS),
        ('high_std', 'high_bucket', StockData.HIGH_LABELS),
        ('low_std', 'low_bucket', StockData.LOW_LABELS),
        ('volume_std', 'volume_bucket', StockData.VOLUME_LABELS),
    )
    for std_col, bucket_col, labels in bucket_specs:
        frame.loc[:, std_col] = (frame[bucket_col] - labels.min()).map(lambda x: StockData.BIN_VALUES[x])
    frame.loc[:, 'vix'] = (frame['vix_bucket'] - StockData.VIX_LABELS.min()).map(lambda x: StockData.VIX_BINS[x])
    return frame


def predict_and_plot(cutoff_date, predict_days, all_data_df, all_orig_df, model):
    """Sample predictions after ``cutoff_date``, plot them against the actual closes.

    Several continuations are generated from ``model``, averaged, and the
    cumulative predicted close delta is drawn next to the actual close delta
    (relative to the close at the cutoff row).

    Args:
        cutoff_date: last ``Date`` value (inclusive) given to the model as
            context. ``None`` means today's date.
        predict_days: number of rows to predict; ``predict_days * len(data_columns)``
            tokens are generated per sample.
        all_data_df: bucketed frame; it is encoded with ``encode_data`` and its
            ``Date``, ``tnx_bucket`` and ``vix_bucket`` columns are merged into
            the ground truth.
        all_orig_df: frame providing ``Date``, ``Close``, ``DeltaClose_std`` and
            the remaining ``*_bucket`` columns read for the ground truth.
        model: object exposing ``generate(idx, max_new_tokens=..., temperature=...)``.

    Returns:
        bool: whether the final predicted delta and the final ground-truth delta
        agree on the ``> 0`` test (e.g. both negative).
    """
    historical_datapoints = 40 # plot these on the graph as context
    iter_count = 5 # number of sampled continuations that get averaged

    if cutoff_date is None:
        cutoff_date = datetime.now().date()
    context_df = all_data_df[all_data_df.Date <= cutoff_date]
    context = encode_data(context_df)
    # std as delta percentage at the prediction cutoff date
    cutoff_data = all_orig_df[all_orig_df.Date <= cutoff_date].iloc[-1]
    std_close = cutoff_data.DeltaClose_std * cutoff_data.Close
    last_close = cutoff_data.Close
    print(f"baseline {cutoff_data.Date} close {last_close} std_close {std_close}")

    # First ground-truth row to show. Clamped at 0 so a context shorter than
    # `historical_datapoints` does not turn into a negative (from-the-end) slice.
    window_start = max(len(context_df) - historical_datapoints, 0)
    # Number of context rows actually shown; the first predicted point sits right after them.
    history_shown = len(context_df) - window_start

    preds = []
    decoded_rows = 0 # row count of the decoded output (context + prediction)
    for _ in range(iter_count):
        # Inference only: no autograd graph is needed for sampling.
        with torch.no_grad():
            y = model.generate(context.to(device), max_new_tokens=predict_days*len(data_columns), temperature=0.3)
        pred = decode_data(y) # pred includes all the context
        decoded_rows = len(pred)
        preds.append(_add_std_columns(pred[-predict_days:].copy().reset_index()))

    # Element-wise mean over the samples; copy so the first sample is left untouched.
    new_pred = preds[0].copy()
    for sample in preds[1:]:
        new_pred += sample
    new_pred /= len(preds)
    new_pred['direction_color'] = 'red'
    new_pred.loc[new_pred['close_direction'] <= (StockData.UP_LABEL + StockData.DOWN_LABEL)/2.0, 'direction_color'] = 'green'

    # Disabled experiment, kept for reference:
    # We trust the direction prediction more. If the actual prediction differ, flip the price prediction direction
    # new_pred.loc[(new_pred.direction_color == 'red') & (new_pred.close_std > 0), 'close_std'] *= -0
    # new_pred.loc[(new_pred.direction_color == 'green') & (new_pred.close_std < 0), 'close_std'] *= -0

    print(f"Predicting for dates > {context_df.iloc[-1].Date}")
    # NOTE(review): the predicted vix mean is shifted by 2.5 here while the
    # ground-truth vix mean printed below is not -- confirm this is intended.
    print(f"=== close mean {new_pred.close_std.mean()} volume mean {new_pred.volume_std.mean()} vix {new_pred.vix.mean()+2.5} ===")
    print(f"=== open mean {new_pred.open_std.mean()} high mean {new_pred.high_std.mean()} low mean {new_pred.low_std.mean()} ===")
    print("")

    # Create the figure only once the data work above has succeeded.
    plt.figure(figsize=(10, 5))
    pred_val_delta = new_pred.close_std.cumsum() * std_close
    plt.scatter(y=pred_val_delta, x=new_pred.index + history_shown, label='avg prediction', color=new_pred['direction_color'])

    # print out the ground truth
    merged_orig = pd.merge(all_orig_df, all_data_df[['Date', 'tnx_bucket', 'vix_bucket']], on='Date', how='right')

    ground_truth = _add_std_columns(merged_orig.iloc[window_start:decoded_rows].copy().reset_index())
    print(f"=== ground truth from date (inclusive) {ground_truth.iloc[0].Date} ===")
    print(f"=== close mean {ground_truth.close_std.mean()} volume mean {ground_truth.volume_std.mean()} vix mean {ground_truth.vix.mean()} ===")
    print(f"=== open mean {ground_truth.open_std.mean()} high mean {ground_truth.high_std.mean()} low mean {ground_truth.low_std.mean()} ===")
    ground_truth_delta = ground_truth.Close - last_close
    plt.plot(ground_truth_delta, label='original', color='blue')

    # NOTE(review): when no rows exist after the cutoff, the last ground-truth
    # row is presumably the cutoff row itself, so this compares against a zero delta.
    pred_is_correct = bool((pred_val_delta.iloc[-1] > 0) == (ground_truth_delta.iloc[-1] > 0))

    plt.legend()  # Adds a legend to distinguish the lines
    plt.grid(True)
    plt.show()  # Displays the plot
    return pred_is_correct

In [5]:
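# model init: the notebook now uses load_model imported from model.py; this earlier inline version is kept commented out for reference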
# def load_model(device, out_dir, ckpt_file):
#     model_args = dict()
    
#     ckpt_path = os.path.join(out_dir, ckpt_file)
#     if not os.path.exists(ckpt_path):
#         print("can't find checkpoint file: " + ckpt_path)
#         exit(1)
    
#     checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
#     checkpoint_model_args = checkpoint['model_args']
#     # force these config attributes to be equal otherwise we can't even resume training
#     # the rest of the attributes (e.g. dropout) can stay as desired from command line
#     for k in ['n_layer', 'n_head', 'n_embd', 'block_size', 'bias', 'vocab_size']:
#         model_args[k] = checkpoint_model_args[k]
#     # create the model
#     gptconf = GPTConfig(**model_args)
#     model = GPT(gptconf)
#     state_dict = checkpoint['model']
#     # fix the keys of the state dictionary :(
#     # honestly no idea how checkpoints sometimes get this prefix, have to debug more
#     unwanted_prefix = '_orig_mod.'
#     for k,v in list(state_dict.items()):
#         if k.startswith(unwanted_prefix):
#             state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
#     model.load_state_dict(state_dict)
    
#     model.to(device)
#     model.eval()
#     checkpoint = None # free up memory
    
#     print(gptconf)
#     return model

In [6]:
def backtest(model, data_for_eval, ticker_df, years=range(2023, 2025), day=6, predict_days=20):
    """Run ``predict_and_plot`` once per month and report the hit count.

    Args:
        model: model handed to ``predict_and_plot``.
        data_for_eval: bucketed data frame handed to ``predict_and_plot``.
        ticker_df: original ticker data frame handed to ``predict_and_plot``.
        years: iterable of years to cover; every month 1-12 of each year is used.
        day: day of month used as the cutoff date. It must exist in every
            month (i.e. at most 28), otherwise ``date(...)`` raises ValueError.
        predict_days: number of days to predict per cutoff date.

    All plots and prints go into a scrollable output widget; nothing is returned.
    """
    # Create a scrollable output widget with a fixed height of 1000 pixels
    output_area = widgets.Output(layout={'border': '1px solid black', 'width': '100%', 'height': '1000px', 'overflow_y': 'scroll'})
    display(output_area)

    # Generate plots within the scrollable output area
    with output_area:
        correct_count = 0
        total_count = 0
        for year in years:
            for month in range(1, 13):
                correct = predict_and_plot(date(year, month, day), predict_days, data_for_eval, ticker_df, model)
                total_count += 1
                if correct:
                    print("correct")
                    correct_count += 1

        print(f"correction predictions {correct_count} / {total_count}")

### Init experiment

In [7]:

# ckpt_file = 'ckpt_96_vocab_600_block_4_4_1.232val_15drop.pt' # pretty good - 28, 26, 29, 25, 26, 23, 30 5day - 27, 33, 7th: 25, 20, 15th (up 22): 23, 26
# ckpt_file = 'ckpt_94_vocab_512_block_1.384val.pt' # good balance of up and down
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.256val.pt' # pretty good, didn't do 2023 too well - 27
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.224val_15drop.pt' # 27, 26, 25, 21
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.7M_1.227val_15drop.pt' # pretty good - 28, 31, 27, 27, 25, 26, 30 5day - 28, 29 (up 33), 7th: 22, 23, 15th: 14, 24

# model = load_model(device, out_dir, ckpt_file)

In [12]:
# refresh the most recent data
ticker = 'spy'

get_ticker_data("^VIX", f"{currentDir}/data", False, False)
get_ticker_data("^TNX", f"{currentDir}/data", False, False)
ticker_sd, _ = get_ticker_data(ticker, f"{currentDir}/data", False, False)
all_data_df = get_data_for_eval(ticker, data_dir=f"{currentDir}/data")

In [15]:
# Scrollable container that receives every plot and print produced below.
output_area = widgets.Output(
    layout={
        'border': '1px solid black',
        'width': '100%',
        'height': '2400px',
        'overflow_y': 'scroll',
    }
)
display(output_area)

# All checkpoints that have been compared so far ...
ckpt_files = [
    'ckpt_96_vocab_600_block_4_4_1.232val_15drop.pt',
    'ckpt_96_vocab_600_block_4_6_1.7M_1.227val_15drop.pt',
    'ckpt_96_vocab_600_block_4_4_1.2322val_20drop.pt',
    'ckpt_96_vocab_600_block_4_6_1.7M_1.2331val_20drop.pt',
    'rl_model_episode_90_acc_0.600.pt',
]
# ... and the subset actually evaluated (this assignment replaces the list above).
ckpt_files = [
    'rl_model_episode_90_acc_0.600.pt',
    'ckpt_96_vocab_600_block_4_6_1.7M_1.2331val_20drop.pt',
]

# One 20-day prediction per checkpoint, from today's date (cutoff None).
with output_area:
    for ckpt_file in ckpt_files:
        model = load_model(device, out_dir, ckpt_file)
        predict_and_plot(
            None,
            20,
            all_data_df,
            ticker_sd.df,
            model,
        )

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

### backtesting

In [None]:
# Checkpoint under test; the commented lines are alternatives to swap in.
ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.7M_1.2331val_20drop.pt'
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.7M_1.2331val_20drop.pt'
# ckpt_file = 'ckpt_96_vocab_600_block_4_4_1.232val_15drop.pt'
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.7M_1.227val_15drop.pt'
backtest(
    load_model(device, out_dir, ckpt_file),
    all_data_df,
    ticker_sd.df,
)

number of parameters: 1.57M
GPTConfig(block_size=600, vocab_size=96, n_layer=4, n_head=6, n_embd=180, dropout=0.0, bias=False)


Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

## Intraday

In [20]:
# before: the number of days before the current day. E.g. 1 means yesterday
# predict_len: the number of 5-minute data points to leave room for
def get_start_time_for_day_before(before: int, predict_len: int) -> datetime:
    """Return 11:55 Pacific time on the day ``before`` days ago, minus ``predict_len * 5`` minutes.

    The result is timezone-aware (America/Los_Angeles) and carries the UTC
    offset that is valid on the target date.
    """
    pacific = pytz.timezone('America/Los_Angeles')

    # Today's calendar date in Pacific time, moved back by `before` days.
    target_day = datetime.now(pacific).date() - timedelta(days=before)

    # Build the wall-clock time naively and let pytz attach the offset that
    # applies on that date. Shifting an already-localized "now" with
    # timedelta/replace would keep today's offset, which is one hour off for
    # dates on the other side of a daylight-saving change.
    end_time = pacific.localize(datetime(target_day.year, target_day.month, target_day.day, 11, 55))

    # normalize() keeps the offset correct even if the subtraction were to
    # cross a daylight-saving transition.
    return pacific.normalize(end_time - timedelta(minutes=predict_len*5))

def backtest_intraday(model, data_for_eval, ticker_df, predict_len=None, lookback_days=60):
    """Run ``predict_and_plot`` for each recent weekday and report the hit count.

    Args:
        model: model handed to ``predict_and_plot``.
        data_for_eval: bucketed data frame handed to ``predict_and_plot``.
        ticker_df: original ticker data frame handed to ``predict_and_plot``.
        predict_len: number of data points to predict. When omitted, the
            notebook-level ``predict_len`` variable is used, as before.
        lookback_days: how many calendar days back (starting with today, 0)
            are considered; Saturdays and Sundays are skipped.

    Raises:
        NameError: if ``predict_len`` is neither passed nor defined globally.

    All plots and prints go into a scrollable output widget; nothing is returned.
    """
    if predict_len is None:
        # Backward compatible with callers relying on the notebook global.
        try:
            predict_len = globals()['predict_len']
        except KeyError:
            raise NameError("name 'predict_len' is not defined") from None

    # Create a scrollable output widget with a fixed height of 2000 pixels
    output_area = widgets.Output(layout={'border': '1px solid black', 'width': '100%', 'height': '2000px', 'overflow_y': 'scroll'})
    display(output_area)

    # Generate plots within the scrollable output area
    with output_area:
        total_count = 0
        correct_count = 0
        for i in range(lookback_days):
            start_time = get_start_time_for_day_before(i, predict_len)
            # weekday() is 5 for Saturday and 6 for Sunday
            if start_time.weekday() >= 5:
                continue

            total_count += 1
            correct = predict_and_plot(start_time, predict_len, data_for_eval, ticker_df, model)
            if correct:
                print("correct")
                correct_count += 1

        print(f"correction predictions {correct_count} / {total_count}")

In [21]:

# Checkpoints tried earlier, kept for reference:
# ckpt_file = 'ckpt_intraday_96_vocab_1.7M_1.044val_15drop.pt'
# ckpt_file = 'ckpt_96_vocab_600_block_4_4_1.232val_15drop.pt'
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.256val.pt' # pretty good, didn't do 2023 too well - 27
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.224val_15drop.pt' # 27, 26, 25, 21
# ckpt_file = 'ckpt_96_vocab_600_block_4_6_1.7M_1.227val_15drop.pt' # pretty good - 28, 31, 27, 27, 25, 26, 30 5day - 28, 29 (up 33), 7th: 22, 23, 15th: 14, 24

# The intraday section runs on the checkpoint stored as "ckpt.pt" in out_dir.
model = load_model(
    device,
    out_dir,
    "ckpt.pt",
)

number of parameters: 3.48M
GPTConfig(block_size=600, vocab_size=96, n_layer=5, n_head=6, n_embd=240, dropout=0.0, bias=False)


In [22]:
ticker = 'SPY'
predict_len = 10 # number of data points to predict
cutoff_datetime = get_start_time_for_day_before(1, predict_len)

# Refresh the intraday data. Note that `ticker_sd` and `all_data_df` are
# rebound here, replacing the daily frames assigned earlier in the notebook.
for _index_symbol in ("^VIX", "^TNX"):
    get_ticker_data(_index_symbol, f"{currentDir}/data_intra_day", intra_day=True, use_cache=False)

ticker_sd, _ = get_ticker_data(
    ticker,
    f"{currentDir}/data_intra_day",
    intra_day=True,
    use_cache=False,
)
all_data_df = get_data_for_eval(
    ticker,
    data_dir=f"{currentDir}/data_intra_day",
    intra_day=True,
)

output_area = widgets.Output(
    layout={
        'border': '1px solid black',
        'width': '100%',
        'height': '2000px',
        'overflow_y': 'scroll',
    }
)
display(output_area)

# Three independent prediction runs from the same cutoff, plotted in the scrollable area.
with output_area:
    for _ in range(3):
        predict_and_plot(
            cutoff_datetime,
            predict_len,
            all_data_df,
            ticker_sd.df,
            model,
        )

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

In [23]:
# Intraday backtest using the model and the intraday frames from the cells above.
backtest_intraday(
    model,
    all_data_df,
    ticker_sd.df,
)

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…