In [1]:
import yfinance as yf
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd
import math as m

from sklearn.preprocessing import StandardScaler
# from tensorboard_logger import configure

import copy
import sys
import os

import prep_data
import train

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader, TensorDataset

from models.transformer import Transformer
from models.rnn import RNN, RNN5Day
from models.transformer_encoder import TransformerEncoder
from models.cnn import CNN
from models.lstm import LSTM

import mpu.io
import json

In [2]:
# Single seed shared by every RNG this notebook can touch, so a
# Restart & Run All reproduces the same runs. `seed` is also embedded in the
# per-run directory/file names further down, so keep the name stable.
seed = 1
# NumPy is imported by this notebook; seed it too so any NumPy-based
# randomness is reproducible as well.
np.random.seed(seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(seed)

<torch._C.Generator at 0x207d71808f0>

In [3]:
# Use the first CUDA device when one is visible to torch, otherwise the CPU.
# `dev` is the plain string form; `device` is the matching torch.device object.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

print(dev)

cuda:0


In [4]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Pass it as ``cls=NumpyEncoder`` to ``json.dumps``. ``default`` is only
    consulted for objects the standard encoder cannot serialize by itself.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) ``list`` for ``np.ndarray``.

        Raises:
            TypeError: For any other type (raised by the base implementation).
        """
        # np.bool_ is neither np.integer nor np.floating, so it needs its own
        # branch; without it a NumPy boolean scalar raises TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


def jsonify(json_data):
    """Round-trip ``json_data`` through JSON text and return the parsed result.

    Serialization uses ``NumpyEncoder``, so the value that comes back from
    ``json.loads`` is made of plain JSON-compatible Python objects.
    """
    encoded = json.dumps(json_data, cls=NumpyEncoder)
    return json.loads(encoded)

In [5]:
# ---------------------------------------------------------------------------
# Experiment grid: train every architecture for every
# (horizon, batch size, dropout) combination, save the artifacts of each run
# in its own directory, then evaluate the trained model on each named period.
# ---------------------------------------------------------------------------
BATCH_SIZE = [512]
DROPOUT = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
HORIZON = [1]
# Named evaluation windows as (start_date, end_date) strings.
PERIODS = {
    "train": ("2010-01-01", "2017-12-31"),
    "test": ("2018-01-01", "2019-12-31"),
    "gfc": ("2008-01-01", "2008-12-31"),
    "covid": ("2020-01-01", "2022-12-31"),
}

# Settings shared by train.train and train.test; defined once so the two
# calls cannot drift apart.
SEQ_LEN = 128
MAX_EPOCH = 3
LEARNING_RATE = 1e-4

for horizon in HORIZON:
    for batch_size in BATCH_SIZE:
        for dropout in DROPOUT:
            # Fresh model instances for every dropout value.
            MODELS = [
                LSTM(dropout=dropout),
                Transformer(dropout=dropout),
                CNN(dropout=dropout),
                RNN(dropout=dropout),
            ]
            for models in MODELS:
                # train.train returns six values; the last one is unused here.
                model, best_model, loss, train_pred, test_pred, _ = train.train(
                    model=models,
                    seq_len=SEQ_LEN,
                    horizon=horizon,
                    max_epoch=MAX_EPOCH,
                    batch_size=batch_size,
                    loss=nn.MSELoss(),
                    optimizer=optim.Adam,
                    lr=LEARNING_RATE,
                    device=dev,
                )
                # One output directory per (model type, dropout, seed) run.
                config = f"{model.model_type}_dropout-{dropout}_seed-{seed}"
                # Re-using an existing directory is fine; any other failure
                # (e.g. permissions) should surface instead of being swallowed.
                os.makedirs(config, exist_ok=True)

                parent_dir = os.getcwd()
                os.chdir(config)
                try:
                    torch.save(model, f"model_{config}.pth")
                    torch.save(best_model, f"best_model_{config}.pth")
                    loss.to_csv(f"metadata_{config}.csv")

                    if horizon == 1:
                        train_pred.to_csv(f"train_{config}.csv")
                        test_pred.to_csv(f"test_{config}.csv")
                    else:
                        mpu.io.write(f"train_{config}.json", jsonify(train_pred))
                        mpu.io.write(f"test_{config}.json", jsonify(test_pred))

                    # `date_bounds` rather than `range`: the loop variable stays
                    # in the notebook namespace, and shadowing the builtin
                    # would break any later cell that calls range().
                    for periods, date_bounds in PERIODS.items():
                        start_date, end_date = date_bounds
                        date_range = pd.date_range(start_date, end_date)
                        # Note: this rebinds `loss` (previously the training
                        # history saved above) to the value train.test returns.
                        df, loss = train.test(
                            model=model,
                            seq_len=SEQ_LEN,
                            horizon=horizon,
                            batch_size=0,
                            start_date=start_date,
                            end_date=end_date,
                            loss=nn.MSELoss(),
                            lr=LEARNING_RATE,
                            scale_method="std",
                            device=dev,
                        )

                        if horizon == 1:
                            df = pd.DataFrame(
                                {"y": df["y"], "pred": df["pred"]}, index=date_range
                            )
                        else:
                            df["index"] = date_range
                            # Keep element 4 of every target/prediction entry
                            # (presumably the 5th-day value, given the "5-day"
                            # column name - TODO confirm).
                            df["yy"] = np.array([i[4] for i in df["y"]])
                            df["5-day"] = np.array([i[4] for i in df["pred"]])

                            df = pd.DataFrame(
                                {"y": df["yy"], "pred": df["5-day"]}, index=df["index"]
                            )
                        # NOTE(review): `df` is rebuilt for each period but never
                        # saved or collected here, so only the last period's frame
                        # survives the loop - confirm whether it should be written
                        # out per period.
                finally:
                    # Always return to the starting directory, even when the run
                    # is interrupted, so a re-run does not nest run directories.
                    os.chdir(parent_dir)



[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)


num_params: 463489
Epoch 000 | train loss 0.5227 | test_loss 0.5070 | wall_time 1.6912 | process_time 4.7812
Epoch 001 | train loss 0.5222 | test_loss 0.5058 | wall_time 1.2425 | process_time 4.1094


[*********************100%***********************]  1 of 1 completed

Epoch 002 | train loss 0.5224 | test_loss 0.5056 | wall_time 1.2784 | process_time 4.1875



  df.fillna(method="ffill", inplace=True)
[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)
[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)
[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)
[*********************100%***********************]  1 of 1 completed
  df.fillna(method="ffill", inplace=True)


num_params: 2316737


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


KeyboardInterrupt: 