<a href="https://colab.research.google.com/github/pollyjuice74/marketGAT/blob/master/marketGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import datetime as dt
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader

!pip install torch-geometric
from torch_geometric.data import Data, Batch
from torch_geometric.data import TemporalData, HeteroData

import os
if not os.path.exists('marketGAT'):
  pass
  #!git clone https://github.com/pollyjuice74/marketGAT.git
#os.chdir('marketGAT')

# from utils import *
# from data import *
# from model import *

Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3


In [2]:
endDate = dt.datetime.now()
startDate = endDate - dt.timedelta(days=730)
n = 18
prev_context = 324
sample_size = 342 # 324 + 18
batch_size = 64
alpha = 0.15


# def get_portfolio(portfolio_list):
#   return yf.download(portfolio_list, start=startDate, end=endDate, interval='1h')["Close"] # Portfolio


# ### STOCKS  ###
# stocks = ["SPY", "AMZN", "TSLA", "AAPL", "GOOGL", "META", "GM", "MS"]
# market = get_portfolio("SPY")

# # get stock data
# stocks = [get_portfolio(s) for s in stocks]

# # to tensors
# align = lambda s: s[-len(market):]
# stocks = torch.stack([align(torch.tensor(s, dtype=torch.float)) for s in stocks])
# stocks /= stocks.mean(1, keepdim=True)

# market, stocks

In [17]:



def sym_graph(symbol, interval='1h'):
  graph = HeteroData()
  df = yf.download(symbol, start=startDate, end=endDate, interval=interval)

  # Data
  msg_features = ['High', 'Low', 'Close', 'Open']
  msg = torch.tensor(df[msg_features][:-1].values, dtype=torch.float)
  t = df.index.strftime('%H%M%S').astype('int64')
  node_ids = torch.tensor(df.index.strftime('%Y%m%d%H%M%S').astype('int64'), dtype=torch.int64)
  #print(node_ids, msg, t)

  # Add data to graph
  graph.sym = symbol
  graph[symbol].x = msg
  graph[symbol].t = t
  graph[symbol].node_ids = node_ids

  # Create edges based on time sequence
  num_nodes = msg.size(0)
  src = torch.arange(0, num_nodes - 1, dtype=torch.long)  # Indices from 0 to N-2
  dst = torch.arange(1, num_nodes, dtype=torch.long)
  graph[symbol, 'next_in_sequence', symbol].edge_index = torch.stack([src, dst], dim=0)

  return graph


def build_graph(stock_symbols):
  graph = HeteroData()
  sp = sym_graph('SPY')
  stock_graphs = [sym_graph(sym) for sym in stock_symbols]

  # Add stock graphs to graph
  for s_graph in stock_graphs:
    graph[s_graph.sym].x = s_graph[s_graph.sym].x
    graph[s_graph.sym].t = s_graph[s_graph.sym].t
    graph[s_graph.sym].node_ids = s_graph[s_graph.sym].node_ids
    graph[s_graph.sym, 'next_in_sequence', s_graph.sym].edge_index = s_graph[s_graph.sym, 'next_in_sequence', s_graph.sym].edge_index

  # Add SPY stock graph
  graph[sp.sym].x = sp[sp.sym].x
  graph[sp.sym].t = sp[sp.sym].t
  graph[sp.sym].node_ids = sp[sp.sym].node_ids
  graph[sp.sym, 'next_in_sequence', sp.sym].edge_index = sp[sp.sym, 'next_in_sequence', sp.sym].edge_index

  # Link all symbol nodes to SPY nodes at the 'same_time' t
  sp_nodes = graph[sp.sym].node_ids
  for sym in stock_symbols:
    stock_nodes = graph[sym].node_ids
    graph[sp.sym, 'same_time', sym].edge_index = link_graphs(sp_nodes, stock_nodes)

  return graph


def link_graphs(sp_nodes, stock_nodes):
  # SPY/Stock look up node_id: idx
  stock_mapping = {node_id.item(): idx for idx, node_id in enumerate(stock_nodes)}
  sp_mapping = {node_id.item(): idx for idx, node_id in enumerate(sp_nodes)}

  common_node_ids = set(stock_mapping.keys()).intersection(sp_mapping.keys())

  # Makes list of common node id's idxs
  stock_indices = torch.tensor(sorted([stock_mapping[nid] for nid in common_node_ids]), dtype=torch.long) ###
  sp_indices = torch.tensor(sorted([sp_mapping[nid] for nid in common_node_ids]), dtype=torch.long) ###

  return torch.stack([sp_indices, stock_indices], dim=0)


#######################################################################
def sample(graph, sym, sample_len=7*30, pred_len=7*5):
  sample_graph = HeteroData()

  # random stock sample index
  s_ix = torch.randint(0, graph[sym].x.size(0) - sample_len - pred_len, (1,))
  curr_ix = s_ix+sample_len

  # stock graph
  sample_graph[sym].x = graph[sym].x[s_ix:curr_ix + pred_len]
  sample_graph[sym].t = graph[sym].t[s_ix:curr_ix + pred_len]
  sample_graph[sym].node_ids = graph[sym].node_ids[s_ix:curr_ix + pred_len]

  # stock nodesIDs
  first_nodeID = graph[sym].node_ids[s_ix]
  curr_nodeID = graph[sym].node_ids[curr_ix]
  last_nodeID = graph[sym].node_ids[curr_ix + pred_len]

  # spy nodes ixs
  f_sp_ix = torch.where(graph['SPY'].node_ids == first_nodeID.item())[0]
  c_sp_ix = torch.where(graph['SPY'].node_ids == curr_nodeID.item())[0]
  l_sp_ix = torch.where(graph['SPY'].node_ids == last_nodeID.item())[0]

  # spy graph
  sample_graph['SPY'].x = graph['SPY'].x[f_sp_ix:l_sp_ix]
  sample_graph['SPY'].t = graph['SPY'].t[f_sp_ix:l_sp_ix]
  sample_graph['SPY'].node_ids = graph['SPY'].node_ids[f_sp_ix:l_sp_ix]

  # sample edges
  sym_edges = graph[sym, 'next_in_sequence', sym].edge_index[:, s_ix:curr_ix + pred_len] ###
  spy_edges = graph['SPY', 'next_in_sequence', 'SPY'].edge_index[:, f_sp_ix:l_sp_ix] ###
  same_time_edges =  graph['SPY', 'same_time', sym].edge_index[:, s_ix:curr_ix + pred_len -1] ###

  dicts, edge_ixs = make_dicts([spy_edges, sym_edges]) # spy, sym
  same_time_edges = same_time_ix(same_time_edges, dicts) # convert same time ixs from graph ixs to sample graph ixs

  # set edges
  sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index = edge_ixs[0]
  sample_graph[sym, 'next_in_sequence', sym].edge_index = edge_ixs[1]
  sample_graph['SPY', 'same_time', sym].edge_index = same_time_edges

  # normalize stock data
  sample_graph[sym].x_samp, sample_graph[sym].x_pred = normalize(sample_graph[sym].x, curr_ix=curr_ix - s_ix, pred_ix=curr_ix+pred_len - s_ix) # shifts ixs by first stock ix
  sample_graph['SPY'].x_samp, sample_graph['SPY'].x_pred = normalize(sample_graph['SPY'].x, curr_ix=c_sp_ix - f_sp_ix, pred_ix=l_sp_ix - f_sp_ix) # shifts ixs by first spy stock ix

  # EDGE_INDEX_SAMP, EDGE_INDEX_PRED
  # Symbol edges #
  x_samp_len = sample_graph[sym].x_samp.shape[0]
  ix = (sample_graph[sym, 'next_in_sequence', sym].edge_index == x_samp_len).nonzero(as_tuple=False)[0,1].item()
  # make edges
  edge_index_samp = sample_graph[sym, 'next_in_sequence', sym].edge_index[:, :ix-1]
  #edge_index_samp = torch.cat((edge_index_samp[:, 0].unsqueeze(1), edge_index_samp), dim=1) # mask dim ###
  edge_index_pred = sample_graph[sym, 'next_in_sequence', sym].edge_index[:, ix-1:-1]
  # Set in sample graph
  sample_graph[sym, 'next_in_sequence', sym].edge_index_samp = edge_index_samp
  sample_graph[sym, 'next_in_sequence', sym].edge_index_pred = edge_index_pred

  # SPY edges #
  # Translate indexes
  sp_same_ix, sym_ix = (same_time_edges  == x_samp_len).nonzero(as_tuple=False)[:, 1].tolist()
  sp_ix = (sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index == sp_same_ix).nonzero(as_tuple=False)[0,1].item()
  # make edges
  sp_ei_samp = sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index[:, :sp_ix] ###
  sp_ei_pred = sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index[:, sp_ix:] ###
  # Set in sample graph
  sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index_samp = sp_ei_samp
  sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index_pred = sp_ei_pred
  ######

  # REMOVE REDUNDANT sample_graph.x, sample_graph.edge_index #
  del sample_graph[sym].x
  del sample_graph['SPY'].x

  del sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index
  del sample_graph[sym, 'next_in_sequence', sym].edge_index

  return sample_graph, len(sample_graph[sym].x_samp)
#######################################################################


def make_dicts(edge_sets):
  dicts = list()
  edge_ixs = list()

  for edge_samp in edge_sets:
    num_nodes = edge_samp.shape[1]

    edge_ix = torch.stack([torch.arange(0, num_nodes, dtype=torch.long),    # [[0, 1, 2, ...],
                          torch.arange(1, num_nodes+1, dtype=torch.long)], dim=0) #  [1, 2, 3, ...]]

    # dict of graph ixs to sample graph ixs
    gix_to_sgix = {edge_samp[j, i].item(): edge_ix[j, i].item()
                    for i in range(num_nodes) for j in range(2)} # [2, num_nodes]
    # gix_to_sgix[edge_samp[1, num_nodes-2].item()] = edge_ix[1, num_nodes-2].item()

    dicts.append(gix_to_sgix)
    edge_ixs.append(edge_ix)

  return dicts, edge_ixs # spy, sym


def same_time_ix(same_time, dicts):
  edge_ix = torch.zeros_like(same_time) # same shape

  for i in range(same_time.shape[1]):
    edge_ix[0, i] = dicts[0][same_time[0, i].item()] # spy dict
    edge_ix[1, i] = dicts[1][same_time[1, i].item()] # sym dict

  return edge_ix


def step(sample_graph, ix, sym):
  sp_ix_raw, _ = sample_graph['SPY', 'same_time', sym][:, ix]

  sample_graph[sym].x_samp = torch.cat([sample_graph[sym].x_samp, sample_graph[sym].x_pred[:1]])
  sample_graph[sym].x_pred = sample_graph[sym].x_pred[1:]

  sp_edge_indices = sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index[1]
  sp_ix = torch.where(sp_edge_indices == sp_ix_raw)[0].item()
  print(len(sp_edge_indices), sp_ix_raw, sp_ix)

  sample_graph['SPY'].x_samp = sample_graph['SPY'].x[:sp_ix]
  sample_graph['SPY'].x_pred = sample_graph['SPY'].x[sp_ix:]

  return ix+1


def normalize(x, curr_ix, pred_ix):
  x_samp = x[:curr_ix]
  last_close = float(x_samp[-1, 2]) # Scale by last Close price

  x_samp /= last_close

  x_pred = x[curr_ix:pred_ix+1]
  x_pred /= last_close
  return x_samp, x_pred


def movement(x_pred, pct=0.03):
    high = max(x_pred[0])
    low = min(x_pred[1])

    if high >= 1 + pct:
        y = torch.tensor([1, 0, 0])  # above Pct return
    elif low <= 1 - pct:
        y = torch.tensor([0, 0, 1])  # below Pct return
    else:
        y = torch.tensor([0, 1, 0])  # within Pct return

    return y


sym = 'TCRX'
graph = build_graph(['AMZN', 'MSFT', 'TCRX'])
sample_graph, ix = sample(graph, sym)
print(graph.metadata())
sample_graph

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

(['AMZN', 'MSFT', 'TCRX', 'SPY'], [('AMZN', 'next_in_sequence', 'AMZN'), ('MSFT', 'next_in_sequence', 'MSFT'), ('TCRX', 'next_in_sequence', 'TCRX'), ('SPY', 'next_in_sequence', 'SPY'), ('SPY', 'same_time', 'AMZN'), ('SPY', 'same_time', 'MSFT'), ('SPY', 'same_time', 'TCRX')])





HeteroData(
  TCRX={
    t=Index([153000,  93000, 103000, 113000, 123000, 133000, 143000, 153000,  93000,
       103000,
       ...
       123000, 133000, 143000, 153000,  93000, 103000, 113000, 123000, 133000,
       143000],
      dtype='int64', name='Datetime', length=245),
    node_ids=[245],
    x_samp=[210, 4],
    x_pred=[35, 4],
  },
  SPY={
    t=Index([153000,  93000, 103000, 113000, 123000, 133000, 143000, 153000,  93000,
       103000,
       ...
       123000, 133000, 143000, 153000,  93000, 103000, 113000, 123000, 133000,
       143000],
      dtype='int64', name='Datetime', length=245),
    node_ids=[245],
    x_samp=[210, 4],
    x_pred=[35, 4],
  },
  (SPY, next_in_sequence, SPY)={
    edge_index_samp=[2, 210],
    edge_index_pred=[2, 35],
  },
  (TCRX, next_in_sequence, TCRX)={
    edge_index_samp=[2, 209],
    edge_index_pred=[2, 35],
  },
  (SPY, same_time, TCRX)={ edge_index=[2, 244] }
)

In [26]:
from torch_geometric.nn.conv import GATv2Conv#, HGTConv
from torch_geometric.nn import Linear
from torch_geometric.nn import to_hetero
import math
from typing import Dict, List, Optional, Tuple, Union

import torch
from torch import Tensor
from torch.nn import Parameter

from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.dense import HeteroDictLinear, HeteroLinear
from torch_geometric.nn.inits import ones
from torch_geometric.nn.parameter_dict import ParameterDict
from torch_geometric.typing import Adj, EdgeType, Metadata, NodeType, SparseTensor, Size, Any
from torch_geometric.utils import softmax, is_sparse


def construct_bipartite_edge_index(
    edge_index_dict: Dict[EdgeType, Adj],
    src_offset_dict: Dict[EdgeType, int],
    dst_offset_dict: Dict[NodeType, int],
    edge_attr_dict: Optional[Dict[EdgeType, Tensor]] = None,
    num_nodes: Optional[int] = None,
) -> Tuple[Adj, Optional[Tensor]]:

    is_sparse_tensor = False
    edge_indices: List[Tensor] = []
    edge_attrs: List[Tensor] = []
    for edge_type, src_offset in src_offset_dict.items():
        edge_index = edge_index_dict[edge_type]
        dst_offset = dst_offset_dict[edge_type[-1]]

        # TODO Add support for SparseTensor w/o converting.
        #print(edge_index, SparseTensor)
        is_sparse_tensor = isinstance(edge_index, SparseTensor)
        if is_sparse(edge_index):
            edge_index, _ = to_edge_index(edge_index)
            edge_index = edge_index.flip([0])
        else:
            edge_index = edge_index.clone()

        edge_index[0] += src_offset
        edge_index[1] += dst_offset
        edge_indices.append(edge_index)

        if edge_attr_dict is not None:
            if isinstance(edge_attr_dict, ParameterDict):
                value = edge_attr_dict['__'.join(edge_type)]
            else:
                value = edge_attr_dict[edge_type]
            if value.size(0) != edge_index.size(1):
                value = value.expand(edge_index.size(1), -1)
            edge_attrs.append(value)

    edge_index = torch.cat(edge_indices, dim=1)

    edge_attr: Optional[Tensor] = None
    if edge_attr_dict is not None:
        edge_attr = torch.cat(edge_attrs, dim=0)

    if is_sparse_tensor:
        edge_index = SparseTensor(
            row=edge_index[1],
            col=edge_index[0],
            value=edge_attr,
            sparse_sizes=(num_nodes, num_nodes),
        )

    return edge_index, edge_attr



class HGTConv(MessagePassing):
    def __init__(
        self,
        in_channels: Union[int, Dict[str, int]],
        out_channels: int,
        metadata: Metadata,
        heads: int = 1,
        **kwargs,
    ):
        super().__init__(aggr='add', node_dim=0, **kwargs)

        if out_channels % heads != 0:
            raise ValueError(f"'out_channels' (got {out_channels}) must be "
                             f"divisible by the number of heads (got {heads})")

        if not isinstance(in_channels, dict):
            in_channels = {node_type: in_channels for node_type in metadata[0]}

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.heads = heads
        self.node_types = metadata[0]
        self.edge_types = metadata[1]
        self.edge_types_map = {
            edge_type: i
            for i, edge_type in enumerate(metadata[1])
        }

        self.dst_node_types = set([key[-1] for key in self.edge_types])

        self.kqv_lin = HeteroDictLinear(self.in_channels,
                                        self.out_channels * 3)

        self.out_lin = HeteroDictLinear(self.out_channels, self.out_channels,
                                        types=self.node_types)

        dim = out_channels // heads
        num_types = heads * len(self.edge_types)

        self.k_rel = HeteroLinear(dim, dim, num_types, bias=False,
                                  is_sorted=True)
        self.v_rel = HeteroLinear(dim, dim, num_types, bias=False,
                                  is_sorted=True)

        self.skip = ParameterDict({
            node_type: Parameter(torch.empty(1))
            for node_type in self.node_types
        })

        self.p_rel = ParameterDict()
        for edge_type in self.edge_types:
            edge_type = '__'.join(edge_type)
            self.p_rel[edge_type] = Parameter(torch.empty(1, heads))

        self.reset_parameters()


    def reset_parameters(self):
        super().reset_parameters()
        self.kqv_lin.reset_parameters()
        self.out_lin.reset_parameters()
        self.k_rel.reset_parameters()
        self.v_rel.reset_parameters()
        ones(self.skip)
        ones(self.p_rel)


    def _cat(self, x_dict: Dict[str, Tensor]) -> Tuple[Tensor, Dict[str, int]]:
        """Concatenates a dictionary of features."""
        cumsum = 0
        outs: List[Tensor] = []
        offset: Dict[str, int] = {}
        for key, x in x_dict.items():
            outs.append(x)
            offset[key] = cumsum
            cumsum += x.size(0)
        return torch.cat(outs, dim=0), offset


    def _construct_src_node_feat(
        self, k_dict: Dict[str, Tensor], v_dict: Dict[str, Tensor],
        edge_index_dict: Dict[EdgeType, Adj]
    ) -> Tuple[Tensor, Tensor, Dict[EdgeType, int]]:
        """Constructs the source node representations."""
        cumsum = 0
        num_edge_types = len(self.edge_types)
        H, D = self.heads, self.out_channels // self.heads

        # Flatten into a single tensor with shape [num_edge_types * heads, D]:
        ks: List[Tensor] = []
        vs: List[Tensor] = []
        type_list: List[Tensor] = []
        offset: Dict[EdgeType] = {}
        for edge_type in edge_index_dict.keys():
            src = edge_type[0]
            N = k_dict[src].size(0)
            offset[edge_type] = cumsum
            cumsum += N

            # construct type_vec for curr edge_type with shape [H, D]
            edge_type_offset = self.edge_types_map[edge_type]
            type_vec = torch.arange(H, dtype=torch.long).view(-1, 1).repeat(
                1, N) * num_edge_types + edge_type_offset

            type_list.append(type_vec)
            ks.append(k_dict[src])
            vs.append(v_dict[src])

        ks = torch.cat(ks, dim=0).transpose(0, 1).reshape(-1, D)
        vs = torch.cat(vs, dim=0).transpose(0, 1).reshape(-1, D)
        type_vec = torch.cat(type_list, dim=1).flatten()

        k = self.k_rel(ks, type_vec).view(H, -1, D).transpose(0, 1)
        v = self.v_rel(vs, type_vec).view(H, -1, D).transpose(0, 1)

        return k, v, offset

    def propagate(
      self,
      edge_index: Adj,
      size: Size = None,
      **kwargs: Any,
      ) -> Tensor:

      decomposed_layers = 1 if self.explain else self.decomposed_layers

      for hook in self._propagate_forward_pre_hooks.values():
          res = hook(self, (edge_index, size, kwargs))
          if res is not None:
              edge_index, size, kwargs = res

      mutable_size = self._check_input(edge_index, size)

      # Run "fused" message and aggregation (if applicable).
      fuse = False
      if self.fuse and not self.explain:
          if is_sparse(edge_index):
              fuse = True
          elif (not torch.jit.is_scripting()
                and isinstance(edge_index, EdgeIndex)):
              if (self.SUPPORTS_FUSED_EDGE_INDEX
                      and edge_index.is_sorted_by_col):
                  fuse = True

      if fuse:
          coll_dict = self._collect(self._fused_user_args, edge_index,
                                    mutable_size, kwargs)

          msg_aggr_kwargs = self.inspector.collect_param_data(
              'message_and_aggregate', coll_dict)
          for hook in self._message_and_aggregate_forward_pre_hooks.values():
              res = hook(self, (edge_index, msg_aggr_kwargs))
              if res is not None:
                  edge_index, msg_aggr_kwargs = res
          out = self.message_and_aggregate(edge_index, **msg_aggr_kwargs)
          for hook in self._message_and_aggregate_forward_hooks.values():
              res = hook(self, (edge_index, msg_aggr_kwargs), out)
              if res is not None:
                  out = res

          update_kwargs = self.inspector.collect_param_data(
              'update', coll_dict)
          out = self.update(out, **update_kwargs)

      else:  # Otherwise, run both functions in separation.
          if decomposed_layers > 1:
              user_args = self._user_args
              decomp_args = {a[:-2] for a in user_args if a[-2:] == '_j'}
              decomp_kwargs = {
                  a: kwargs[a].chunk(decomposed_layers, -1)
                  for a in decomp_args
              }
              decomp_out = []

          for i in range(decomposed_layers):
              if decomposed_layers > 1:
                  for arg in decomp_args:
                      kwargs[arg] = decomp_kwargs[arg][i]

              coll_dict = self._collect(self._user_args, edge_index,
                                        mutable_size, kwargs)

              msg_kwargs = self.inspector.collect_param_data(
                  'message', coll_dict)
              for hook in self._message_forward_pre_hooks.values():
                  res = hook(self, (msg_kwargs, ))
                  if res is not None:
                      msg_kwargs = res[0] if isinstance(res, tuple) else res
              out = self.message(**msg_kwargs)
              for hook in self._message_forward_hooks.values():
                  res = hook(self, (msg_kwargs, ), out)
                  if res is not None:
                      out = res

              if self.explain:
                  explain_msg_kwargs = self.inspector.collect_param_data(
                      'explain_message', coll_dict)
                  out = self.explain_message(out, **explain_msg_kwargs)

              aggr_kwargs = self.inspector.collect_param_data(
                  'aggregate', coll_dict)
              for hook in self._aggregate_forward_pre_hooks.values():
                  res = hook(self, (aggr_kwargs, ))
                  if res is not None:
                      aggr_kwargs = res[0] if isinstance(res, tuple) else res

              out = self.aggregate(out, **aggr_kwargs)

              for hook in self._aggregate_forward_hooks.values():
                  res = hook(self, (aggr_kwargs, ), out)
                  if res is not None:
                      out = res

              update_kwargs = self.inspector.collect_param_data(
                  'update', coll_dict)
              out = self.update(out, **update_kwargs)

              if decomposed_layers > 1:
                  decomp_out.append(out)

          if decomposed_layers > 1:
              out = torch.cat(decomp_out, dim=-1)

      for hook in self._propagate_forward_hooks.values():
          res = hook(self, (edge_index, mutable_size, kwargs), out)
          if res is not None:
              out = res

      return out


    def forward(
        self,
        x_dict: Dict[NodeType, Tensor],
        edge_index_dict: Dict[EdgeType, Adj]  # Support both.
    ) -> Dict[NodeType, Optional[Tensor]]:
        F = self.out_channels
        H = self.heads
        D = F // H

        k_dict, q_dict, v_dict, out_dict = {}, {}, {}, {}

        # Compute K, Q, V over node types:
        kqv_dict = self.kqv_lin(x_dict)
        #print(kqv_dict)
        for key, val in kqv_dict.items():
            k, q, v = torch.tensor_split(val, 3, dim=1)
            k_dict[key] = k.view(-1, H, D)
            q_dict[key] = q.view(-1, H, D)
            v_dict[key] = v.view(-1, H, D)

        q, dst_offset = self._cat(q_dict)
        k, v, src_offset = self._construct_src_node_feat(
            k_dict, v_dict, edge_index_dict)

        # print(edge_index_dict)
        edge_index_used = torch.cat([edge_index_dict[key] for key in edge_index_dict], dim=1) # cat all tensors in edge_index_dict to shape [2, num_edges]

        edge_index, edge_attr = construct_bipartite_edge_index(
            edge_index_dict, src_offset, dst_offset, edge_attr_dict=self.p_rel,
            num_nodes=k.size(0))

        print(edge_index.shape, edge_index_used.shape)
        out = self.propagate(edge_index_used, k=k, q=q, v=v, edge_attr=edge_attr)

        #print(edge_index.shape, k.permute(2, 1, 0).shape, q.permute(0, 2, 1).shape, v.permute(2, 1, 0).shape, k.size(2))

        # Reconstruct output node embeddings dict:
        for node_type, start_offset in dst_offset.items():
            end_offset = start_offset + q_dict[node_type].size(0)
            if node_type in self.dst_node_types:
                out_dict[node_type] = out[start_offset:end_offset]

        # Transform output node embeddings:
        a_dict = self.out_lin({
            k:
            torch.nn.functional.gelu(v) if v is not None else v
            for k, v in out_dict.items()
        })

        # Iterate over node types:
        for node_type, out in out_dict.items():
            out = a_dict[node_type]

            if out.size(-1) == x_dict[node_type].size(-1):
                alpha = self.skip[node_type].sigmoid()
                out = alpha * out + (1 - alpha) * x_dict[node_type]
            out_dict[node_type] = out

        return out_dict


    def message(self, k_j: Tensor, q_i: Tensor, v_j: Tensor, edge_attr: Tensor,
                index: Tensor, ptr: Optional[Tensor],
                size_i: Optional[int]) -> Tensor:
        alpha = (q_i * k_j).sum(dim=-1) * edge_attr
        alpha = alpha / math.sqrt(q_i.size(-1))
        alpha = softmax(alpha, index, ptr, size_i)
        out = v_j * alpha.view(-1, self.heads, 1)
        return out.view(-1, self.out_channels)


    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(-1, {self.out_channels}, '
                f'heads={self.heads})')



class HGT(torch.nn.Module):
    def __init__(self, metadata, hidden_channels, num_classes=3, heads=1, num_layers=1):
        super().__init__()
        self.hidden_channels = hidden_channels
        # node_type: linear layer
        self.lin_dict = torch.nn.ModuleDict()

        # List of HGTConv layers
        self.convs = torch.nn.ModuleList()

        for _ in range(num_layers):
          conv = HGTConv(
              in_channels=hidden_channels,
              out_channels=hidden_channels,
              metadata=metadata,
              heads=heads,
          )
          self.convs.append(conv)

        self.classifier = nn.Linear(hidden_channels, num_classes)


    def forward(self, x_dict, edge_index_dict, edge_attr=None):
        # print([v.shape for k, v in x_dict.items()])
        print(len(edge_index_dict))

        for node_type, x in x_dict.items():
            out_channels = x.size(0)
            layer = Linear(out_channels, self.hidden_channels)
            x_dict[node_type] = layer(x.T).relu()

        #print([v.shape for k, v in x_dict.items()])
        #print(x_dict)

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)

        return self.classifier(x_dict)



class Account:
    def __init__(self, hidden_channels=7*30, epochs=2000, learning_rate=1e-05):
        # Account info
        self.net_value = 416.29
        self.symbols = ['AMZN',] #"TSLA", "AAPL", "GOOGL", "META", "GM", "MS"]
        self.bets = 100 # Limited ammount of bets per day
        # Bets
        self.current_bets = {sym: 0 for sym in self.symbols}
        self.stocks = [] # Graphs of stocks
        # Data
        self.graph = build_graph(self.symbols)
        # Model
        self.model = HGT(metadata=self.graph.metadata(), hidden_channels=hidden_channels)
        # Training
        self.epochs = epochs
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.BCEWithLogitsLoss()
        ##################################################


    def train(self):
        for i in range(self.epochs):
            epoch_loss = 0
            for sym in self.symbols:
                print("\nTraining on: ", sym)

                for _ in range(1000): # sample 1000 times
                    sample_graph, ix = sample(self.graph, sym) # Samples Training graph nodes
                    pct = torch.std(sample_graph[sym].x_samp) * 0.5 # try to predict a half std movement
                    print("Percent pred: ", pct.item())

                    while sample_graph[sym].x_pred.numel() > 0: # while there is more data to predict

                        y_hat = self.model(sample_graph.x_samp_dict, sample_graph.edge_index_samp_dict)
                        y = movement(sample_graph[sym].x_pred[0], pct=pct.item())

                        loss = self.criterion(y_hat, y)
                        step(sample_graph, ix, sym)

                        self.optimizer.zero_grad(set_to_none=True)
                        loss.backward()
                        self.optimizer.step()

                epoch_loss += loss.item()

                self.print(i, epoch_loss, freq=25)


acc = Account()
acc.train()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Training on:  AMZN
Percent pred:  0.01675524190068245
2
torch.Size([2, 419]) torch.Size([2, 419])





IndexError: Found indices in 'edge_index' that are larger than 7 (got 209). Please ensure that all indices in 'edge_index' point to valid indices in the interval [0, 8) in your node feature matrix and try again.

In [4]:
#graph = build_graph(['AMZN', 'MSFT', 'TCRX'])
sample_graph, ix = sample(graph, sym)


#def sample_edges(sym):

# Symbol edges #
x_samp_len = sample_graph[sym].x_samp.shape[0]
ix = (sample_graph[sym, 'next_in_sequence', sym].edge_index == x_samp_len).nonzero(as_tuple=False)[0,1].item()
# set edges
edge_index_samp = sample_graph[sym, 'next_in_sequence', sym].edge_index[:, :ix-1]
edge_index_samp = torch.cat((edge_index_samp[:, 0].unsqueeze(1), edge_index_samp), dim=1) # mask dim

edge_index_pred = sample_graph[sym, 'next_in_sequence', sym].edge_index[:, ix-1:-1]


# SPY edges #
same_time = sample_graph['SPY', 'same_time', sym].edge_index
#print(same_time)

sp_same_ix, sym_ix = (same_time  == x_samp_len).nonzero(as_tuple=False)[:, 1].tolist()
sp_ix = (sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index == sp_same_ix).nonzero(as_tuple=False)[0,1].item()
print(sp_same_ix, sym_ix, sp_ix)

sp_ei_samp = sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index[:, :sp_ix] ###
sp_ei_pred = sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index[:, sp_ix:] ###
print(sp_ei_samp.shape, sp_ei_pred.shape)

sample_same_time = same_time[:, :sym_ix]
print(sample_same_time.shape, x_samp_len)



spy = sample_graph['SPY']
symb = sample_graph[sym]
print(sample_graph)


print([x.shape for k, x in symb.items()], [x.shape for k, x in spy.items()]) #edge_index_samp.shape, edge_index_pred.shape)
      #sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index)
#sample_graph['SPY', 'same_time', sym].edge_index = same_time_edges

# normalize stock data
ix, sample_graph[sym].x_samp.shape[0], sample_graph[sym].x_pred.shape, sample_graph['SPY'].x_samp.shape, sample_graph['SPY'].x_pred.shape

AttributeError: 'EdgeStorage' object has no attribute 'edge_index'

In [None]:
sym = 'TCRX'





graph = build_graph(['AMZN', 'MSFT', 'TCRX'])
print(graph.node_types)
sample_graph, ix = sample(graph, sym)

sample_graph.x_samp_dict
sample_graph.edge_index_dict



# make edge index with sample graph ixs










# sample edges
# sample_graph[sym, 'next_in_sequence', sym].edge_index = graph[sym][sym, 'next_in_sequence', sym].edge_index[:, s_ix:curr_ix + pred_len] ###
# sample_graph['SPY', 'next_in_sequence', 'SPY'].edge_index = graph['SPY']['SPY', 'next_in_sequence', 'SPY'].edge_index[:, f_sp_ix:l_sp_ix] ###
# sample_graph['SPY', 'same_time', sym].edge_index =  graph['SPY', 'same_time', sym].edge_index[:, s_ix:curr_ix + pred_len] ###

# print(sample_graph['SPY'].x_samp.shape, ix)
# Check
# sp_ix_raw, s_ix_raw = sample_graph['SPY', 'same_time', sym][:, ix]
# graph[sym][sym].node_ids[s_ix_raw] == graph['SPY']['SPY'].node_ids[sp_ix_raw]

#sample_graph[sym].x_samp, sample_graph[sym].x_pred = step(sample_graph[sym].x_samp, sample_graph[sym].x_pred)
#print(sample_graph[sym].x_samp, sample_graph[sym].x_pred)
# print(sample_graph['SPY', 'same_time', sym][:, ix])
# print(sample_graph['SPY', 'next_in_sequence', 'SPY']['edge_index'][0])

# sp_y = movement(sp_x_pred, pct=0.003)
# stock_y = movement(stock_x_samp, pct=0.003)

# while len(sample_graph['SPY'].x_pred):
#   ix = step(sample_graph, ix, sym)
#   s_ix = len(sample_graph[sym].x_samp)
#   sp_ix = len(sample_graph['SPY'].x_samp)
#   print(s_ix, sp_ix)
#   print(sample_graph['SPY'].node_ids[sp_ix] == sample_graph[sym].node_ids[s_ix])
#   print(sample_graph['SPY'].node_ids[sp_ix], sample_graph[sym].node_ids[s_ix])

# print(sample_graph['SPY'].x_samp, sample_graph['SPY'].x_pred)
# print(sample_graph)

In [None]:
m = stocks[:1]
n = 3#int(len(stocks)*0.8)
s = stocks[1:]
train = stocks[1:n]
val = stocks[n:]

def get_batch(data):
  ix = torch.randint(data.shape[1] - sample_size, (batch_size,)) # Random index to data

  x = torch.stack([data[:,i:i+sample_size] for i in ix])#.reshape(-1, sample_size)
  market = torch.stack([m[:,i:i+sample_size] for i in ix])#.reshape(-1, sample_size)
  y = hitTP(x)
  return Data(x=x, market=market, y=y)


def hitTP(data):
  #print(data.shape)
  context = data[:,:,:prev_context]
  y = data[:,:, prev_context:]

  tp = context[:,:,-1] * (1+data.std()*alpha)
 # print(y.shape, tp.shape)
  hits = (y >= tp[:,:, None])
  return (hits.sum(-1) > 0).int()


@torch.no_grad()
def estimate_loss(criterion):
  loss_dict = {}
  model.eval()

  for split_name, split_data in [('train', train), ('val', val)]:
    losses = torch.zeros(eval_iters)
    for k in range(eval_iters):
      data = get_batch(split_data)
      out = model(data)
      loss = criterion(torch.squeeze(out), data.y.float())
      losses[k] = loss.item()
    loss_dict[split_name] = losses.mean()
  model.train()
  return loss_dict

d = get_batch(stocks)
d

In [None]:
# hyperparameters
batch_size = 64 # how many independent sequences will we process in parallel?
block_size = 256 # what is the maximum context length for predictions?
num_epochs = 5000
eval_interval = 100
learning_rate = 3e-5
device = 'cuda' #if torch.cuda.is_available() else 'cpu'
eval_iters = 200
#n_embd = 32
n_head = 6
n_layer = 6
dropout = 0.2
n_embd = head_size = sample_size


class Head(nn.Module):
    """ one head of self-attention """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)

        #self.proj = nn.Linear(n_embd, head_size)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)
        self.head_size = head_size

    def forward(self, data):
      # stock shapes for x stock: (L,1)
      # truncate DXY to (L,1)
      #x = data.x.reshape(-1, 1).float() # reshape to be (L,1) @ (1,L) ---> (L,L)
      #market = data.market.reshape(-1, 1).float()
      Q = self.query(data.x) # (L,L) @ ()
      K = self.key(data.market)
      V = self.value(data.market) # or market
      #P = self.proj(data.market)

      QK = Q @ K.transpose(-2, -1) / self.head_size**0.5
      W = F.softmax(QK, dim=1)
      att = W @ V
      out = att #@ P.transpose(-2,-1)# / head_size**0.5
      return out


class MultiHeadAttention(nn.Module):
  """ multiple heads of self-attention in parallel """

  def __init__(self, num_heads, head_size):
    super().__init__()
    self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
    self.proj = nn.Linear(head_size*num_heads, n_embd)
    self.dropout = nn.Dropout(dropout)
    self.head_size = head_size

  def forward(self, data):
    print(self.heads)
    out = torch.cat([h(data) for h in self.heads])#, dim=-1)
    out = self.dropout(self.proj(out.reshape(self.head_size, -1)))
    return out



class FeedForward(nn.Module):
  """ a simple linear layer followed by a non-linearity """

  def __init__(self, n_embd):
      super().__init__()
      self.net = nn.Sequential(
          nn.Linear(n_embd, 4*n_embd),
          nn.ReLU(),
          nn.Linear(4*n_embd,n_embd),
          nn.Dropout(dropout),
      )

  def forward(self, x):
    return self.net(x)


class Block(nn.Module):
  """ Transformer block: communication followed by computation """

  def __init__(self, n_embd, n_head):
      # n_embd: embedding dimension, n_head: the number of heads we'd like
      super().__init__()
      head_size = n_embd // n_head
      self.sa = MultiHeadAttention(n_head, head_size)
      self.ffwd = FeedForward(n_embd)
      self.ln1 = nn.LayerNorm(n_embd)
      self.ln2 = nn.LayerNorm(n_embd)

  def forward(self, x):
    x = self.sa(self.ln1(x))
    x = self.ffwd(self.ln2(x))
    return x

In [None]:
# x = data.x.reshape(-1, 1).float() # reshape to be (L,1) @ (1,L) ---> (L,L)
# market = data.market.reshape(-1, 1).float()
# Q = self.query(x) # (L,L) @ ()
# K = self.key(market)
# V = self.value(market)
# n_embd = sample_size # 342
# head_size = sample_size
# key = nn.Linear(n_embd, head_size, bias=False)
# query = nn.Linear(n_embd, head_size, bias=False)
# value = nn.Linear(n_embd, head_size, bias=False)
# proj = nn.Linear(n_embd, head_size)
# Q = query(d.x)#.reshape(64, -1, sample_size)) # (L,L) @ ()
# K = key(d.market)
# V = value(d.market)
# P = proj(d.market)

# QK = Q @ K.transpose(-2, -1) / head_size**0.5
# W = F.softmax(QK, dim=1)
# att = W @ V
# out = att @ P.transpose(-2,-1)# / head_size**0.5
# # return att

m = MultiHeadAttention(4, sample_size)
out = m(d)

# Q.shape, K.shape, V.shape, QK.shape, W.shape, att.shape, out
d.x, out.shape

In [None]:
train_list = [get_batch(train) for _ in range(1000)]
train_loader = DataLoader(train_list, collate_fn=lambda x: Batch.from_data_list(x), shuffle=True)

test_list = [get_batch(val) for _ in range(200)]
test_loader = DataLoader(test_list, collate_fn=lambda x: Batch.from_data_list(x), shuffle=True)


In [None]:
model = MultiHeadAttention(1,sample_size)#MultiHeadAttention(1, sample_size)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
criterion = nn.BCEWithLogitsLoss()

In [None]:
for epoch in range(num_epochs):
  epoch_loss = 0
  i = 0

  for data in train_loader:
    out = model(data)
    loss = criterion(torch.squeeze(out), data.y.float())

    # evaluate the loss
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

    epoch_loss += loss.item()

    # Display loss
    i += 1
    if epoch % 200 == 0 and i % len(train_loader)-1 == 0:
      losses = estimate_loss(criterion)
  print(f"Epoch [{epoch+1}/{num_epochs}]: TRAIN loss {losses['train']:.4f}, VAL loss {losses['val']:.4f}")


In [None]:
model = MultiHeadAttention(2, 64)# heads, head_size


In [None]:
d

In [None]:
# Normalize 'att'
att_normalized = att / att.mean()

# Convert 'att' tensor to numpy for PCA
att_numpy = att_normalized.detach().numpy()

# Apply PCA to reduce the dimensions to 3
pca = PCA(n_components=3)
att_3d = pca.fit_transform(att_numpy)

# Plot the 3D representation
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(att_3d[:, 0], att_3d[:, 1], att_3d[:, 2])
ax.set_title('3D Visualization of Attention Matrix')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.set_zlabel('PCA Component 3')
plt.show()

In [None]:
S_A = torch.ger(SPY, AMZN)

# Normalizing S_A
S_A /= S_A.mean()

# Convert S_A to numpy for PCA
S_A_numpy = S_A.numpy()

# Apply PCA to reduce the dimensions to 3
pca = PCA(n_components=3)
S_A_3d = pca.fit_transform(S_A_numpy)

# Plot the 3D representation
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(S_A_3d[:, 0], S_A_3d[:, 1], S_A_3d[:, 2])
ax.set_title('3D Visualization of S_A Matrix')
ax.set_xlabel('PCA Component 1')
ax.set_ylabel('PCA Component 2')
ax.set_zlabel('PCA Component 3')
plt.show()

In [None]:
sns.heatmap(S_A.numpy(), cmap='viridis')
plt.title('S_A Tensor Visualization')
plt.xlabel('AMZN Time Points')
plt.ylabel('SPY Time Points')
plt.show()

In [None]:
SPY = yf.download("SPY", start=startDate, end=endDate)
data

In [None]:

# data loading
def get_batch(split):
    # generate a small batch of data of inputs x and targets y
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,)) # creates
    x = torch.stack([data[i:i+block_size] for i in ix]) # concatenates
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

@torch.no_grad()
def estimate_loss():
    out = {}
    model.eval()
    for split in ['train', 'val']:
        losses = torch.zeros(eval_iters) # makes tensor of zeros
        for k in range(eval_iters):
            X, Y = get_batch(split)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out

In [None]:
model = GPTLanguageModel()
m = model.to(device)
# print the number of parameters in the model
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')

# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [None]:
for iter in range(max_iters):

    # every once in a while evaluate the loss on train and val sets
    if iter % eval_interval == 0 or iter == max_iters - 1:
        losses = estimate_loss()
        print(f"step {iter}: train loss {losses['train']:.4f}, val loss {losses['val']:.4f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # evaluate the loss
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

In [None]:
torch.save(model, 'model.pth')


In [None]:
torch.save(model.state_dict(), 'model_state_dict.pth')

In [None]:
# generate from the model
context = torch.zeros((1, 1), dtype=torch.long, device=device)
print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))
#open('more.txt', 'w').write(decode(m.generate(context, max_new_tokens=10000)[0].tolist()))