In [1]:
%load_ext autoreload
%autoreload 2

import sys



In [2]:
import os

import numpy as np
import torch
import torch.nn as nn

import cgm

In [3]:
# Build the model under test from the project-local `cgm` module.
# price_input_size / vol_input_size equal the trailing feature counts of the synthetic
# arrays created further down (5 price columns, 2 volume columns), and num_layers is
# read back by the per-time-step layer loops later in the notebook.
# NOTE(review): the meaning of the remaining hyperparameters is defined inside cgm.CGM,
# which is not visible from this notebook — confirm there before tuning.
cgm_master = cgm.CGM(
    hidden_dim=15,
    vol_input_size=2,  # psudo_volume has 2 features per time step
    price_input_size=5,  # psudo_price has 5 features per time step
    seq_dropout_rate=0.5, 
    gbl_dropout_rate=0.4,
    last_dropout_rate=0.3,
    relation_num=3, 
    output_dim=3, 
    num_layers=9, 
    input_dim=12)

In [4]:
from datetime import datetime as dt
from datetime import timedelta as ddlta
import scipy.stats

# Calendar date on which the synthetic minute grid starts.
base_dt = dt.now().date()

# `date + timedelta` keeps only whole days, so adding minutes to `base_dt` directly
# leaves every stamp at HHMM == 0000. Promote the date to a datetime at midnight
# so that minute offsets are preserved.
_base_midnight = dt.combine(base_dt, dt.min.time())

# 3600 consecutive one-minute timestamps, formatted as YYYYMMDDHHMM strings.
ls_datetimes = [
    (_base_midnight + ddlta(minutes=i)).strftime("%Y%m%d%H%M")
    for i in range(3600)
]

In [5]:
# Synthetic price data
company_names = ['kaisya', 'Kigyo', 'Otutome', 'Company']  # four placeholder company labels
time_seqs = range(202204220000, 202204220100)  # integer time keys; this cell only uses their count
# Uniform random values in [0, 100), laid out as (company, time step, feature)
# with 5 features per step.
psudo_price = 100 * np.random.rand(len(company_names), len(time_seqs), 5)

In [6]:
# Synthetic volume data
company_names = ['kaisya', 'Kigyo', 'Otutome', 'Company']  # same four labels as the price cell
time_seqs = range(202204220000, 202204220100)  # integer time keys; this cell only uses their count
# Uniform random values in [0, 100), laid out as (company, time step, feature)
# with 2 features per step.
psudo_volume = 100 * np.random.rand(len(company_names), len(time_seqs), 2)

In [7]:
def build_graph(data, n_feature, n_companies=None):
    """Build one Pearson-correlation graph between companies per feature.

    For every feature index ``k`` in ``range(n_feature)`` and every ordered pair of
    company indices ``(i, j)``, the edge weight is the Pearson correlation
    coefficient of ``data[i, :, k]`` and ``data[j, :, k]``.

    Args:
        data: Array indexable as ``data[company, :, feature]``.
        n_feature: Number of leading feature indices to build a graph for.
        n_companies: Number of leading company indices to include. When omitted,
            falls back to ``len(company_names)`` (the notebook-level list), which
            is the value this function has always used.

    Returns:
        A ``(graph, np_graph)`` tuple holding the same coefficients twice:
        ``graph[feature][i][j]`` as nested dicts, and ``np_graph`` as nested
        lists ordered ``[feature][i][j]``.
    """
    if n_companies is None:
        # Backward-compatible default: size the graph from the notebook global.
        n_companies = len(company_names)

    graph = {}
    np_graph = []
    for feature_idx in range(n_feature):
        feature_edges = {}
        feature_rows = []
        for i in range(n_companies):
            series_i = data[i, :, feature_idx]
            row = []
            for j in range(n_companies):
                # pearsonr also returns a p-value, which is not used here.
                corr, _p_value = scipy.stats.pearsonr(series_i, data[j, :, feature_idx])
                row.append(corr)
            feature_edges[i] = dict(enumerate(row))
            feature_rows.append(row)
        graph[feature_idx] = feature_edges
        np_graph.append(feature_rows)
    return graph, np_graph

In [10]:
# Swap the first two axes of each synthetic array, (company, time, feature) ->
# (time, company, feature), and hand the result to torch as float32 tensors.
price_data = torch.from_numpy(np.transpose(psudo_price, (1, 0, 2))).float()
volume_data = torch.from_numpy(np.transpose(psudo_volume, (1, 0, 2))).float()

In [11]:
# Feed the leading 5 price features and leading 2 volume features (all of the
# synthetic columns) through the model's per-stream input layers.
x_price = cgm_master.feature_weight_price(price_data[..., :5])
x_volume = cgm_master.feature_weight_volume(volume_data[..., :2])

In [25]:
# build_graph indexes its input as data[company, :, feature], but the projected
# tensors are time-major (axis 0 is the time step that the recurrent loops below
# iterate over). Swap the first two axes so each correlation is taken over a
# company's full time series instead of across companies at a single time step.
# NOTE(review): n_feature is hard-coded to 5 for both streams — confirm it matches
# the trailing dimension produced by feature_weight_price / feature_weight_volume.
price_graph, price_graph_np = build_graph(
    x_price.detach().numpy().transpose(1, 0, 2), 5)
volume_graph, volume_graph_np = build_graph(
    x_volume.detach().numpy().transpose(1, 0, 2), 5)

In [26]:
# Pack the nested correlation lists into float32 tensors. This rebinds the
# *_graph_np names from Python lists to tensors.
price_graph_np = torch.from_numpy(np.array(price_graph_np)).to(torch.float32)
volume_graph_np = torch.from_numpy(np.array(volume_graph_np)).to(torch.float32)

In [27]:

# Seed the h and c states of each stream from that stream's first time step.
# squeeze(0) only has an effect when the leading dimension of the slice is 1.
last_h_time_price = x_price[0].squeeze(0)
last_c_time_price = x_price[0].squeeze(0)

last_h_time_volume = x_volume[0].squeeze(0)
last_c_time_volume = x_volume[0].squeeze(0)


In [29]:
# The initial g / c_g states of the price stream alias its initial h / c tensors.
last_g_time_price, last_c_g_time_price = last_h_time_price, last_c_time_price

In [30]:
# The initial g / c_g states of the volume stream alias its initial h / c tensors.
last_g_time_volume, last_c_g_time_volume = last_h_time_volume, last_c_time_volume

In [31]:

# Price stream recurrence: for every time step, run cgm_master.num_layers rounds of
# s_cell followed by g_cell, then carry the resulting states over to the next step.
# The final last_h_time_price is consumed by the CCA cell further down.
time = len(time_seqs)
for t in range(time):
    # init: each time step starts its layer stack from the states carried over
    # from the previous time step
    last_h_layer_price = last_h_time_price
    last_c_layer_price = last_c_time_price
    # information integration
    # Each input: Comps * hidden size (original author's note on the argument shapes)
    last_g_layer_price, last_c_g_layer_price = cgm_master.g_cell.init_forward(last_g_time_price, last_c_g_time_price,last_h_layer_price )
    for l in range(cgm_master.num_layers):
        # argument order: x, h, c, g, h_t, adj
        last_h_layer_price, last_c_layer_price = cgm_master.s_cell(
                                                                torch.squeeze(x_price[t], 0), 
                                                                last_h_layer_price,
                                                                last_c_layer_price, 
                                                                last_g_layer_price,
                                                                last_h_time_price,
                                                                price_graph_np)
        # argument order: g, c_g, t_g, t_c, h, c
        last_g_layer_price, last_c_g_layer_price = cgm_master.g_cell(
                                                                last_g_layer_price,
                                                                last_c_g_layer_price,
                                                                last_g_time_price,
                                                                last_c_g_time_price,
                                                                last_h_layer_price, 
                                                                last_c_layer_price)



    # the outputs of the last layer become the "previous time step" states
    last_h_time_price, last_c_time_price = last_h_layer_price, last_c_layer_price
    last_g_time_price, last_c_g_time_price = last_g_layer_price, last_c_g_layer_price

In [32]:
### volume graph ###
# Volume stream recurrence: same structure as the price loop, driven by x_volume and
# volume_graph_np. It goes through the same cgm_master.s_cell / g_cell modules.

time = len(time_seqs)
for t in range(time):
    # init: each time step starts its layer stack from the states carried over
    # from the previous time step
    last_h_layer_volume = last_h_time_volume
    last_c_layer_volume = last_c_time_volume
    # information integration
    last_g_layer_volume, last_c_g_layer_volume = cgm_master.g_cell.init_forward(
                                                                        last_g_time_volume,
                                                                        last_c_g_time_volume,
                                                                        last_h_layer_volume,
                                                                        )
    for l in range(cgm_master.num_layers):
        # argument order: x, h, c, g, h_t, adj
        last_h_layer_volume, last_c_layer_volume = cgm_master.s_cell(torch.squeeze(x_volume[t], 0),
                                                               last_h_layer_volume, last_c_layer_volume,
                                                               last_g_layer_volume, last_h_time_volume, volume_graph_np)
        # argument order: g, c_g, t_g, t_c, h, c
        last_g_layer_volume, last_c_g_layer_volume = cgm_master.g_cell(last_g_layer_volume, last_c_g_layer_volume,
                                                                 last_g_time_volume, last_c_g_time_volume,
                                                                 last_h_layer_volume, last_c_layer_volume,
                                                                 )
        
    # the outputs of the last layer become the "previous time step" states
    last_h_time_volume, last_c_time_volume = last_h_layer_volume, last_c_layer_volume
    last_g_time_volume, last_c_g_time_volume = last_g_layer_volume, last_c_g_layer_volume


In [34]:
### CCA ###
# Pass the final hidden state of each stream through its own CCA head.
cca_price = cgm_master.cca_price(last_h_time_price)
cca_volume = cgm_master.cca_volume(last_h_time_volume)

# Re-expose the volume stream's final layer states under stream-agnostic names.
# NOTE(review): only the volume states are forwarded here; in the cells visible in
# this notebook the price stream is used downstream solely through cca_price —
# confirm this is intended.
last_h_layer = last_h_layer_volume
last_c_layer = last_c_layer_volume
last_g_layer = last_g_layer_volume
last_c_g_layer = last_c_g_layer_volume


In [35]:
# The output head consumes the final hidden state under the name `last_h`.
# cca_price and cca_volume keep the objects they are already bound to.
last_h = last_h_layer

In [36]:
# Apply the model's dropout module to the final hidden state, then its w_out layer.
out = cgm_master.w_out(cgm_master.dropout(last_h))

In [41]:
seed = 255
use_cuda = False
restore = False
torch.manual_seed(seed)
if use_cuda:
    # Seed the CUDA generator from the same constant as the CPU generator instead
    # of repeating the literal.
    torch.cuda.manual_seed(seed)

# checkpoint
# NOTE(review): `restore` is used both as a flag and as the checkpoint file name, and
# `log_path` is not defined in any cell visible in this notebook — set both before
# enabling this branch.
if restore:  # path of a previously saved model
    print('loading checkpoint...\n')
    checkpoints = torch.load(os.path.join(log_path, restore))

torch.backends.cudnn.benchmark = True

# model
print('building model...\n')
# NOTE(review): `CGM`, `config` and `vocab` are not defined in any cell visible here
# (the class is only reachable as `cgm.CGM`, and it is constructed with keyword
# hyperparameters earlier in the notebook). This line raises NameError as written;
# decide whether it should reuse `cgm_master` or build a model from a real config.
model = CGM(config, vocab)

building model...



NameError: name 'CGM' is not defined

In [37]:

# NOTE(review): this cell appears to be pasted from a training script. `args`,
# `logging` (called as a function), `Optim`, `L`, `config`, `vocab`, `dataloader`,
# `train` and `eval` are not defined in any cell visible in this notebook and must
# be supplied before it can run.
if args.restore:
    model.load_state_dict(checkpoints['model'])
if use_cuda:
    model.cuda()
if len(args.gpus) > 1:  # data-parallel across the listed GPUs
    model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
logging(repr(model) + "\n\n")  # record the model architecture in the log

# total number of parameters
param_count = sum(param.numel() for param in model.parameters())

logging('total number of parameters: %d\n\n' % param_count)

# `updates` records how much training has already been done, so that a run which was
# interrupted midway can resume from the checkpoint.
if args.restore:
    updates = checkpoints['updates']
    ori_updates = updates
else:
    updates = 0

# optimizer
if args.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm, lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)

optim.set_parameters(model.parameters())
if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
else:
    scheduler = None

if not args.notrain:
    # A top-level `return` is a SyntaxError in a notebook cell; the accuracies are
    # left bound as the notebook variables max_acc / test_acc instead.
    max_acc, test_acc = train(model, dataloader, scheduler, optim, updates)
    logging("Best accuracy: %.2f, test accuracy: %.2f\n" % (max_acc * 100, test_acc * 100))
else:
    assert args.restore is not None
    eval(model, vocab, dataloader, 0, updates, do_test=True)

tensor([[[-0.5844,  0.7102, -0.0701],
         [-0.1273,  0.1223, -0.4591],
         [-0.4914,  0.4389, -0.1866],
         [-0.1138,  0.6608, -0.1248]]], grad_fn=<AddBackward0>)

In [9]:
# NOTE(review): this cell looks like a fragment pasted from a model method: `self`,
# `span_nodes`, `node_feature` and `adj` are not defined in any cell visible in this
# notebook, and x_price is read without being assigned in this cell.
# TODO: node_feature order and contents
node_emb = self.node_emb(span_nodes)  # idx of node

# volume input layer is fed the feature columns from index 6 onwards
x_volume = self.feature_weight_volume(node_feature[:, :, 6:])

### price graph ###
# h and c both start from the first time step of x_price
last_h_time_price = torch.squeeze(x_price[0], 0)  # node feature
last_c_time_price = torch.squeeze(x_price[0], 0)  # node feature

# initial g / c_g states come from attn_pooling over the node states and node_emb
last_g_time_price = self.attn_pooling(last_h_time_price, node_emb)
last_c_g_time_price = self.attn_pooling(last_c_time_price, node_emb)
# h_states = []

time = node_feature.size(0)
for t in range(time):
    # init: each time step starts its layer stack from the carried-over states
    last_h_layer_price = last_h_time_price
    last_c_layer_price = last_c_time_price
    # information integration
    last_g_layer_price, last_c_g_layer_price = self.g_cell.init_forward(last_g_time_price, last_c_g_time_price,
                                                                        last_h_layer_price, last_c_layer_price,
                                                                        node_emb)
    for l in range(self.num_layers):
        # argument order: x, h, c, g, h_t, adj
        last_h_layer_price, last_c_layer_price = self.s_cell(torch.squeeze(x_price[t], 0), last_h_layer_price,
                                                             last_c_layer_price, last_g_layer_price,
                                                             last_h_time_price, adj)
        # argument order: g, c_g, t_g, t_c, h, c (followed here by node_emb)
        last_g_layer_price, last_c_g_layer_price = self.g_cell(last_g_layer_price, last_c_g_layer_price,
                                                               last_g_time_price, last_c_g_time_price,
                                                               last_h_layer_price, last_c_layer_price, node_emb)


    # the outputs of the last layer become the "previous time step" states
    last_h_time_price, last_c_time_price = last_h_layer_price, last_c_layer_price
    last_g_time_price, last_c_g_time_price = last_g_layer_price, last_c_g_layer_price

### volume graph ###
# same initialisation as the price stream, driven by x_volume
last_h_time_volume = torch.squeeze(x_volume[0], 0)  # node feature
last_c_time_volume = torch.squeeze(x_volume[0], 0)  # node feature
last_g_time_volume = self.attn_pooling(last_h_time_volume, node_emb)
last_c_g_time_volume = self.attn_pooling(last_c_time_volume, node_emb)
# h_states = []

CGM(
  (feature_weight_price): Linear(in_features=7, out_features=5, bias=True)
  (feature_weight_volume): Linear(in_features=6, out_features=5, bias=True)
  (feature_combine): Linear(in_features=20, out_features=5, bias=True)
  (cca_price): Sequential(
    (0): Linear(in_features=5, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): ReLU()
    (4): Linear(in_features=10, out_features=5, bias=True)
  )
  (cca_volume): Sequential(
    (0): Linear(in_features=5, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): ReLU()
    (4): Linear(in_features=10, out_features=5, bias=True)
  )
  (attn_pooling): Attentive_Pooling(
    (w_1): Linear(in_features=5, out_features=5, bias=True)
    (w_2): Linear(in_features=5, out_features=5, bias=True)
    (u): Linear(in_features=5, out_features=1, bias=False)
  )
  (s_cell): SLSTMCell(
    (dropout): Dropout(p=0.5, inplace=False)
  