In [46]:
# Library
import sys, os
sys.path.append(os.path.abspath('..'))

from hdf5_loader import StockDatasetHDF5
from myconfig import *
import subclass as sc

import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
import time
import matplotlib.pyplot as plt
from collections import defaultdict, OrderedDict
from datetime import datetime, timedelta
import os, shutil, wandb

import torch
import torch.nn as nn
from torch.utils.data import IterableDataset, DataLoader
import torch.nn.functional as F
import torchsummary

# np.set_printoptions(precision=4, suppress=True, linewidth=120)
torch.set_printoptions(sci_mode=False, precision=4)

In [2]:
def initialize_log_dir(base_log_dir="./tensorboard_logs"):
    if os.path.exists(base_log_dir):
        shutil.rmtree(base_log_dir)  # 기존 로그 디렉토리 삭제
    os.makedirs(base_log_dir, exist_ok=True)  # 새 디렉토리 생성
    return os.path.join(base_log_dir, datetime.now().strftime("%Y%m%d-%H%M%S"))

### Model

In [87]:
import model as m

ticker_list=[
    'AAPL',
    'MSFT',
    'GOOGL',
    'META',
    'IBM',
    'INTC',
]
date_range=[ST, ED]
hz_dim = {hz:128 for hz in THZ}
targ_hz = '5m'
label_weights = {hz:v for hz, v in zip(THZ, [0.1, 0.3, 0.5, 0.1, 0])}
batch_size = 1

In [88]:
import importlib
_ = importlib.reload(m)
_ = importlib.reload(sc)

In [89]:
hdf5_inst = sc.StockDatasetHDF5(ticker_list, date_range)
model = m.CustomCNN(hz_dim, THZ)
criterion = sc.get_label
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [90]:
max_iter = 200000
envgen = sc.get_samples(hdf5_inst, hz_dim, targ_hz, tensor=True)

wandb.init(
    project='FinanceProject',
    name='test1',
)

output_saving = 0; v=0
for i in range(max_iter):
    samples = defaultdict(list); labels = defaultdict(list)
    for j in range(batch_size):
        sample, label = next(envgen)
        if sample:
            current_price = sample.pop('current_price')
            for hz in THZ:
                samples[hz].append(sample[hz])
                labels[hz].append(label[hz])
        else: continue
        
    for hz in THZ: 
        samples[hz] = torch.stack(samples[hz])
        labels[hz] = torch.stack(labels[hz])
        
    label = criterion(labels, label_weights)
        
    model.train()
    value = model(samples)
    loss = nn.MSELoss()
    output = loss(torch.flatten(value), label)
    output_saving += output.item()

    optimizer.zero_grad()
    output.backward()
    optimizer.step()
    
    if i % 100 == 99:
        # wandb.log({"batch": i//128, "train_loss": output.detach().item(), 'label':torch.mean(label).detach().item(),
        #            'value': torch.mean(value).detach().item()})
        wandb.log({"train_loss": output_saving})
        output_saving = 0
        
    if i % 10 == 9:
        wandb.log({f'value{v}': torch.mean(value).detach().item(),
                   f'label{v}': torch.abs(torch.mean(label)).detach().item()})
        if i % 10000 == 9999: v += 1
    
wandb.finish()

KeyboardInterrupt: 

In [91]:
wandb.finish()

0,1
label0,▄▂▄▁▂▇▃▁▁█▃▅▂▃▆▆▃▃▃▁▁█▁▂▅▂▂▁▆▃▂▂▂▂▄▂▂▂▂▂
label1,█▁▃▃▁▂▄▃▁▁▃▁▄▁▃▂▁▂▁▁▃▁▁▂▂▂▄▂▁▁▁▄▂▂▁▂▂▂▂▁
label2,▃█▅▆▆▂▂▃▃▂▆▄▁▁▂▆▄▁▁▃▁▃▁▁▂▂▇▂▄▃▂▂▂▁▂▁▂▂▂▁
train_loss,▁▂▁█▁▁▁▁▂▁▁▁▁▂▁▁▁▁▂▇▃▂▁▂▂▂▁▁▃▂▁▂▁▂▁▂▁▂▁▁
value0,▇▇▅▄▄▂▄▃▅▅█▅▆▅▅▅▆▁▄▁▅▅▅▂▅▄▄▅▃▄▅▄▄▄▄▄▄▃▃▄
value1,█▇▇▇▆▆▄▄▄▄▁▄▅▆▆▅▁▅▅▆▇▆▅▆▄█▆▆▇▆▄▆▅▅▅▄▃▃▅▄
value2,▅▅▆▅▆▆▅▅▅▅▆█▆▆▆▇▇▆▆▆▆█▆▅▆▅▃▂▁▁▂▄▅▅▆█▆▅▄▅

0,1
label0,0.1138
label1,0.06296
label2,0.17252
train_loss,3.00642
value0,-0.14845
value1,-0.10354
value2,-0.03474


In [80]:
samples['1m'].shape

torch.Size([8, 5])