In [6]:
import sys
import os

# Resolve the parent of the current working directory and place its
# "FinRL/" sub-folder at the very front of the import search path, so it
# is consulted before any other sys.path entry.
script_path = os.path.abspath('')
current_file_dir = os.path.split(script_path)[0]
sys.path[:0] = [f"{current_file_dir}/FinRL/"]

import logging
import pandas as pd
import numpy as np
import datetime
import torch
import yfinance as yf

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.config import INDICATORS

from finrl.meta.preprocessor.preprocessors import GroupByScaler
from finrl.meta.env_portfolio_optimization.env_portfolio_optimization import PortfolioOptimizationEnv
from finrl.agents.portfolio_optimization.models import DRLAgent
from finrl.agents.portfolio_optimization.architectures import EIIE

import src.env as env 

# Use the first CUDA device when torch reports one as available; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [7]:
# Sanity check: how many tickers are configured in env.STOCKS.
len(env.STOCKS)

28

In [8]:
# Show the configured env.VALIDATION_START value (going by its name, the start of the validation window).
print(env.VALIDATION_START)

2018-01-01 00:00:00


In [9]:
# Show the configured env.TEST_START value; it is used below as the start date of the test split.
print(env.TEST_START)

2019-01-01 00:00:00


In [10]:
def load_data() -> pd.DataFrame:
    """Return the raw price history for ``env.STOCKS``, downloading it at most once.

    The data is cached as a pickle under ``<parent of cwd>/src/data``. The
    cache file name encodes the start date, the end date and the ticker
    list, so changing any of them in ``src.env`` triggers a fresh download
    instead of silently reusing a stale file.

    Returns:
        The DataFrame produced by ``YahooDownloader(...).fetch_data()`` for
        ``env.TIME_RANGE_START`` .. ``env.TIME_RANGE_END``, either freshly
        downloaded or read back from the cache.
    """
    stocks = env.STOCKS
    script_path = os.path.abspath('')
    current_file_dir = os.path.dirname(script_path)
    data_dir = os.path.join(current_file_dir, "src", "data")
    # The end date must be part of the cache key; the previous name repeated
    # the start date twice, so a changed TIME_RANGE_END was never noticed.
    data_path = os.path.join(
        data_dir,
        f"{env.TIME_RANGE_START}-{env.TIME_RANGE_END}-{'-'.join(stocks)}.pkl",
    )

    if os.path.isfile(data_path):
        # NOTE: unpickling executes arbitrary code; this is only acceptable
        # because the file is a local cache written by this function.
        return pd.read_pickle(data_path)

    df = YahooDownloader(
        start_date=env.TIME_RANGE_START,
        end_date=env.TIME_RANGE_END,
        ticker_list=stocks,
    ).fetch_data()
    # Create the cache directory on first use so to_pickle cannot fail on a
    # fresh checkout.
    os.makedirs(data_dir, exist_ok=True)
    df.to_pickle(data_path)
    return df


In [11]:
# Raw price history (read from the local cache or downloaded on first run).
df = load_data()

# Preprocess the data and compute the technical indicators listed in env.INDICATORS.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=env.INDICATORS,
)
df_t = fe.preprocess_data(df)

# Split the *preprocessed* frame. Previously the raw `df` was split, which
# left `df_t` unused and dropped every indicator computed above from the
# train/test sets.
fmt = "%Y-%m-%d"
train_df = data_split(df_t, env.TRAIN_START.strftime(fmt), env.TRAIN_END.strftime(fmt))
test_df = data_split(df_t, env.TEST_START.strftime(fmt), env.TEST_END.strftime(fmt))

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (70448, 8)
Successfully added technical indicators


### Load graph and temporal data to enrich the feature space

In [6]:
# Fetch the Temporal_Relational_Stock_Ranking_FinRL repository only when the
# NASDAQ temporal CSV is not already present (the check is relative to the
# current working directory).
if not os.path.isfile("Temporal_Relational_Stock_Ranking_FinRL/temporal_data/NASDAQ_temporal_data.csv"):
    # Download the `main` branch archive from GitHub (-L follows redirects, -o names the output file).
    !curl -L -o Temporal_Relational_Stock_Ranking_FinRL-main.zip https://github.com/C4i0kun/Temporal_Relational_Stock_Ranking_FinRL/archive/refs/heads/main.zip
    # Unpack the archive into the current directory.
    !unzip Temporal_Relational_Stock_Ranking_FinRL-main.zip
    # Drop the "-main" suffix so the folder matches the path tested in the guard above.
    !mv Temporal_Relational_Stock_Ranking_FinRL-main Temporal_Relational_Stock_Ranking_FinRL
    # Extract the bundled tarball in place; presumably it contains the *_temporal_data.csv files read below (TODO confirm).
    !tar -xzvf Temporal_Relational_Stock_Ranking_FinRL/temporal_data/temporal_data_processed.tar.gz -C Temporal_Relational_Stock_Ranking_FinRL/temporal_data

In [9]:
# Read the temporal feature tables for both exchanges; the two files differ
# only by the exchange prefix in their name.
temporal_nyse, temporal_nasdaq = (
    pd.read_csv(f"Temporal_Relational_Stock_Ranking_FinRL/temporal_data/{exchange}_temporal_data.csv")
    for exchange in ("NYSE", "NASDAQ")
)

In [10]:
# Tickers from env.STOCKS that appear in neither temporal table (an empty set means full coverage).
set(env.STOCKS).difference(temporal_nyse["tic"].unique(), temporal_nasdaq["tic"].unique())

set()

In [11]:
from torch_geometric.utils import k_hop_subgraph

In [12]:
# Display the NYSE temporal table; the columns shown in the output are day, open, high, low, close, volume and tic.
temporal_nyse

Unnamed: 0,day,open,high,low,close,volume,tic
0,0,0.578496,0.580309,0.567871,0.555112,0.593285,A
1,0,0.586496,0.589924,0.593711,0.598892,0.603702,AAN
2,0,0.357611,0.357296,0.359589,0.367068,0.361763,AAP
3,0,0.602742,0.597432,0.592057,0.594226,0.620892,AAT
4,0,0.535432,0.529228,0.526605,0.533735,0.555247,AB
...,...,...,...,...,...,...,...
2162560,1244,0.961218,0.962850,0.965538,0.967666,0.959063,ZB-A
2162561,1244,0.862950,0.864858,0.853509,0.855318,0.851905,ZBH
2162562,1244,0.797489,0.798619,0.790712,0.792157,0.795982,ZF
2162563,1244,0.712403,0.711281,0.688027,0.658353,0.734713,ZNH
