In [None]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent,DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

from pprint import pprint

import sys
# Add a sibling checkout directory to the end of the module search path.
# NOTE(review): this runs after the `finrl` imports earlier in this cell, so it
# cannot influence where those imports were resolved from; it only affects
# imports executed later. Confirm whether it is still needed.
sys.path.append("../FinRL-Library")

import itertools

## 2. Create Folders

In [4]:
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

# Hand the four working directories (data, trained models, TensorBoard logs,
# results) to the FinRL helper; presumably it creates any that are missing.
check_and_make_directories(
    [
        DATA_SAVE_DIR,
        TRAINED_MODEL_DIR,
        TENSORBOARD_LOG_DIR,
        RESULTS_DIR,
    ]
)

In [5]:
# Show the ticker symbols held in DOW_30_TICKER (imported from
# finrl.config_tickers).
print(DOW_30_TICKER)

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']


In [6]:
# Study window. These assignments rebind the date constants that were imported
# from finrl.config in the folder-setup cell; TRADE_START_DATE and
# TRADE_END_DATE are not rebound in this cell.
TRAIN_START_DATE, TRAIN_END_DATE = '2009-04-01', '2023-01-01'
TEST_START_DATE, TEST_END_DATE = '2023-01-01', '2024-02-25'

# A single download spanning train start through test end for every ticker in
# DOW_30_TICKER; the result is kept in `df`.
df = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    ticker_list=DOW_30_TICKER,
).fetch_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (109992, 8)


In [12]:
# Preview the rows ordered by date, then ticker. The sorted frame is only
# displayed; `df` itself is not reassigned.
df.sort_values(by=['date', 'tic'])

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-04-01,3.717500,3.892857,3.710357,3.286319,589372000,AAPL,2
1,2009-04-01,48.779999,48.930000,47.099998,34.743191,10850100,AMGN,2
2,2009-04-01,13.340000,14.640000,13.080000,11.563840,27701800,AXP,2
3,2009-04-01,34.520000,35.599998,34.209999,26.850750,9288800,BA,2
4,2009-04-01,27.500000,29.520000,27.440001,19.333210,15308300,CAT,2
...,...,...,...,...,...,...,...,...
109987,2024-02-23,526.750000,530.500000,524.159973,527.239990,2502000,UNH,4
109988,2024-02-23,284.720001,285.950012,283.500000,283.600006,5107100,V,4
109989,2024-02-23,40.630001,41.150002,40.459999,40.660000,15227200,VZ,4
109990,2024-02-23,21.500000,21.930000,21.410000,21.730000,31884500,WBA,4


In [13]:
# Row count per ticker. In the recorded output DOW has 1242 rows while every
# other ticker has 3750, i.e. its history is shorter than the others'.
df["tic"].value_counts()

tic
AAPL    3750
AMGN    3750
WMT     3750
WBA     3750
VZ      3750
V       3750
UNH     3750
TRV     3750
PG      3750
NKE     3750
MSFT    3750
MRK     3750
MMM     3750
MCD     3750
KO      3750
JPM     3750
JNJ     3750
INTC    3750
IBM     3750
HON     3750
HD      3750
GS      3750
DIS     3750
CVX     3750
CSCO    3750
CRM     3750
CAT     3750
BA      3750
AXP     3750
DOW     1242
Name: count, dtype: int64

In [15]:
# Technical indicators to compute. This rebinds the INDICATORS name imported
# from finrl.config with a four-item list.
INDICATORS = [
    'macd',
    'rsi_30',
    'cci_30',
    'dx_30',
]

In [16]:
# Configure the FinRL feature engineer: compute the indicators listed in
# INDICATORS plus the turbulence index, with no user-defined features.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_turbulence=True,
    user_defined_feature=False,
)

# Run the preprocessing on the downloaded frame, then sanitise the result:
#   * missing values become 0;
#   * infinities of BOTH signs become 0. Replacing only `np.inf` would leave
#     any -inf value in place, so both are listed explicitly.
# `fillna` and `replace` each return a new DataFrame, so no extra `.copy()` is
# needed to avoid aliasing the frame returned by `preprocess_data`.
processed = (
    fe.preprocess_data(df)
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
)

Successfully added technical indicators
Successfully added turbulence index


In [18]:
# Spot-check ten randomly drawn rows of the feature-engineered frame. No
# random_state is passed, so the rows shown will differ from run to run.
processed.sample(n=10)

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,rsi_30,cci_30,dx_30,turbulence
76252,2019-09-11,169.789993,171.360001,169.199997,156.965057,4047500,HON,2,0.777886,54.210333,116.508135,6.702665,35.468962
94325,2022-03-02,240.850006,242.740005,239.610001,231.726746,4482000,MCD,2,-3.675247,39.90782,-169.141616,39.82086,31.005272
18015,2011-09-16,16.719999,16.84,16.530001,11.446027,62292100,CSCO,4,0.189458,55.305103,144.762633,25.220856,43.312985
48454,2015-11-17,113.730003,115.440002,113.169998,100.981873,1981400,UNH,1,-1.207918,46.838785,-77.213046,14.249044,54.622444
102508,2023-04-17,151.169998,151.470001,150.339996,147.304413,4296400,PG,0,2.438439,60.822398,87.176741,39.576832,12.721174
91315,2021-10-01,152.410004,153.850006,150.289993,145.045105,1145600,TRV,4,-0.771036,46.901062,-161.013244,24.753399,120.087451
9744,2010-08-02,9.301429,9.378214,9.272143,7.917218,428055600,AAPL,0,0.026351,53.652382,34.913693,5.736421,25.432184
56345,2016-12-15,85.629997,86.300003,85.019997,64.716621,3461600,WBA,3,0.833594,56.53643,78.889387,16.723896,13.007379
983,2009-05-19,27.939713,28.014744,27.695862,13.309639,12002273,VZ,1,-0.155669,41.727167,-121.333305,26.441304,0.0
95374,2022-04-22,163.240005,163.520004,160.910004,154.168777,9394600,PG,4,2.206938,57.167685,121.637385,29.550438,96.840276
