In [1]:
import os
from pathlib import Path
path = Path(os.getcwd())
os.chdir(path.parent.absolute())

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import pandas_ta as ta
from stable_baselines3.dqn import DQN
import gym
import matplotlib.pyplot as plt
from stable_baselines3.common.evaluation import evaluate_policy
import mt4_hst
import stock_env
from stock_env.utils import *
from stock_env.envs.vn_stock_env import VietnamStockEnv
from stable_baselines3.common.env_checker import check_env
%matplotlib inline

pd.options.display.max_rows = 4000

In [23]:
help(ta.donchian)

Help on function donchian in module pandas_ta.volatility.donchian:

donchian(high, low, lower_length=None, upper_length=None, offset=None, **kwargs)
    Donchian Channels (DC)
    
    Donchian Channels are used to measure volatility, similar to
    Bollinger Bands and Keltner Channels.
    
    Sources:
        https://www.tradingview.com/wiki/Donchian_Channels_(DC)
    
    Calculation:
        Default Inputs:
            lower_length=upper_length=20
        LOWER = low.rolling(lower_length).min()
        UPPER = high.rolling(upper_length).max()
        MID = 0.5 * (LOWER + UPPER)
    
    Args:
        high (pd.Series): Series of 'high's
        low (pd.Series): Series of 'low's
        lower_length (int): The short period. Default: 20
        upper_length (int): The short period. Default: 20
        offset (int): How many periods to offset the result. Default: 0
    
    Kwargs:
        fillna (value, optional): pd.DataFrame.fillna(value)
        fill_method (value, optional): Type

In [32]:
df = mt4_hst.read_hst("stock_env/datasets/FPT1440.hst")
df = df[df['time'] >= '2012-01-01']
df.sort_values(by='time', inplace=True)
df = df.reset_index(drop=True)

df.ta.rsi(length=20, append=True)
df.ta.natr(length=20, scalar=1, append=True)
df.ta.log_return(length=5, append=True)
df.ta.log_return(length=20, append=True)
df.ta.percent_return(length=5, append=True)
df.ta.percent_return(length=20, append=True)

# trend setup
# df.ta.sma(50, append=True)
df['close>sma50'] = (df['close'] > df.ta.sma(50)).astype(int)
df['close>sma100'] = (df['close'] > df.ta.sma(100)).astype(int)
df['close>sma200'] = (df['close'] > df.ta.sma(200)).astype(int)

# df.ta.ema(10, append=True)
# df.ta.ema(20, append=True)
df['ema10>ema20'] = (df.ta.ema(10) > df.ta.ema(20)).astype(int)

donchian_20 = ta.donchian(df['high'], df['close'], lower_length=20, upper_length=20)
donchian_50 = ta.donchian(df['high'], df['close'], lower_length=50, upper_length=50)
df['higher_low'] = (donchian_20['DCL_20_20'] > donchian_50['DCL_50_50']).astype(int)
df['breakout'] = (df['close'] > donchian_20['DCU_20_20'].shift(1)).astype(int)

# volume confirm
df['volume_breakout'] = (df['volume'] > ta.sma(df['volume'], 20)).astype(int)

# df.ta.tsignals(ta.ema(df['close'], 10) > ta.ema(df['close'], 20), append=True)
# df.ta.log_return(append=True)
# df.ta.percent_return(append=True)
# df.ta.roc(append=True)
# df.ta.stoch(append=True)
# df.ta.stochrsi(append=True)
# df.ta.tsi(append=True)

# df.describe()
df.dropna(inplace=True)
df.head()
df.to_csv('temp/features.csv', index=False)

In [33]:
remove_col = set('time open high close low volume'.split())
cols = list(set(df.columns).difference(remove_col))
features = df[cols]
features.dropna(inplace=True)
features = features.reset_index(drop=True)
features.head()

Unnamed: 0,RSI_20,close>sma50,LOGRET_20,LOGRET_5,close>sma100,ema10>ema20,PCTRET_20,volume_breakout,close>sma200,breakout,PCTRET_5,NATR_20,higher_low
0,58.416994,0,0.02608,0.015968,0,1,0.026423,0,0,0,0.016097,0.01735,0
1,65.359502,0,0.045688,0.019609,0,1,0.046748,1,0,0,0.019802,0.017242,0
2,68.157521,0,0.075885,0.019418,0,1,0.078838,1,0,1,0.019608,0.017282,0
3,62.816586,0,0.057958,0.019609,0,1,0.059671,1,0,0,0.019802,0.016712,0
4,58.029902,0,0.044095,0.009852,0,1,0.045082,0,0,0,0.009901,0.017136,0


In [34]:
df.head()

Unnamed: 0,time,open,high,low,close,volume,RSI_20,NATR_20,LOGRET_5,LOGRET_20,PCTRET_5,PCTRET_20,close>sma50,close>sma100,close>sma200,ema10>ema20,higher_low,breakout,volume_breakout
20,2012-02-07,6.875043,6.943113,6.806973,6.875043,74910.0,58.416994,0.01735,0.015968,0.02608,0.016097,0.026423,0,0,0,1,0,0,0
21,2012-02-08,6.943113,7.011183,6.875043,7.011183,168150.0,65.359502,0.017242,0.019609,0.045688,0.019802,0.046748,0,0,0,1,0,0,1
22,2012-02-09,7.011183,7.079252,6.943113,7.079252,153780.0,68.157521,0.017282,0.019418,0.075885,0.019608,0.078838,0,0,0,1,0,1,1
23,2012-02-10,7.079252,7.079252,7.011183,7.011183,195060.0,62.816586,0.016712,0.019609,0.057958,0.019802,0.059671,0,0,0,1,0,0,1
24,2012-02-13,7.079252,7.079252,6.943113,6.943113,113870.0,58.029902,0.017136,0.009852,0.044095,0.009901,0.045082,0,0,0,1,0,0,0
