In [8]:
import tensortrade.env.default as default
from tensortrade.oms.exchanges import Exchange
from tensortrade.feed import Stream
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.feed.core import Stream, DataFeed, NameSpace
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.oms.instruments import Instrument
from tensortrade.agents import DQNAgent, ParallelDQNAgent
from tensortrade.env.default.actions import BSH, ManagedRiskOrders
from tensortrade.env.default.rewards import RiskAdjustedReturns, PBR, SimpleProfit
from tensortrade.env.default.renderers import PlotlyTradingChart
from sklearn.preprocessing import MinMaxScaler
import multiprocessing
import pandas as pd
import numpy as np

# Ask pandas to treat +/-inf as missing values.
# NOTE(review): this option is deprecated in recent pandas releases — confirm
# against the installed version before relying on it.
pd.options.mode.use_inf_as_na = True

In [None]:
# TODO: feature scaling may be important => try a robust scaler
# TODO: fracdiff
# TODO: deflated Sharpe ratio
# The model looks very powerful:
# https://github.com/zoakes/RL/blob/master/RI_ML.ipynb

In [9]:
def pca_df(df, n_components):
    """Project ``df`` onto its principal components.

    Args:
        df: Numeric DataFrame; its index is carried over to the result.
        n_components: Passed unchanged to ``PCA(n_components=...)``. It does
            not have to be an integer: the output columns are named from the
            number of components the fitted PCA actually produced.

    Returns:
        DataFrame indexed like ``df`` with columns ``PC1 .. PCk``, where ``k``
        is the width of the transformed data.

    Side effects:
        Prints how many eigenvalues exceed 1 and the cumulative explained
        variance ratio (in percent) of the retained components.
    """
    pca = PCA(n_components=n_components)
    pca.fit(df)
    eig_val, eig_ratio = pca.explained_variance_, pca.explained_variance_ratio_
    df_pca = pd.DataFrame(pca.transform(df), index=df.index)
    # Name the columns from the projection width rather than from the
    # `n_components` argument, which may be a float, None or a string.
    df_pca.columns = [f"PC{i+1}" for i in range(df_pca.shape[1])]
    print(f"Keyser : {len(eig_val[eig_val > 1])}, Actual E.V. ratio : {np.round(eig_ratio.cumsum()[-1:][0]*100,2)}")
    return df_pca

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

def load_ticker_data(tickers, drop_first_nrows=500, data_dir='data/usdt_data_features'):
    """Load one pickled feature frame per ticker and align them on a common date range.

    Args:
        tickers: Sequence of ticker symbols; each is read from
            ``<data_dir>/<ticker>.pickle``.
        drop_first_nrows: Number of leading rows discarded from every frame
            before alignment.
        data_dir: Directory holding the pickles. Defaults to the location the
            notebook has always used.

    Returns:
        List of float DataFrames (same order as ``tickers``) restricted to the
        window ``[latest first date, earliest last date]`` shared by all
        tickers, with missing values filled.

    Raises:
        ValueError: If ``tickers`` is empty or a frame has no rows left after
            dropping the leading rows.

    Side effects:
        Prints the bounds of the common window.
    """
    if len(tickers) == 0:
        raise ValueError('tickers must contain at least one symbol')

    env_data = []
    for ticker in tickers:
        # SECURITY: unpickling executes arbitrary code; only load trusted files.
        data = pd.read_pickle(f'{data_dir}/{ticker}.pickle').astype(float)
        data = data.iloc[drop_first_nrows:, :]
        if len(data) == 0:
            raise ValueError(
                f'{ticker}: no rows left after dropping the first {drop_first_nrows} rows'
            )
        env_data.append(data)

    max_first_date = max(data.index[0] for data in env_data)
    min_last_date = min(data.index[-1] for data in env_data)

    # Forward-fill first so a gap is filled from the past; back-fill only
    # covers values missing at the very start of the window. The previous
    # order (bfill then ffill) copied future observations into earlier rows.
    env_data = [
        data.loc[(data.index >= max_first_date) & (data.index <= min_last_date)].ffill().bfill()
        for data in env_data
    ]

    print(f'max first date : {max_first_date} min last date : {min_last_date}')
    return env_data


def clean_data(stocks_data, threshold=0.95):
    """Drop features that are highly correlated with an earlier feature.

    For every frame, the OHLCV and ``returns`` columns are set aside, the
    correlation matrix of the remaining columns is computed, and any column
    whose correlation with a column to its left exceeds ``threshold`` is
    removed. The set-aside columns are appended back at the end.

    Args:
        stocks_data: Iterable of DataFrames, each containing at least the
            columns ``open, high, low, close, volume, returns``.
        threshold: Correlation above which a feature is dropped. The
            comparison is on the signed coefficient, so strongly *negatively*
            correlated features are kept.

    Returns:
        List of new DataFrames (inputs are not modified): retained features
        first, followed by ``open, high, low, close, volume, returns``.

    Side effects:
        Prints the dropped column names for each frame. The selection is made
        independently per frame, so frames can end up with different columns.
    """
    base_columns = ['open', 'high', 'low', 'close', 'volume', 'returns']
    cleaned_data = []
    for stock_data in stocks_data:
        candidates = stock_data.drop(columns=base_columns)
        corr_matrix = candidates.corr()
        # Keep only the strict upper triangle so each pair is inspected once
        # and a column is only ever compared with the columns before it.
        upper_matrix = corr_matrix.where(np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1))
        corrFeatures = upper_matrix.columns[(upper_matrix > threshold).any(axis=0)].tolist()
        print(f'dropped: {corrFeatures}')
        cleaned_data.append(
            pd.concat([candidates.drop(columns=corrFeatures), stock_data[base_columns]], axis=1)
        )
    return cleaned_data

def split_data(data_list, train_sz=0.8, test_sz=0.2):
    """Split every frame chronologically into train / test / validation parts.

    Each frame is separated into features (all columns except ``returns``) and
    target (``returns``). An unshuffled split first carves off the trailing
    validation part; the leading remainder is then split again, with the same
    proportions, into train and test.

    Args:
        data_list: Iterable of DataFrames that contain a ``returns`` column.
        train_sz: ``train_size`` forwarded to both ``train_test_split`` calls.
        test_sz: ``test_size`` forwarded to both ``train_test_split`` calls.

    Returns:
        Tuple of six lists, one entry per input frame, in the order
        ``(X_train, X_test, X_valid, y_train, y_test, y_valid)``.
    """
    def _ordered_split(features, target):
        # shuffle=False keeps the rows in their original (time) order.
        return train_test_split(
            features, target, train_size=train_sz, test_size=test_sz, shuffle=False
        )

    part_names = ("X_train", "X_test", "X_valid", "y_train", "y_test", "y_valid")
    collected = {part: [] for part in part_names}

    for frame in data_list:
        features = frame.drop(columns=['returns'])
        target = frame['returns'].copy()

        X_head, X_tail, y_head, y_tail = _ordered_split(features, target)
        X_fit, X_eval, y_fit, y_eval = _ordered_split(X_head, y_head)

        pieces = (X_fit, X_eval, X_tail, y_fit, y_eval, y_tail)
        for part, piece in zip(part_names, pieces):
            collected[part].append(piece)

    return tuple(collected[part] for part in part_names)

def separate_render_features(stocks_data, tickers):
    """Build per-ticker render frames and standardised feature frames.

    Args:
        stocks_data: Sequence of DataFrames containing at least the columns
            ``open, high, low, close, volume``.
        tickers: Ticker symbols, positionally matched with ``stocks_data``.
            Extra trailing tickers are ignored.

    Returns:
        ``(ohlc_list, features_scaled_list)``:

        * ``ohlc_list[i]`` holds the OHLCV columns plus a ``date`` column
          copied from the index, all prefixed with ``"<ticker>:"``.
        * ``features_scaled_list[i]`` holds *every* column of the input frame
          (OHLCV included), prefixed with ``"<ticker>:"`` and passed through a
          ``StandardScaler`` fitted on that same frame.

    Raises:
        ValueError: If there are fewer tickers than frames.
    """
    if len(tickers) < len(stocks_data):
        raise ValueError(
            f'got {len(stocks_data)} data frames but only {len(tickers)} tickers'
        )

    ohlc_list, features_scaled_list = [], []
    for ticker, stock_data in zip(tickers, stocks_data):
        # OHLCV prices (plus the timestamp) used for rendering.
        stock_ohlc = stock_data[['open', 'high', 'low', 'close', 'volume']].copy()
        stock_ohlc['date'] = stock_ohlc.index
        ohlc_list.append(stock_ohlc.add_prefix(f"{ticker}:"))

        # All columns become training features. The scaler is fitted exactly
        # once; the previous fit() + fit_transform() pair fitted it twice.
        stock_features = stock_data.add_prefix(f"{ticker}:")
        scaled_values = StandardScaler().fit_transform(stock_features)
        features_scaled_list.append(
            pd.DataFrame(scaled_values, columns=stock_features.columns, index=stock_features.index)
        )

    return ohlc_list, features_scaled_list

def get_price_stream(stock_renders, tickers):
    """Create one float price stream per ticker from its close column.

    Args:
        stock_renders: Sequence of DataFrames; frame ``i`` must contain the
            column ``"<tickers[i]>:close"``.
        tickers: Ticker symbols, positionally matched with ``stock_renders``.

    Returns:
        List of streams, in input order, each named ``"USDT-<ticker>"``.
    """
    return [
        Stream.source(list(render[f"{tickers[position]}:close"]), dtype="float").rename(
            f"USDT-{tickers[position]}"
        )
        for position, render in enumerate(stock_renders)
    ]

def create_data_feed(features, use_pca, n_components=100):
    """Turn per-ticker feature frames into a single compiled ``DataFeed``.

    Args:
        features: List of feature DataFrames; they are concatenated
            column-wise.
        use_pca: When truthy, the concatenated frame is replaced by its
            principal-component projection (via ``pca_df``).
        n_components: Component count handed to ``pca_df`` when ``use_pca`` is
            set. Defaults to 100, the value that used to be hard-coded.

    Returns:
        A compiled ``DataFeed`` with one float stream per column, created
        inside the ``"binance"`` namespace.
    """
    all_scaled_features = pd.concat(features, axis=1)
    if use_pca:
        all_scaled_features = pca_df(all_scaled_features, n_components)
    with NameSpace("binance"):
        streams = [
            Stream.source(list(all_scaled_features[column]), dtype="float").rename(column)
            for column in all_scaled_features.columns
        ]
    all_features_feed = DataFeed(streams)
    all_features_feed.compile()
    return all_features_feed

def create_portfolio(tickers, currency = "USDT", exchange=None, initial_cash=10000):
    """Create a portfolio with a funded cash wallet and one empty wallet per ticker.

    Args:
        tickers: Symbols to create instruments and zero-balance wallets for. A
            ticker equal to ``currency`` is skipped, because the cash wallet
            already covers it.
        currency: Symbol of the base (cash) instrument.
        exchange: Exchange the wallets are attached to. When omitted, the
            module-level ``binance`` exchange is used, as before.
        initial_cash: Starting balance of the cash wallet, in ``currency``.

    Returns:
        ``Portfolio`` whose base instrument is ``currency`` and whose wallets
        are the cash wallet followed by the ticker wallets in input order.
    """
    if exchange is None:
        # Backward-compatible default: fall back on the global exchange that
        # the notebook builds in its setup cell.
        exchange = binance

    asset_wallets = [
        Wallet(exchange, 0 * Instrument(ticker, 8, ticker))
        for ticker in tickers
        if ticker != currency
    ]
    base_instrument = Instrument(currency, 8, currency)
    cash_wallet = Wallet(exchange, initial_cash * base_instrument)
    return Portfolio(base_instrument, [cash_wallet] + asset_wallets)

# Build the module-level objects that the following cells rely on.
tickers = ["BTCUSDT","ETHUSDT","SOLUSDT"]
env_data = load_ticker_data(tickers)
cleaned_data = clean_data(env_data)
X_train_list, X_test_list, X_valid_list, y_train_list, y_test_list, y_valid_list = split_data(cleaned_data)
# Render frames and scaled features are derived from the X_test split here.
renders, features = separate_render_features(X_test_list, tickers)
price_streams = get_price_stream(renders, tickers)
# create_portfolio() looks up this module-level `binance` name, so this line
# has to run before that function is called.
binance = Exchange('binance', service=execute_order)(*price_streams)

max first date : 2020-09-01 02:00:00 max last date : 2023-01-30 17:00:00
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 'cci17', 'adx20', 'rsi20', 'stochrsi20', 'willR20', 'cci20', 'adx25', 'rsi25', 'willR25', 'cci25', 'volume_vwap', 'volatility_bbh', 'volatility_bbl', 'volatility_bbp', 'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcp', 'trend_macd_signal', 'trend_vortex_ind_diff', 'trend_kst_sig', 'trend_ichimoku_conv', 'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b', 'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b', 'momentum_stoch', 'momentum_ao', 'momentum_ppo_signal', 'momentum_kama', 'alpha6', 'alpha51']
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 

In [11]:
# Set the commission on the exchange's options object.
binance.options.commission = 0.0001

In [10]:
all_features_feed = create_data_feed(features, True)
portfolio = create_portfolio(tickers,'USDT')

# TODO: the window size may need changing here as well (is this the Sortino window?).
reward_scheme = RiskAdjustedReturns(return_algorithm='sortino', window_size=24)
action_scheme = ManagedRiskOrders(
        #durations=[6],
        stop=[0.005, 0.01, 0.03],
        take=[0.005, 0.01, 0.03],
        trade_sizes = [1/20, 1/10, 1/5, 1/2, 1]
)

# TODO: stop-loss prediction

chart_renderer = PlotlyTradingChart(
    display=True,
    height=800,
    save_format="html",
    auto_open_html=True,
)

# The chart shows the first ticker only. The column prefix is taken from
# `tickers[0]` so it stays in sync with `renders[0]` if the ticker list changes.
render_ticker = tickers[0]
render_frame = renders[0]
renderer_feed = DataFeed(
    # The date stream carries timestamps, so it gets no float dtype.
    [Stream.source(list(render_frame[f"{render_ticker}:date"])).rename("date")]
    + [
        Stream.source(list(render_frame[f"{render_ticker}:{column}"]), dtype="float").rename(column)
        for column in ("open", "high", "low", "close", "volume")
    ]
)

Keyser : 59, Actual E.V. ratio : 91.77


In [None]:
# --- Training configuration -------------------------------------------------
window_size = 24  # performed better than 48 in the author's tests

# Reference: one PPO notebook used a learning rate of 8e-6.

n_steps = 5000
memory_capacity = n_steps * 10

seed = 1337
commission = 0.0001

save_path = 'agents/'

# Apply the seed: it used to be defined but never used. This seeds NumPy's
# global RNG only; any other RNG used by the agent or its backend is not
# seeded here.
np.random.seed(seed)

# --- Environment --------------------------------------------------------------
env = default.create(
    portfolio=portfolio,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    feed=all_features_feed,
    renderer_feed=renderer_feed,
    renderer=chart_renderer,
    window_size=window_size,
    max_allowed_loss=0.3
)

# --- Agent --------------------------------------------------------------------
agent = DQNAgent(env)

agent.train(batch_size=64,
            n_steps=n_steps,
            n_episodes=25,
            memory_capacity=memory_capacity,
            save_path=save_path)

In [19]:
import pickle
def save_data_list(data_list, name, tickers):
    """Pickle ``data_list`` to ``<name>_<ticker1>_<ticker2>....pickle``.

    Args:
        data_list: Object to serialise.
        name: File-name prefix; the path is used as given (relative paths
            resolve against the current working directory).
        tickers: Ticker symbols joined with ``_`` to form the file-name suffix.
    """
    ticker_suffix = "_".join(tickers)
    target_path = f'{name}_{ticker_suffix}.pickle'
    with open(target_path, 'wb') as handle:
        pickle.dump(data_list, handle)

def preprocess_data(tickers):
    """Run the preprocessing pipeline and pickle the result of every split.

    Loads and cleans the ticker data, splits it into train / test / validation
    feature sets, derives render frames and scaled features for each split,
    and then writes ``<split>_renders`` and ``<split>_features`` pickles via
    ``save_data_list``. Nothing is written until every split has been
    processed.

    Args:
        tickers: Ticker symbols to process.
    """
    cleaned_frames = clean_data(load_ticker_data(tickers))
    X_train_list, X_test_list, X_valid_list, _, _, _ = split_data(cleaned_frames)

    split_inputs = {"train": X_train_list, "test": X_test_list, "valid": X_valid_list}
    processed = {
        split_name: separate_render_features(frames, tickers)
        for split_name, frames in split_inputs.items()
    }

    for split_name, (split_renders, split_features) in processed.items():
        save_data_list(split_renders, f'{split_name}_renders', tickers)
        save_data_list(split_features, f'{split_name}_features', tickers)

In [20]:
# Run the preprocessing pipeline for `tickers`; the per-split pickles are
# written as a side effect.
preprocess_data(tickers)

max first date : 2020-09-01 02:00:00 max last date : 2023-01-30 17:00:00
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 'cci17', 'adx20', 'rsi20', 'stochrsi20', 'willR20', 'cci20', 'adx25', 'rsi25', 'willR25', 'cci25', 'volume_vwap', 'volatility_bbh', 'volatility_bbl', 'volatility_bbp', 'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcp', 'trend_macd_signal', 'trend_vortex_ind_diff', 'trend_kst_sig', 'trend_ichimoku_conv', 'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b', 'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b', 'momentum_stoch', 'momentum_ao', 'momentum_ppo_signal', 'momentum_kama', 'alpha6', 'alpha51']
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 

In [13]:
# Same ticker list as the one defined in the setup cell earlier in the notebook.
tickers = ["BTCUSDT","ETHUSDT","SOLUSDT"]

In [14]:
# NOTE(review): the output recorded for this cell ends with an AttributeError
# about `to_pickle`, which the current save_data_list never calls — presumably
# a stale output from an earlier version; re-run to confirm.
preprocess_data(tickers)

max first date : 2020-09-01 02:00:00 max last date : 2023-01-30 17:00:00
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 'cci17', 'adx20', 'rsi20', 'stochrsi20', 'willR20', 'cci20', 'adx25', 'rsi25', 'willR25', 'cci25', 'volume_vwap', 'volatility_bbh', 'volatility_bbl', 'volatility_bbp', 'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcp', 'trend_macd_signal', 'trend_vortex_ind_diff', 'trend_kst_sig', 'trend_ichimoku_conv', 'trend_ichimoku_base', 'trend_ichimoku_a', 'trend_ichimoku_b', 'trend_visual_ichimoku_a', 'trend_visual_ichimoku_b', 'momentum_stoch', 'momentum_ao', 'momentum_ppo_signal', 'momentum_kama', 'alpha6', 'alpha51']
dropped: ['tb_quote_av', 'sma5', 'ema8', 'sma8', 'ema12', 'sma12', 'ema16', 'sma16', 'ema20', 'sma20', 'ema26', 'sma26', 'rsi10', 'rsi14', 'adx17', 'rsi17', 'willR17', 

AttributeError: 'list' object has no attribute 'to_pickle'

In [3]:
# Scratch check: concatenates the ticker symbols with no separator.
''.join(tickers)

'BTCUSDTETHUSDTSOLUSDT'

In [None]:
def create_env(cfg):
    """Prepare the data and exchange for a trading environment (unfinished).

    Loads and cleans the ticker data, builds render frames and scaled features
    from the test split, and creates a ``binance`` exchange from the close
    price streams.

    NOTE(review): ``cfg`` is never read, and ``env`` is never assigned inside
    this function. The ``return`` therefore resolves to a module-level ``env``
    if one exists and raises ``NameError`` otherwise. ``features`` and the
    local ``binance`` are built but not used. The environment construction
    itself appears to be missing — confirm the intended design.
    """
    tickers = ["BTCUSDT","ETHUSDT","SOLUSDT"]
    env_data = load_ticker_data(tickers)
    cleaned_data = clean_data(env_data)
    X_train_list, X_test_list, X_valid_list, y_train_list, y_test_list, y_valid_list = split_data(cleaned_data)
    renders, features = separate_render_features(X_test_list, tickers)
    price_streams = get_price_stream(renders, tickers)
    # Local exchange: this shadows, and does not replace, any module-level `binance`.
    binance = Exchange('binance', service=execute_order)(*price_streams)
    # `env` is not defined in this scope (see the docstring note).
    return env

In [None]:
https://github.com/tensortrade-org/tensortrade/blob/master/examples/train_and_evaluate.ipynb

In [81]:
# TODO: fit the scaler on the training data only and reuse it for the test data; check whether performance is better with MinMax scaling (also test on TimesNet); also test PCA.

Object `pca` not found.


In [None]:
#https://www.tensortrade.org/en/latest/examples/setup_environment_tutorial.html
#https://github.com/tensortrade-org/tensortrade/blob/master/examples/train_and_evaluate.ipynb
https://levelup.gitconnected.com/portfolio-allocation-with-tensortrade-part-2-2-9ac30a6bcbfe
https://www.tensortrade.org/en/latest/agents/overview.html#stable-baselines
https://levelup.gitconnected.com/portfolio-allocation-with-tensortrade-part-2-2-9ac30a6bcbfe
    https://github.com/Tomas0413/tensortrade-experiments/blob/main/TensorTrade%20-%20Sinewave%20with%20SimpleProfit%20and%20ManagedRiskOrders.ipynb
    https://www.google.com/search?q=feature_engine&sourceid=chrome&ie=UTF-8
        https://github.com/tensortrade-org/tensortrade/blob/master/examples/train_and_evaluate.ipynb