In [4]:
!pip install alpaca_trade_api



In [5]:
import os

import alpaca_trade_api as api
from finrl.model.models import DRLAgent,DRLEnsembleAgent

In [6]:
# Credentials are read from the environment so that no secret is stored in the
# notebook. Any key pair that was previously committed in this cell must be
# treated as leaked and revoked in the Alpaca dashboard.
# The module is imported under a separate alias so that rebinding `api` to the
# REST client below does not break this cell when it is executed again.
import alpaca_trade_api as tradeapi

api = tradeapi.REST(os.environ['APCA_API_KEY_ID'],
        os.environ['APCA_API_SECRET_KEY'],
        os.environ.get('APCA_API_BASE_URL', 'https://paper-api.alpaca.markets'),
        api_version='v2')


In [None]:
def load_model(agent, model_name, env):
    """Load the newest saved checkpoint for ``agent`` and bind it to ``env``.

    Args:
        agent: key selecting the algorithm class; one of
            "a2c", "ddpg", "td3", "sac", "ppo".
        model_name: file name (callers in this notebook pass glob patterns
            such as 'A2C_30k*') looked up under ``config.TRAINED_MODEL_DIR``
            via ``get_latest_file``.
        env: environment forwarded to the algorithm's ``load`` call.

    Returns:
        Whatever ``<Algorithm>.load(checkpoint, env=env)`` returns.

    Raises:
        KeyError: if ``agent`` is not one of the supported keys.
    """
    algorithm_by_name = dict(a2c=A2C, ddpg=DDPG, td3=TD3, sac=SAC, ppo=PPO)

    checkpoint = get_latest_file(config.TRAINED_MODEL_DIR, model_name)
    algorithm = algorithm_by_name[agent]

    return algorithm.load(checkpoint, env=env)

In [None]:
def train_model_daily(model, model_name, tb_log_name, total_timesteps=5000):
    """Continue training ``model`` and save the result as a checkpoint.

    Args:
        model: object exposing ``learn(total_timesteps=..., tb_log_name=...)``;
            the value returned by ``learn`` must expose ``save(path)``.
        model_name: base name of the checkpoint; it is upper-cased in the path.
        tb_log_name: forwarded to ``learn``.
        total_timesteps: forwarded to ``learn``; also used (integer-divided by
            1000) as the ``<n>k`` suffix of the checkpoint name.

    Returns:
        The object returned by ``model.learn``.
    """
    trained = model.learn(tb_log_name=tb_log_name, total_timesteps=total_timesteps)

    # Checkpoint path: <TRAINED_MODEL_DIR>/<MODEL_NAME>_<timesteps in thousands>k
    checkpoint_path = f"{config.TRAINED_MODEL_DIR}/{model_name.upper()}_{total_timesteps//1000}k"
    trained.save(checkpoint_path)

    return trained

In [None]:
def perform_trade(actions_trade):
    """Submit one market order per ticker from the last row of ``actions_trade``.

    Args:
        actions_trade: DataFrame with one column per ticker symbol; only the
            last row is used. A positive value becomes a buy order and a
            negative value becomes a sell order for that many shares. Zero and
            NaN entries produce no order.

    Orders are sent through the module-level ``api`` client as market orders
    with ``time_in_force='gtc'``. Nothing is returned.
    """
    # Nothing to act on; ``iloc[-1]`` would raise IndexError on an empty frame.
    if len(actions_trade) == 0:
        return

    latest_actions = actions_trade.iloc[-1]

    for ticker in actions_trade.columns:
        shares = latest_actions[ticker]

        if shares > 0:
            side = 'buy'
        elif shares < 0:
            side = 'sell'
        else:
            # Zero or NaN: no trade for this ticker.
            continue

        # The order side already carries the direction, so the quantity sent to
        # the broker must be positive (the sell branch used to pass the raw
        # negative value). It is also converted to a native int so the request
        # payload does not carry a numpy scalar.
        # NOTE(review): assumes actions are whole share counts — confirm.
        qty = abs(int(shares))
        if qty == 0:
            continue

        api.submit_order(
            symbol=ticker,
            qty=qty,
            side=side,
            type='market',
            time_in_force='gtc'
        )

In [None]:
def get_latest_file(directory, file_name):
    """Return the path of the most recently modified file matching a pattern.

    Args:
        directory: directory to search; a trailing path separator is optional.
        file_name: file name or glob pattern (e.g. 'A2C_30k*') inside
            ``directory``.

    Returns:
        The matching path with the greatest modification time.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    import glob  # local import: `glob` is not imported in the notebook's import cell

    # Join with a separator. Plain string concatenation only worked when
    # `directory` already ended with a slash, which is not the case for the
    # "<dir>/<name>" layout used when checkpoints are saved.
    pattern = os.path.join(directory, file_name)
    matches = glob.glob(pattern)

    if not matches:
        raise FileNotFoundError(f"No file matches pattern {pattern!r}")

    # Oldest first, so the last element is the newest file.
    return sorted(matches, key=os.path.getmtime)[-1]

In [3]:
from datetime import datetime, timedelta

validation_window = 63
rebalance_window = 63

# Take one timestamp so that every derived date refers to the same day, even if
# the cell happens to run across midnight.
_now = datetime.today()

today = _now.strftime('%Y-%m-%d')
beginning = '2000-01-01'
# NOTE(review): the windows are subtracted as calendar days; if they are meant
# to be trading days the resulting span is shorter than intended — confirm.
# `train_start` is intentionally left as a datetime object, as before.
train_start = _now - timedelta(days=validation_window + rebalance_window)

# Validation period as 'YYYY-MM-DD' strings. The previous code formatted an
# undefined name `start`, which raised NameError.
val_start = (_now - timedelta(days=validation_window)).strftime('%Y-%m-%d')
val_end = _now.strftime('%Y-%m-%d')

# `datetime` here is the class (from `from datetime import datetime`), so the
# end date is built with the constructor; `datetime.date(2021, 6, 23)` would be
# an unbound-method call and raise TypeError.
raw_full = YahooDownloader(start_date = beginning,
                     end_date = datetime(2021, 6, 23).strftime('%Y-%m-%d'),
                     ticker_list = config.SP_500_TICKER).fetch_data()


'2021-06-19'

In [None]:
# Number of distinct tickers in the processed data.
stock_dimension = len(processed_full["tic"].unique())
# One scalar slot, two slots per ticker, and one slot per technical indicator
# per ticker.
# NOTE(review): presumably cash balance + price and holdings per stock — confirm
# against the environment's observation layout.
state_space = 1 + stock_dimension * (2 + len(config.TECHNICAL_INDICATORS_LIST))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Keyword arguments shared by every environment / agent built in this notebook.
env_kwargs = dict(
    hmax=100,
    initial_amount=100000,  # TODO: dynamic change amount
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    action_space=stock_dimension,
    reward_scaling=1e-4,
    print_verbosity=5,
)

In [None]:
def refresh_daily_data(train_start, today):
    '''
        Download and feature-engineer price data between two dates.

        Input: train_start and today, passed to YahooDownloader as start_date
               and end_date for the tickers in config.SP_500_TICKER
        Return: frame with one row per (date, tic) for every date present in
                the processed data, sorted by date then tic, with technical
                indicators and turbulence added and missing values set to 0
    '''
    downloader = YahooDownloader(start_date = train_start,
                                 end_date = today,
                                 ticker_list = config.SP_500_TICKER)
    raw_recent = downloader.fetch_data()

    engineer = FeatureEngineer(
                        use_technical_indicator=True,
                        tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
                        use_turbulence=True,
                        user_defined_feature = False)
    processed = engineer.preprocess_data(raw_recent)

    # Build the full (calendar day x ticker) grid spanning the processed range.
    tickers = processed["tic"].unique().tolist()
    first_day = processed['date'].min()
    last_day = processed['date'].max()
    calendar_days = list(pd.date_range(first_day, last_day).astype(str))
    grid = pd.DataFrame(list(itertools.product(calendar_days, tickers)), columns=["date","tic"])

    # The left join keeps every grid cell; rows whose date never occurs in the
    # processed data are then dropped again.
    merged = grid.merge(processed, on=["date","tic"], how="left")
    on_known_dates = merged['date'].isin(processed['date'])

    return merged[on_known_dates].sort_values(['date','tic']).fillna(0)

In [None]:
def calc_turb_thresh_initial(raw_full):
    '''
        Input: raw full data df with "date", "tic" and "close" columns
        Return: 0.85 quantile of the turbulence index computed over all dates
                in raw_full

        For each date after the first 252, turbulence is the quadratic form
        d * pinv(cov) * d^T, where d is that date's return vector minus the
        mean return over the preceding 252 dates and cov is the covariance of
        returns over the same window. The first 252 dates, and the first two
        positive values, are recorded as 0.
    '''

    # use returns to calculate turbulence
    df_returns = raw_full.pivot(index="date", columns="tic", values="close").pct_change()

    unique_date = raw_full.date.unique()
    # start after a year
    start = 252
    # Never create more placeholder entries than there are dates, so a history
    # shorter than one year yields a threshold of 0 instead of an error.
    turbulence_values = [0] * min(start, len(unique_date))
    count = 0
    for i in range(start, len(unique_date)):
        current_price = df_returns[df_returns.index == unique_date[i]]
        # use one year rolling window to calculate covariance
        hist_price = df_returns[
            (df_returns.index < unique_date[i])
            & (df_returns.index >= unique_date[i - start])
        ]
        # Drop tickers which have more missing values than the "oldest" ticker
        filtered_hist_price = hist_price.iloc[hist_price.isna().sum().min():].dropna(axis=1)

        cov_temp = filtered_hist_price.cov()
        current_temp = current_price[filtered_hist_price.columns] - filtered_hist_price.mean(axis=0)
        temp = current_temp.values.dot(np.linalg.pinv(cov_temp)).dot(
            current_temp.values.T
        )
        if temp > 0:
            count += 1
            if count > 2:
                turbulence_temp = temp[0][0]
            else:
                # avoid large outlier because the calculation has just begun
                turbulence_temp = 0
        else:
            turbulence_temp = 0
        turbulence_values.append(turbulence_temp)

    # Take the quantile over the turbulence values only. The previous code
    # passed a frame that also held the "date" column to np.quantile.
    insample_turbulence_threshold = np.quantile(
        np.asarray(turbulence_values, dtype=float), 0.85
    )

    return insample_turbulence_threshold

In [10]:
def daily_predict(df, initial_turbulence):
    """Validate the saved A2C/PPO/DDPG agents, retrain the best one and trade.

    Steps performed, in order:
      1. Build a ``DRLEnsembleAgent`` from ``df`` and the module-level
         ``rebalance_window``, ``validation_window`` and ``env_kwargs``.
      2. Choose a turbulence threshold by comparing the mean of
         ``df.turbulence`` (rows from position 64 onwards) with
         ``initial_turbulence``.
      3. Create a training environment and a validation split.
      4. For each of A2C, PPO and DDPG: build a validation environment, load
         the latest checkpoint through ``load_model``, run ``DRL_validation``
         and read the Sharpe ratio via ``get_validation_sharpe``.
      5. Reload the agent with the best Sharpe ratio against the full training
         environment (PPO wins ties), continue training it through
         ``train_model_daily`` under the name "ensemble", and call
         ``DRL_prediction`` with it.

    Args:
        df: DataFrame handed to ``DRLEnsembleAgent``; must expose a
            ``turbulence`` column.
        initial_turbulence: in-sample turbulence threshold that the mean
            turbulence of ``df`` is compared against.

    Returns:
        None. The function has no return statement; results are printed or
        produced as side effects of the called helpers.

    NOTE(review): the body reads ``turbulence_index``, ``i``,
    ``validation_start_date``, ``validation_end_date`` and ``initial``, none of
    which are defined in this function or in the visible notebook cells. Unless
    they are defined elsewhere, the function raises NameError at the first such
    use. Each occurrence is flagged inline below.
    """
    
    # Extra training timesteps applied to whichever agent is selected.
    timesteps_dict = {'a2c' : 30_000, 
                 'ppo' : 100_000, 
                 'ddpg' : 10_000
                 }
    
    last_state_ensemble = []
    insample_turbulence_threshold = initial_turbulence

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = ''

    # start = time.time()

    ensemble_agent = DRLEnsembleAgent(df=df,
                     rebalance_window=rebalance_window, 
                     validation_window=validation_window, 
                     **env_kwargs)

#     validation_start_date_list.append(val_start)
#     validation_end_date_list.append(val_end)
    # iteration_list.append(i)

    print("============================================")
    ## initial state is empty
    # if i - self.rebalance_window - self.validation_window == 0:
    #     # inital state
    #     initial = True
    # else:
    #     # previous state
    #     initial = False

    # Tuning trubulence index based on historical data
    # Turbulence lookback window is one quarter (63 days)
#     end_date_index = df.index[df["date"] == ensemble_agent.unique_trade_date[train_start]].to_list()[-1]
#     start_date_index = end_date_index - 63 + 1

#     historical_turbulence = df.iloc[start_date_index:(end_date_index + 1), :]

#     historical_turbulence = historical_turbulence.drop_duplicates(subset=['date'])

    # NOTE(review): this skips the first 64 *rows* of df, not 64 dates; if df
    # holds one row per (date, tic) the offset covers far less than a quarter
    # — confirm the intended window.
    historical_turbulence_mean = np.mean(df.iloc[64:, :].turbulence.values)

    print(historical_turbulence_mean)

    # NOTE(review): the comments below speak of a 90% quantile, while
    # calc_turb_thresh_initial in this notebook computes the 0.85 quantile.
    if historical_turbulence_mean > insample_turbulence_threshold:
        # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
        # then we assume that the current market is volatile,
        # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
        # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
        turbulence_threshold = insample_turbulence_threshold
    else:
        # if the mean of the historical data is less than the 90% quantile of insample turbulence data
        # then we tune up the turbulence_threshold, meaning we lower the risk
        # NOTE(review): `turbulence_index` is not defined in this function or at
        # module level in the visible cells (it is a local of
        # calc_turb_thresh_initial); this branch looks like it raises NameError.
        turbulence_threshold = np.quantile(turbulence_index, 1)
    print("turbulence_threshold: ", turbulence_threshold)

    ############## Environment Setup starts ##############
    ## training env
    # NOTE(review): `train_start` resolves to the module-level datetime object
    # and is used here to index `unique_trade_date` — confirm that this is a
    # valid key for that container.
    train = data_split(ensemble_agent.df, start=ensemble_agent.train_period[0], end=ensemble_agent.unique_trade_date[train_start])
    ensemble_agent.train_env = DummyVecEnv([lambda: StockTradingEnv(train,
                                                        ensemble_agent.stock_dim,
                                                        ensemble_agent.hmax,
                                                        ensemble_agent.initial_amount,
                                                        ensemble_agent.buy_cost_pct,
                                                        ensemble_agent.sell_cost_pct,
                                                        ensemble_agent.reward_scaling,
                                                        ensemble_agent.state_space,
                                                        ensemble_agent.action_space,
                                                        ensemble_agent.tech_indicator_list,
                                                        print_verbosity=ensemble_agent.print_verbosity)])

    # NOTE(review): `i` is not defined anywhere in the visible notebook; every
    # use of it below depends on a definition that is not shown.
    validation = data_split(ensemble_agent.df, start=ensemble_agent.unique_trade_date[i - ensemble_agent.rebalance_window - ensemble_agent.validation_window],
                            end=ensemble_agent.unique_trade_date[i - ensemble_agent.rebalance_window])
    ############## Environment Setup ends ##############

    ############## Validation starts ##############

    # NOTE(review): `validation_start_date` / `validation_end_date` are not
    # defined in the visible notebook (the date cell defines `val_start` and
    # `val_end` instead).
    print("======A2C Validation from: ", validation_start_date, "to ",validation_end_date)
    val_env_a2c = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                        ensemble_agent.stock_dim,
                                                        ensemble_agent.hmax,
                                                        ensemble_agent.initial_amount,
                                                        ensemble_agent.buy_cost_pct,
                                                        ensemble_agent.sell_cost_pct,
                                                        ensemble_agent.reward_scaling,
                                                        ensemble_agent.state_space,
                                                        ensemble_agent.action_space,
                                                        ensemble_agent.tech_indicator_list,
                                                        turbulence_threshold=turbulence_threshold,
#                                                         iteration=i,
                                                        model_name='A2C',
                                                        mode='validation',
                                                        print_verbosity=ensemble_agent.print_verbosity)])
    val_obs_a2c = val_env_a2c.reset()

    # Latest saved A2C checkpoint matching the glob pattern.
    model_a2c = load_model('a2c', 'A2C_30k*', env=val_env_a2c)
    
    val_a2c_model =  ensemble_agent.DRL_validation(model=model_a2c,test_data=validation,test_env=val_env_a2c,test_obs=val_obs_a2c)
    sharpe_a2c = ensemble_agent.get_validation_sharpe(i,model_name="A2C")
    print("A2C Sharpe Ratio: ", sharpe_a2c)

    print("======PPO Validation from: ", validation_start_date, "to ",validation_end_date)
    val_env_ppo = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                        ensemble_agent.stock_dim,
                                                        ensemble_agent.hmax,
                                                        ensemble_agent.initial_amount,
                                                        ensemble_agent.buy_cost_pct,
                                                        ensemble_agent.sell_cost_pct,
                                                        ensemble_agent.reward_scaling,
                                                        ensemble_agent.state_space,
                                                        ensemble_agent.action_space,
                                                        ensemble_agent.tech_indicator_list,
                                                        turbulence_threshold=turbulence_threshold,
#                                                         iteration=i,
                                                        model_name='PPO',
                                                        mode='validation',
                                                        print_verbosity=ensemble_agent.print_verbosity)])
    val_obs_ppo = val_env_ppo.reset()
    
    # Latest saved PPO checkpoint matching the glob pattern.
    model_ppo = load_model('ppo', 'PPO_100k*', env=val_env_ppo)
    
    val_ppo_model = ensemble_agent.DRL_validation(model=model_ppo,test_data=validation,test_env=val_env_ppo,test_obs=val_obs_ppo)
    sharpe_ppo = ensemble_agent.get_validation_sharpe(i,model_name="PPO")
    print("PPO Sharpe Ratio: ", sharpe_ppo)

    print("======DDPG Validation from: ", validation_start_date, "to ",validation_end_date)
    val_env_ddpg = DummyVecEnv([lambda: StockTradingEnv(validation,
                                                        ensemble_agent.stock_dim,
                                                        ensemble_agent.hmax,
                                                        ensemble_agent.initial_amount,
                                                        ensemble_agent.buy_cost_pct,
                                                        ensemble_agent.sell_cost_pct,
                                                        ensemble_agent.reward_scaling,
                                                        ensemble_agent.state_space,
                                                        ensemble_agent.action_space,
                                                        ensemble_agent.tech_indicator_list,
                                                        turbulence_threshold=turbulence_threshold,
#                                                         iteration=i,
                                                        model_name='DDPG',
                                                        mode='validation',
                                                        print_verbosity=ensemble_agent.print_verbosity)])
    val_obs_ddpg = val_env_ddpg.reset()
    
    # Latest saved DDPG checkpoint matching the glob pattern.
    model_ddpg = load_model('ddpg', 'DDPG_10k*', env=val_env_ddpg)
    
    val_ddpg_model = ensemble_agent.DRL_validation(model=model_ddpg,test_data=validation,test_env=val_env_ddpg,test_obs=val_obs_ddpg)
    sharpe_ddpg = ensemble_agent.get_validation_sharpe(i,model_name="DDPG")

    # These lists are local to this call and are not returned.
    ppo_sharpe_list.append(sharpe_ppo)
    a2c_sharpe_list.append(sharpe_a2c)
    ddpg_sharpe_list.append(sharpe_ddpg)

    print("======Best Model Retraining from: ", ensemble_agent.train_period[0], "to ",
          ensemble_agent.unique_trade_date[i - ensemble_agent.rebalance_window])
    # Environment setup for model retraining up to first trade date
    train_full = data_split(ensemble_agent.df, start=ensemble_agent.train_period[0], end=ensemble_agent.unique_trade_date[i - ensemble_agent.rebalance_window])
    train_full_env = DummyVecEnv([lambda: StockTradingEnv(train_full,
                                                        ensemble_agent.stock_dim,
                                                        ensemble_agent.hmax,
                                                        ensemble_agent.initial_amount,
                                                        ensemble_agent.buy_cost_pct,
                                                        ensemble_agent.sell_cost_pct,
                                                        ensemble_agent.reward_scaling,
                                                        ensemble_agent.state_space,
                                                        ensemble_agent.action_space,
                                                        ensemble_agent.tech_indicator_list,
                                                        print_verbosity=ensemble_agent.print_verbosity)])
    # Model Selection based on sharpe ratio
    # PPO is chosen on ties (>=); A2C must be strictly better than both others;
    # DDPG is the fallback for every remaining case.
    if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
        model_use = 'ppo'
        model_ensemble = load_model('ppo', 'PPO_100k*', env=train_full_env)

    elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
        model_use = 'a2c'
        model_ensemble = load_model('a2c', 'A2C_30k*', env=train_full_env)
    else:
        model_use = 'ddpg'
        model_ensemble = load_model('ddpg', 'DDPG_10k*', env=train_full_env)
        
    # Continue training the selected agent; train_model_daily also saves it
    # under the name "ensemble".
    model_ensemble = train_model_daily(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(model_use), total_timesteps=timesteps_dict[model_use]) #50_000
    ############## Training and Validation ends ##############

    ############## Trading starts ##############
    # TODO: Update last state from correct environment
    print("======Trading from: ", ensemble_agent.unique_trade_date[i - ensemble_agent.rebalance_window], "to ", ensemble_agent.unique_trade_date[i])
    #print("Used Model: ", model_ensemble)
    # NOTE(review): `initial` is only assigned inside the commented-out block
    # near the top of this function, so it is undefined here.
    last_state_ensemble = ensemble_agent.DRL_prediction(model=model_ensemble, name="ensemble",
                                             last_state=last_state_ensemble, iter_num=i,
                                             turbulence_threshold = turbulence_threshold,
                                             initial=initial)
    ############## Trading ends ##############

In [None]:
def main(results_dir='/Users/evienguyen/Documents/FinRL/FinRL/finrl/results/'):
    """Run one daily cycle: refresh data, predict, then submit the trades.

    Args:
        results_dir: directory searched for the newest file matching
            'actions_trade_ensemble*'. Defaults to the location previously
            hard-coded here; pass a different path on any other machine.

    The module-level ``train_start``, ``today`` and ``raw_full`` supply the
    arguments of the data helpers, which were previously called without their
    required parameters.
    """
    import pandas as pd  # local import: the notebook's import cell does not show pandas

    # while True:

    daily_processed_df = refresh_daily_data(train_start, today)
    init_thresh = calc_turb_thresh_initial(raw_full)

    daily_predict(daily_processed_df, init_thresh)

    actions_path = get_latest_file(results_dir, 'actions_trade_ensemble*')
    # perform_trade expects a DataFrame with one column per ticker, not a file
    # path, so the file is loaded first.
    # NOTE(review): assumes the actions file is a CSV whose first column is the
    # date index — confirm against the code that writes it.
    actions_trade = pd.read_csv(actions_path, index_col=0)

    perform_trade(actions_trade=actions_trade)