# Imports

## Installation

In [None]:
! pip install yfinance
! pip install gym
! pip install stable-baselines3
! pip install ta
! pip install backtesting

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.11-py2.py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 KB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting cryptography>=3.3.2
  Downloading cryptography-39.0.1-cp36-abi3-manylinux_2_28_x86_64.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.26
  Downloading requests-2.28.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 KB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.2/112.2 KB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting frozendict>=2.3.4
  Downloading frozendict-2.3

## General

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data

In [None]:
from ta import add_all_ta_features
import yfinance as yf

## Environment

In [None]:
import gym 
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C, DDPG, DQN, PPO

## Backtesting

In [None]:
from backtesting import Backtest, Strategy

# Data

## Tech Indicators

In [None]:
def add_ta(df):
  ta_df = add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume")
  print(ta_df.columns)
  ta_df = ta_df[["Open", "High", "Low", "Close", "Volume", "Adj Close", "volume_obv",
                                "volume_adi", "trend_adx", "momentum_ao", "trend_macd", "momentum_rsi", 
                                "momentum_stoch"]]
  ta_df = ta_df.fillna(ta_df.mean())
  return ta_df

## Stocks

1. Apple Inc. (AAPL)
2. Microsoft Corp. (MSFT)
3. Amazon.com, Inc. ( AMZN)
4. Tesla, Inc. (TSLA)
5. Nvidia Corp. (NVDA)

In [None]:
# interval = 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
# prepost = T/F

aapl_df = yf.download("AAPL", start="2018-01-01", end="2022-12-31", keepna=True)
aapl_df = aapl_df.fillna(aapl_df.mean())
aapl_df = add_ta(aapl_df)

# msft_df = yf.download("MSFT", start="2022-12-01", end="2022-12-31", keepna=True)
# amzn_df = yf.download("AMZN", start="2022-12-01", end="2022-12-31", keepna=True)
# tsla_df = yf.download("TSLA", start="2022-12-01", end="2022-12-31", keepna=True)
# nvda_df = yf.download("NVDA", start="2022-12-01", end="2022-12-31", keepna=True)

caas_df = yf.download("CAAS", start="2018-01-01", end="2022-12-31", keepna=True)
caas_df = caas_df.fillna(caas_df.mean())
caas_df = add_ta(caas_df)

# asc_df = yf.download("ASC", start="2022-12-01", end="2022-12-31", keepna=True)

[*********************100%***********************]  1 of 1 completed


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'volume_adi',
       'volume_obv', 'volume_cmf', 'volume_fi', 'volume_em', 'volume_sma_em',
       'volume_vpt', 'volume_vwap', 'volume_mfi', 'volume_nvi',
       'volatility_bbm', 'volatility_bbh', 'volatility_bbl', 'volatility_bbw',
       'volatility_bbp', 'volatility_bbhi', 'volatility_bbli',
       'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_kcw',
       'volatility_kcp', 'volatility_kchi', 'volatility_kcli',
       'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcw',
       'volatility_dcp', 'volatility_atr', 'volatility_ui', 'trend_macd',
       'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
       'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
       'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff',
       'trend_trix', 'trend_mass_index', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
       

In [None]:
def add_cov(df):
  df = df.reset_index()

  cov_list = []
  return_list = []

  # look back is one year
  lookback=252
  for i in range(lookback,len(df.index.unique())):
    data_lookback = df.iloc[i-lookback:i,:]
    price_lookback=data_lookback.pivot_table(index = 'Date', values = 'Close')
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values 
    cov_list.append(covs)


  df_cov = pd.DataFrame({'Date':df["Date"].unique()[lookback:],'cov_list':cov_list,'return_list':return_list})
  df = df.merge(df_cov, on='Date')
  df = df.sort_values(['Date']).reset_index(drop=True)
  return df

In [None]:
aapl_df = add_cov(aapl_df)
caas_df = add_cov(caas_df)

In [None]:
new_aapl_df = aapl_df.copy()
new_aapl_df["tic"] = "AAPL"
new_caas_df = caas_df.copy()
new_caas_df["tic"] = "CAAS"
mixed_df = pd.concat([new_aapl_df, new_caas_df])

# Environment

In [None]:
class StockPortfolioEnv(gym.Env):
  def __init__(self):
    pass
  
  def reset(self):
    pass
  
  def step(self, action):
    pass

## Kaggle Tutorial

In [None]:
class StockPortfolioEnv(gym.Env):
  def __init__(self, 
                df,
                stock_dim,
                hmax,
                initial_amount,
                transaction_cost_pct,
                reward_scaling,
                state_space,
                action_space,
                tech_indicator_list,
                turbulence_threshold=None,
                lookback=252,
                day = 0):
    #super(StockEnv, self).__init__()
    #money = 10 , scope = 1
    self.day = day
    self.lookback=lookback
    self.df = df
    self.stock_dim = stock_dim
    self.hmax = hmax
    self.initial_amount = initial_amount
    self.transaction_cost_pct =transaction_cost_pct
    self.reward_scaling = reward_scaling
    self.state_space = state_space
    self.action_space = action_space
    self.tech_indicator_list = tech_indicator_list

    # action_space normalization and shape is self.stock_dim
    self.action_space = gym.spaces.Box(low = -1, high = 1,shape = (self.action_space,)) 
    # Shape = ??
    # covariance matrix + technical indicators
    self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape = (1+len(self.tech_indicator_list),self.state_space))

    # load data from a pandas dataframe
    self.data = self.df.loc[self.day,:]
    self.covs = [[x[0][0] for x in self.data['cov_list']]]
    self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)
    self.terminal = False     
    self.turbulence_threshold = turbulence_threshold        
    # initalize state: inital portfolio return + individual stock return + individual weights
    self.portfolio_value = self.initial_amount

    # memorize portfolio value each step
    self.asset_memory = [self.initial_amount]
    # memorize portfolio return each step
    self.portfolio_return_memory = [0]
    self.actions_memory=[[1/self.stock_dim]*self.stock_dim]
    self.date_memory=[self.data["Date"].unique()[0]]


        
  def step(self, actions):
    # print(self.day)
    self.terminal = self.day >= len(self.df.index.unique())/self.stock_dim-1
    # print(actions)

    if self.terminal:
      df = pd.DataFrame(self.portfolio_return_memory)
      df.columns = ['daily_return']
      plt.plot(df.daily_return.cumsum(),'r')
      # plt.savefig('results/cumulative_reward.png')
      plt.close()

      plt.plot(self.portfolio_return_memory,'r')
      # plt.savefig('results/rewards.png')
      plt.close()

      print("=================================")
      print("begin_total_asset:{}".format(self.asset_memory[0]))           
      print("end_total_asset:{}".format(self.portfolio_value))

      df_daily_return = pd.DataFrame(self.portfolio_return_memory)
      df_daily_return.columns = ['daily_return']
      if df_daily_return['daily_return'].std() !=0:
        sharpe = (252**0.5)*df_daily_return['daily_return'].mean()/ \
                  df_daily_return['daily_return'].std()
        print("Sharpe: ",sharpe)
      print("=================================")

      return self.state, self.reward, self.terminal,{}

    else:
      #print("Model actions: ",actions)
      # actions are the portfolio weight
      # normalize to sum of 1
      #if (np.array(actions) - np.array(actions).min()).sum() != 0:
      #  norm_actions = (np.array(actions) - np.array(actions).min()) / (np.array(actions) - np.array(actions).min()).sum()
      #else:
      #  norm_actions = actions
      weights = self.softmax_normalization(actions) 
      #print("Normalized actions: ", weights)
      self.actions_memory.append(weights)
      last_day_memory = self.data

      #load next state
      self.day += 1
      self.data = self.df.loc[self.day,:]
      self.covs = [[x[0][0] for x in self.data['cov_list']]]
      self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)
      #print(self.state)
      # calcualte portfolio return
      # individual stocks' return * weight
      portfolio_return = sum(((self.data["Close"].values / last_day_memory["Close"].values)-1)*weights)
      # update portfolio value
      new_portfolio_value = self.portfolio_value*(1+portfolio_return)
      self.portfolio_value = new_portfolio_value

      # save into memory
      self.portfolio_return_memory.append(portfolio_return)
      self.date_memory.append(self.data["Date"].unique()[0])            
      self.asset_memory.append(new_portfolio_value)

      # the reward is the new portfolio value or end portfolo value
      self.reward = new_portfolio_value 
      #print("Step reward: ", self.reward)
      #self.reward = self.reward*self.reward_scaling

    return self.state, self.reward, self.terminal, {}

  def reset(self):
    self.asset_memory = [self.initial_amount]
    self.day = 0
    self.data = self.df.loc[self.day,:]
    # load states
    self.covs = [[x[0][0] for x in self.data['cov_list']]]
    self.state =  np.append(np.array(self.covs), [self.data[tech].values.tolist() for tech in self.tech_indicator_list ], axis=0)
    self.portfolio_value = self.initial_amount
    #self.cost = 0
    #self.trades = 0
    self.terminal = False 
    self.portfolio_return_memory = [0]
    self.actions_memory=[[1/self.stock_dim]*self.stock_dim]
    self.date_memory=[self.data["Date"].unique()[0]] 
    return self.state
    
  def render(self, mode='human'):
    return self.state
      
  def softmax_normalization(self, actions):
    numerator = np.exp(actions)
    denominator = np.sum(np.exp(actions))
    softmax_output = numerator/denominator
    return softmax_output

  
  def save_asset_memory(self):
    date_list = self.date_memory
    portfolio_return = self.portfolio_return_memory
    #print(len(date_list))
    #print(len(asset_list))
    df_account_value = pd.DataFrame({'Date':date_list,'daily_return':portfolio_return})
    return df_account_value

  def save_action_memory(self):
    # date and close price length must match actions length
    date_list = self.date_memory
    df_date = pd.DataFrame(date_list)
    df_date.columns = ['Date']
    
    action_list = self.actions_memory
    df_actions = pd.DataFrame(action_list)
    df_actions.columns = self.data["tic"]
    df_actions.index = df_date["Date"]
    #df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
    return df_actions

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def get_sb_env(self):
    e = DummyVecEnv([lambda: self])
    obs = e.reset()
    return e, obs

In [None]:
stock_dimension = len(mixed_df["tic"].unique())
state_space = 2

env_kwargs = {
  "hmax": 100, 
  "initial_amount": 1000000, 
  "transaction_cost_pct": 0.001, 
  "state_space": state_space, 
  "stock_dim": stock_dimension, 
  "tech_indicator_list": [
    "volume_obv",
    "volume_adi", 
    "trend_adx", 
    "momentum_ao", 
    "trend_macd", 
    "momentum_rsi", 
    "momentum_stoch"
  ], 
  "action_space": stock_dimension, 
  "reward_scaling": 1e-4
    
}

env = StockPortfolioEnv(df = mixed_df, **env_kwargs)

In [None]:
env_train, _ = env.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


# Train

In [None]:
policy = "MlpPolicy"

In [None]:
a2c_agent = A2C(policy, env)
ddpg_agent = DDPG(policy, env)
# dqn_agent = DQN(policy, env)
ppo_agent = PPO(policy, env)

In [None]:
print("A2C")
a2c_result = a2c_agent.learn(
    total_timesteps=1000,
)

print("DDPG")
ddpg_result = ddpg_agent.learn(
    total_timesteps=1000,
)

print("PPO")
ppo_result = ppo_agent.learn(
    total_timesteps=1000,
)

A2C
begin_total_asset:1000000
end_total_asset:4698006.246950461
Sharpe:  1.154463925347601
DDPG
begin_total_asset:1000000
end_total_asset:4166634.2497731415
Sharpe:  0.9646565490149791
begin_total_asset:1000000
end_total_asset:3133082.761745088
Sharpe:  0.8522705448294908
PPO
begin_total_asset:1000000
end_total_asset:5376440.1625009
Sharpe:  1.3760031359356613
begin_total_asset:1000000
end_total_asset:6414873.683664131
Sharpe:  1.139750807703231
begin_total_asset:1000000
end_total_asset:4559476.167170829
Sharpe:  1.3572456864992504
begin_total_asset:1000000
end_total_asset:2713320.7485274947
Sharpe:  0.8845335036432287


# Predict

In [None]:
a2c_vec_env = a2c_result.get_env()
obs = a2c_vec_env.reset()
for i in range(1000):
    action, _states = a2c_result.predict(obs, deterministic=True)
    obs, reward, done, info = a2c_vec_env.step(action)
    # print("Step", i, reward)
    a2c_vec_env.render()

begin_total_asset:1000000
end_total_asset:3970737.807174312
Sharpe:  1.1788267877385412


# Backtest

## To explore

In [None]:
class MLTrainOnceStrategy(Strategy):
    price_delta = .004  # 0.4%

    def init(self):        
        # agent
        self.agent = ppo_result
        self.vec_env = ppo_result.get_env()
        self.obs = self.vec_env.reset()
        
        # data
        # self.data = mixed_df
        # self.data.set_index("Date")

        # Prepare empty, all-NaN forecast indicator
        self.forecasts = self.I(lambda: np.repeat(np.nan, len(self.data)), name='forecast')
    
    def next(self):
        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        # current_time = self.data.index[-1]

        # Forecast the next movement
        action, _states = self.agent.predict(self.obs, deterministic=True)
        self.obs, forecast, done, info = self.vec_env.step(action)
        self.vec_env.render()

        # Update the plotted "forecast" indicator
        self.forecasts[-1] = forecast

        # If our forecast is upwards and we don't already hold a long position
        # place a long order for 20% of available account equity. Vice versa for short.
        # Also set target take-profit and stop-loss prices to be one price_delta
        # away from the current closing price.
        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        if forecast > 1000000 and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast <= 1000000 and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)

        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        # for trade in self.trades:
        #     if current_time - trade.entry_time > pd.Timedelta('2 days'):
        #         if trade.is_long:
        #             trade.sl = max(trade.sl, low)
        #         else:
        #             trade.sl = min(trade.sl, high)

In [None]:
bt = Backtest(aapl_df, MLTrainOnceStrategy, commission=.0002, margin=.05)
bt.run()

  bt = Backtest(aapl_df, MLTrainOnceStrategy, commission=.0002, margin=.05)


begin_total_asset:1000000
end_total_asset:4074873.0195756163
Sharpe:  1.2052530696886248


Start                                     0.0
End                                    1006.0
Duration                               1006.0
Exposure Time [%]                   62.264151
Equity Final [$]                    32.985181
Equity Peak [$]                       10000.0
Return [%]                         -99.670148
Buy & Hold Return [%]              265.510909
Return (Ann.) [%]                         0.0
Volatility (Ann.) [%]                     NaN
Sharpe Ratio                              NaN
Sortino Ratio                             NaN
Calmar Ratio                              0.0
Max. Drawdown [%]                  -99.675341
Avg. Drawdown [%]                  -99.675341
Max. Drawdown Duration                 1005.0
Avg. Drawdown Duration                 1005.0
# Trades                                626.0
Win Rate [%]                        11.022364
Best Trade [%]                       0.698976
Worst Trade [%]                     -3.483065
Avg. Trade [%]                    