# Imports 

Run these every time you start a working session: the original stable-baselines library only works with TensorFlow 1.x, and yfinance also needs to be installed.

In [None]:
#  ! pip install tensorflow-gpu==1.15.0 tensorflow==1.15.0 stable-baselines gym-anytrading gym
#  ! pip install yfinance

In [None]:
#  ! pip install stable-baselines3

In [None]:
# Gym stuff
import gym
from gym import spaces

# Stable baselines - rl stuff
# !pip install stable_baselines3
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C
from stable_baselines3.common.logger import configure

#  yfinance stuff
import yfinance as yf

# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import random
import json
import datetime as dt
import os
from sklearn.model_selection import train_test_split
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# visualizations
import plotly.express as px

# deep learning library
import tensorflow as tf
import collections
import tqdm

# Functions

In [None]:
def train_test_split(train_cut_off=.7, manual_data_way = False, 
                     train_date_start = None, train_date_end = None, 
                     test_date_start = None, test_date_end = None):
  '''
  Splits the global `full_data` frame into a training and a test frame.

  NOTE: this definition shadows sklearn's `train_test_split` imported at the top
  of the notebook; after this cell runs the name refers to this function.

  Two modes:
    * manual_data_way falsy (default): the first `train_cut_off` fraction of the
      rows (float between 0 and 1) becomes the training frame, the rest the test frame.
    * manual_data_way truthy: the four date arguments are looked up in
      full_data['Date'] and the rows between each start/end pair (inclusive)
      are returned.

  Returns:
    (train_df, test_df)

  Raises:
    ValueError: in manual mode, when one of the requested dates has no matching
      row in full_data['Date'] (e.g. it is not a trading day). The message names
      the offending argument.

  TO DOS: improve the range so instead of an error, it keeps trying the next day until it gets a date that is a trading day
  '''
  if not manual_data_way:
    full_time_range = full_data.shape[0]
    train_date_cutoff = int(train_cut_off*full_time_range)

    train_df = full_data.iloc[0:train_date_cutoff,:]
    test_df = full_data.iloc[train_date_cutoff:full_time_range,:]

    return train_df, test_df

  requested_dates = {
      'train_date_start': train_date_start,
      'train_date_end': train_date_end,
      'test_date_start': test_date_start,
      'test_date_end': test_date_end,
  }

  # NOTE(review): the index label found here is used as a row position in .iloc
  # below, which is only right when full_data has a default 0..n-1 index -- confirm.
  found = {}
  for arg_name, wanted_date in requested_dates.items():
    matches = full_data[full_data['Date']==wanted_date].index
    if len(matches) == 0:
      print('#'*40)
      print('One of the dates provded is not a trading day, check error message for detail of which day')
      print('#'*40)
      raise ValueError(arg_name + '=' + repr(wanted_date) + ' was not found in full_data["Date"]')
    found[arg_name] = matches[0]

  train_df = full_data.iloc[found['train_date_start']:found['train_date_end']+1,:]
  test_df = full_data.iloc[found['test_date_start']:found['test_date_end']+1,:]

  return train_df, test_df

def ploting_results(running_totals, test_df, action_list, cash_list,
                    hold_start=5, hold_end_trim=8):
  '''
  Plots the holdings over the course of the test data.

  Three families of lines are drawn on one plotly chart:
    * one line per security in the global `stock_list` with the agent's holdings value,
    * one "<ticker>_hold" line per security: the value of the shares that
      INITIAL_ACCOUNT_BALANCE buys at the Open price of row `hold_start`,
    * one "Cash" line from `cash_list`.

  Args:
    running_totals: list with one entry per step; each entry is a list of
      holdings values ordered like `stock_list`.
    test_df: frame indexed as test_df[ticker]['Open'].
    action_list: list with one entry per step; each entry holds one action per security.
    cash_list: list of cash balances, one per step.
    hold_start: first row of test_df used for the buy-and-hold baseline (default 5).
    hold_end_trim: number of trailing rows dropped from the baseline (default 8).
      NOTE(review): the defaults keep the original hard-coded 5 / 8 offsets; they
      are not derived from the environment's days_back -- confirm they line up
      with the period the agent actually traded.
  '''
  ## getting actual investment
  n_steps = len(running_totals)
  frames = []
  for idx, stock in enumerate(stock_list):
    frames.append(pd.DataFrame({
        'Day' : list(range(n_steps)),
        'Security': [stock]*n_steps,
        'Holdings Value': [one_day[idx] for one_day in running_totals],
        'Actions': [one_action[idx] for one_action in action_list]
    }))

  ## Getting just hold data
  n_rows = test_df.shape[0]
  hold_stop = n_rows - hold_end_trim
  hold_len = n_rows - (hold_start + hold_end_trim)
  for stock in stock_list:
    open_prices = test_df[stock]['Open']
    # .iloc makes the positional lookup explicit (the index holds dates, not ints)
    possible_shares = int(INITIAL_ACCOUNT_BALANCE / open_prices.iloc[hold_start])
    frames.append(pd.DataFrame({
        'Day' : list(range(hold_len)),
        'Security': [stock+str('_hold')]*hold_len,
        'Holdings Value': open_prices.iloc[hold_start:hold_stop]*possible_shares,
        'Actions': ['na']*hold_len
    }))

  ## Adding cash
  frames.append(pd.DataFrame({
      'Day' : list(range(len(cash_list))),
      'Security': ['Cash']*len(cash_list),
      'Holdings Value': cash_list,
      'Actions': ['na']*len(cash_list)
  }))

  # one concat at the end instead of growing a frame inside the loops
  final_full_df_cash = pd.concat(frames)

  # ploting
  fig = px.line(final_full_df_cash, x='Day', y='Holdings Value', color='Security', markers=False, hover_data=['Actions'])
  fig.update_layout(title_text='Test Security Holdings Over Time', title_x=0.5)
  fig.show()


def CAGR_calc(pfv, periods_per_year=365):
  '''
  Returns the CAGR (compound annual growth rate, in percent) of a model.

  Args:
    pfv: sequence of portfolio values, one per period; the first and last
      entries are taken as the starting and ending value.
    periods_per_year: how many entries of `pfv` make up one year. Defaults to
      365, the value that used to be hard-coded; pass e.g. 252 when each entry
      is one trading day.

  Returns:
    The growth rate in percent, rounded to 3 decimals. It is also printed.

  Raises:
    ValueError: if `pfv` is empty.
  '''
  if len(pfv) == 0:
    raise ValueError('pfv must contain at least one portfolio value')

  years = len(pfv)/periods_per_year
  CAGR = np.round(((pfv[-1]/pfv[0])**(1/years) - 1)*100,3)
  print('Compound Annual Growth Rate: '+str(CAGR))

  return CAGR

def Colins_amazing_htan_func(action, adjust):
  '''
  htan function with an adjustment that can "pull" the function to be either more wide or more narrow

  Computes tanh(action / adjust). A larger `adjust` flattens the curve, so the
  same action maps to a smaller magnitude. Works on scalars and numpy arrays.

  np.tanh is used instead of spelling the ratio out with np.exp: the explicit
  form overflows to inf/inf (= nan) once |action/adjust| gets large, while
  np.tanh saturates cleanly at +/-1.
  '''
  return np.tanh(action/adjust)

def CAGR_just_hold(test_df):
  '''
  CAGR (in percent) of simply holding every security in the global `stock_list`.

  INITIAL_ACCOUNT_BALANCE is notionally invested in each security at the price in
  the first row of test_df and valued at the price in the last row. Both prices
  are read from the first column of test_df[ticker]. The row count of test_df
  divided by 365 is used as the number of years.

  Returns the rate rounded to 3 decimals; it is also printed.
  '''
  ## price on the first and on the last row, per security
  first_prices = np.array([test_df[ticker].iloc[0,0] for ticker in stock_list])
  last_prices = np.array([test_df[ticker].iloc[-1,0] for ticker in stock_list])

  ## profit when the same balance is put into every security
  relative_growth = (last_prices-first_prices)/first_prices
  total_profit = np.sum(relative_growth*INITIAL_ACCOUNT_BALANCE)

  ## final CAGR calc
  years = test_df.shape[0]/365
  start_value = INITIAL_ACCOUNT_BALANCE*len(stock_list)
  end_value = total_profit + (INITIAL_ACCOUNT_BALANCE*len(stock_list))
  CAGR = np.round((((end_value/start_value)**(1/years) ) - 1 )*100,3)
  print('Compound Annual Growth Rate (Just Holding): '+str(CAGR))

  return CAGR

def pie_plot_for_day(running_totals, day_in_question, cash_list):
  '''
  Pie chart of how the portfolio is split on one day.

  One slice per security in the global `stock_list` (sized by that day's entry in
  `running_totals`) plus a 'cash' slice taken from `cash_list`.
  '''
  # new lists are built so the caller's lists and stock_list stay untouched
  slice_values = [*running_totals[day_in_question], cash_list[day_in_question]]
  slice_names = [*stock_list, 'cash']

  allocation_df = pd.DataFrame({'Value': slice_values, 'Labels': slice_names})
  chart = px.pie(allocation_df, values='Value', names='Labels', title='Portfolio Allocation on Day '+str(day_in_question))
  chart.update_traces(textposition='inside', textinfo='percent+label')
  chart.show()


# Multi-Security Method

## Pulling in Data 

In [None]:
# Tickers traded by the environment; the functions and the env class in this
# notebook read this list as a global.
stock_list = ['SPY', 'VFIAX', 'VTSAX', 'AAPL', 'KO', 'JNJ', 'AXP', 'LMT']

# Daily bars for every ticker. group_by='ticker' puts the ticker on the outer
# column level, which is what lets the rest of the notebook write full_data[ticker].
full_data = yf.download(tickers=stock_list, start='2001-01-01', end='2021-12-31',
                        interval="1d", group_by='ticker')

[*********************100%***********************]  8 of 8 completed


In [None]:
# Count missing cells per column, then across columns. Spelled with DataFrame.sum
# rather than np.sum(DataFrame), whose axis=None reduction is changing in pandas.
full_data.isnull().sum().sum() == 0 #If not true, model will nan out. 

True

In [None]:
# Global constants read by the environment class and the plotting / CAGR helpers.
MAX_ACCOUNT_BALANCE = 2147483647  # upper end of the env's reward_range
MAX_NUM_SHARES = 2147483647  # not referenced by the code visible in this notebook
MAX_SHARE_PRICE = 1  # divisor applied to the price/volume window in get_a_frame; 1 leaves values unscaled
MAX_OPEN_POSITIONS = 5  # not referenced by the code visible in this notebook
INITIAL_ACCOUNT_BALANCE = 10000  # starting cash per security (the env starts with this times len(stock_list))

In [None]:
# GLOBAL_COUNTER_VARIABLE = 0
# total_days_GLOBAL = 10

## Actual Env

In [None]:
class StockTradingEnv_2(gym.Env):
    """Colin's stock environment: trades several securities out of one shared cash pool.

    NOTE: you will need to define something as the "stock_list" before creating the environment as
    the __init__ (and most methods) read a global variable called "stock_list". The globals
    INITIAL_ACCOUNT_BALANCE, MAX_ACCOUNT_BALANCE and MAX_SHARE_PRICE are read as well.

    Action: one float in [-1, 1] per security. Positive buys, negative sells, and the
    size of the trade is tanh(action / 5) of what is possible.

    Observation: array of shape (stock_num, days_back + 1, 5). For each security the
    first days_back rows are the price/volume window that ends just before
    current_step, and the last row is the account state
    [current_holdings_value, shares_held, total_cash, total_shares_sold, total_sales_value].

    Args:
        df: frame with a two-level column layout so that df[ticker] is that
            ticker's price frame (containing at least an 'Open' column).
        days_back: number of past rows included in each observation.
    """

    metadata = {'render.modes': ['human']}

    # Fees as a fraction of the traded amount. Buys pay brokerage_fee, sells pay
    # brokerage_fee + tax_fee_estimate.
    brokerage_fee = .0005
    tax_fee_estimate = .05

    def __init__(self, df, days_back):
        super(StockTradingEnv_2, self).__init__()
        # counter (for printing)
        self.counting_days = 0

        # creating the full df (one with all stocks)
        self.all_stocks_df = df
        self.days_back = days_back

        # creating a list of dfs (each index is a df for each stock). reset_index()
        # pushes the old index into column 0, so the price columns start at position 1.
        self.list_of_dfs = [self.all_stocks_df[stock].reset_index() for stock in stock_list]

        # creating reward range
        # NOTE(review): step() can return negative rewards, which falls outside this range.
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        # getting the number of stocks
        self.stock_num = len(stock_list)

        # setting the current step to be the number of days out so enough data is available
        self.current_step = self.days_back

        # Actions of the format Buy x%, Sell x%, Hold, etc.
        # getting a separate decision / amount action for each stock
        self.low_list = [-1]
        self.high_list = [1]
        self.final_low_list = self.low_list*self.stock_num
        self.final_high_list = self.high_list*self.stock_num
        # getting final action space
        self.action_space = spaces.Box(
            low=np.array(self.final_low_list), high=np.array(self.final_high_list), dtype = np.float64)

        # Price window plus one account-state row per stock.
        # NOTE(review): the declared bounds are [0, 1] but get_a_frame divides by
        # MAX_SHARE_PRICE only, so observations are not actually confined to that range.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(self.stock_num,self.days_back+1,5))

        # total available cash balance, shared by all securities
        self.total_cash = INITIAL_ACCOUNT_BALANCE*self.stock_num

        # per-security bookkeeping table, indexed by ticker. Built in one go
        # (DataFrame.append no longer exists in pandas 2.x); value columns start as
        # floats so later float assignments do not have to change the column dtype.
        self.cols = ['security_ticker', 'total_cash', 'current_holdings_value', 'shares_held', 'cost_basis', 'total_shares_sold', 'total_sales_value']
        initial_rows = [[security, self.total_cash, 0.0, 0, 0.0, 0, 0.0] for security in stock_list]
        self.stock_metadata_df = pd.DataFrame(initial_rows, columns=self.cols).set_index('security_ticker')

        # Buy-and-hold benchmark: shares INITIAL_ACCOUNT_BALANCE buys of each stock
        # at the Open of row days_back (.iloc: the lookup is positional).
        self.possible_shares_list = []
        for stock in stock_list:
            first_day = self.all_stocks_df[stock]['Open'].iloc[self.days_back]
            self.possible_shares_list.append(int(INITIAL_ACCOUNT_BALANCE / first_day))

        # running list of prior holdings_values (for potential reward calc)
        self.running_holdings_values_list = []

        # Running total portfolio value list
        self.totol_portfolio_value_list = [INITIAL_ACCOUNT_BALANCE*len(stock_list)]

        # Running action list
        self.running_action_list = []

        # Running cash list
        self.cash_list = []


    def _next_observation(self):
        """Builds the observation for current_step: one (days_back + 1, 5) frame per stock."""
        obs = []
        for just_one_stock_df, stock in zip(self.list_of_dfs, stock_list):
            # price/volume window for this stock ...
            local_frame = self.get_a_frame(just_one_stock_df, self.current_step)
            # ... plus the "meta" variables (ex. account cash balance)
            obs.append(self.add_current_global_variables(local_frame, stock))

        return np.array(obs)

#################################################################################


    def _take_action(self, action, stock_df, stock):
        """Buys or sells `stock` at the price in column 1 of stock_df at current_step.

        Args:
            action: the action for this one security (a scalar or a 1-element array).
                > 0 buys, < 0 sells, 0 does nothing.
            stock_df: this security's frame from self.list_of_dfs.
            stock: ticker, used as the row key of self.stock_metadata_df.
        """
        # step() hands over 1-element arrays; work on a plain number so the
        # comparisons and int() calls below do not rely on array truthiness.
        action = np.asarray(action).item()

        # full continuous action space method
        current_price = stock_df.iloc[self.current_step,1]
        magnitude_of_purchase = Colins_amazing_htan_func(action ,5)
        meta = self.stock_metadata_df

        if action > 0:
            # buy shares based on magnitude of action
            total_possible = int(self.total_cash/ current_price)
            shares_bought = int(total_possible * magnitude_of_purchase)
            # A zero-share "buy" changes nothing; skipping it also avoids the 0/0
            # that used to turn cost_basis into NaN for an empty position.
            if shares_bought > 0:
                shares_held = meta.loc[stock,'shares_held']
                prev_cost = meta.loc[stock,'cost_basis'] * shares_held
                brokerage_cost =  shares_bought * current_price * self.brokerage_fee
                additional_cost = shares_bought * current_price + brokerage_cost

                self.total_cash  -= additional_cost
                meta.loc[stock,'cost_basis'] = (prev_cost + additional_cost) / (shares_held + shares_bought)
                meta.loc[stock,'shares_held']  += shares_bought
        elif action < 0:
            # sell shares based on magnitude of action
            magnitude_of_purchase = np.absolute(magnitude_of_purchase)
            shares_sold = int(meta.loc[stock,'shares_held']  * magnitude_of_purchase)
            brokerage_cost =  shares_sold * current_price * (self.brokerage_fee + self.tax_fee_estimate)
            self.total_cash += (shares_sold * current_price) - brokerage_cost
            meta.loc[stock,'shares_held']  -= shares_sold
            meta.loc[stock,'total_shares_sold'] += shares_sold
            meta.loc[stock,'total_sales_value']  += shares_sold * current_price

        # removes cost if shares_held for a stock are 0: a closed position has no
        # cost basis, and the next buy then starts from a clean average
        if meta.loc[stock,'shares_held'] == 0:
            meta.loc[stock,'cost_basis'] = 0.0

        # getting the current value of holdings
        meta.loc[stock,'current_holdings_value'] = current_price * meta.loc[stock,'shares_held']


    def step(self, action):
        """Applies one action per security, then advances current_step by one.

        Returns:
            (obs, reward, done, info). reward is the portfolio value minus the value
            of the buy-and-hold benchmark at the current price, minus 50 when less
            than 10% of the starting cash is left. done is True when net worth is
            <= 0 or fewer than 8 rows remain after the step.
        """
        # separating the actions by security
        action_list = np.array_split(action,self.stock_num)
        self.running_action_list.append(action_list)

        # perform this iteration of actions for each stock, updating each stock
        for single_action, stock_df, stock in zip(action_list, self.list_of_dfs, stock_list):
            self._take_action(single_action, stock_df, stock)

        # adding current holdings values to running list for each stock:
        this_steps_holding_list = [
            self.stock_metadata_df.loc[stock,'current_holdings_value'] for stock in stock_list]
        self.running_holdings_values_list.append(this_steps_holding_list)

        # adding current portfolio value to running list
        this_steps_value = self.total_cash
        for single_stock_value in this_steps_holding_list:
            this_steps_value += single_stock_value
        self.totol_portfolio_value_list.append(this_steps_value)

        # updating cash list
        self.cash_list.append(self.total_cash)

        #######################
        ##### REWARD CALC #####
        #######################

        # value of the buy-and-hold benchmark at today's price
        current_hold_value = 0
        for stock_df, inital_stock_count in zip(self.list_of_dfs, self.possible_shares_list):
            current_price = stock_df.iloc[self.current_step,1]
            current_hold_value += current_price*inital_stock_count

        current_portfolio_value = self.totol_portfolio_value_list[-1]

        # punished for selling:
        ## NO LONGER USING (always 0) but can adjust here if including again in future
        sell_punishment = 0

        # punishment for using all cash too early
        Over_spend_punishment = 0
        OG_cash_amount = INITIAL_ACCOUNT_BALANCE*self.stock_num
        if self.total_cash/OG_cash_amount < 0.1:
            Over_spend_punishment += -50

        reward = current_portfolio_value - current_hold_value + sell_punishment + Over_spend_punishment

        # Getting the current net worth: cash plus every position at today's price.
        # Each security is looked up by its own ticker.
        self.all_net_worth = self.total_cash
        for local_df, local_stock in zip(self.list_of_dfs, stock_list):
            local_current_price = local_df.iloc[self.current_step,1]
            local_net_worth = (self.stock_metadata_df.loc[local_stock,'shares_held'] * local_current_price)
            self.all_net_worth = self.all_net_worth + local_net_worth

        done = self.all_net_worth <= 0

        # NOTE(review): the observation is built before current_step advances, so the
        # price window returned here is the same one the agent just acted on (only the
        # account row is new). Left as is because changing it alters training.
        obs = self._next_observation()

        self.current_step += 1

        if self.current_step + 7 >= len(self.all_stocks_df):
            done = True

        return obs, reward, done, {}

########################################################################

    def get_a_frame(self, stock_df, current_step_):
        """Returns the (days_back, 5) window of one stock that ends just before current_step_.

        Columns are picked by position from stock_df and labelled Open, High, Low,
        Close, Volume by the original author (positions 1, 2, 3, 4, 6) -- this assumes
        the column order of the downloaded data; TODO confirm. Every value is divided
        by MAX_SHARE_PRICE.
        """
        window_start = current_step_-self.days_back
        column_positions = (1, 2, 3, 4, 6)  # Open, High, Low, Close, Volume
        local_frame = np.array([
            stock_df.iloc[window_start: current_step_, position].values / MAX_SHARE_PRICE
            for position in column_positions
        ]).T
        return local_frame

    def add_current_global_variables(self, frame_,stock):
        """Appends the account-state row of `stock` to its price window `frame_`.

        The added row is [current_holdings_value, shares_held, total_cash,
        total_shares_sold, total_sales_value].
        """
        local_obs = np.append(frame_, [[
            self.stock_metadata_df.loc[stock,'current_holdings_value'],
            self.stock_metadata_df.loc[stock,'shares_held'],
            self.total_cash,
            self.stock_metadata_df.loc[stock,'total_shares_sold'],
            self.stock_metadata_df.loc[stock,'total_sales_value']
        ]], axis=0)

        return local_obs

    def reset(self):
        """Rewinds current_step to days_back and returns the observation there.

        Only the step pointer is reset: cash, holdings and the running history
        lists are kept, so render() still reports the finished run after a reset.
        NOTE(review): presumably the vectorized wrapper resets the env as soon as an
        episode is done, which is why render() is usable afterwards -- confirm
        before making this a full reset.
        """
        # putting current step back at start
        self.current_step = self.days_back

        return self._next_observation()

    def render(self, mode='human', close=False):
        """Prints the profit summary and returns the recorded run.

        Returns:
            A 7-tuple: (running_holdings_values_list, stock_metadata_df, profit,
            totol_portfolio_value_list, running_action_list, total_cash, cash_list).
        """
        # calcing the profit (cash + holdings_vale) - INITIAL_ACCOUNT_BALANCE
        self.profit = self.total_cash
        for stock in stock_list:
            local_profit = self.stock_metadata_df.loc[stock,'current_holdings_value'] - INITIAL_ACCOUNT_BALANCE
            self.profit += local_profit
        print('Final Profit: '+str(self.profit))
        print(f'Step: {self.current_step}')

        print('Total Percentage Increase/Decrease: ')
        print(self.profit/(INITIAL_ACCOUNT_BALANCE*len(stock_list)))

        return self.running_holdings_values_list, self.stock_metadata_df, self.profit, self.totol_portfolio_value_list, self.running_action_list, self.total_cash, self.cash_list

## Trying a MlpPolicy model:

In [None]:
# Split by row order: the first 70% of full_data's rows train the model, the rest test it.
train_df, test_df = train_test_split(.7)

In [None]:
## setting up learning:
days_back = 30                          # rows of history in every observation
number_of_times_through_training = 10   # passes over the training rows
the_learning_rate = .001
batch_size = 16                         # handed to A2C as n_steps
the_gamma = .25                         # discount factor
train_df_days = train_df.shape[0]
total_days_GLOBAL = number_of_times_through_training*train_df_days


## The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: StockTradingEnv_2(train_df.reset_index(), days_back)]) # 3

## where to save the logger
tmp_path = '/content/the_logs'
# "stdout" keeps the progress tables in the cell output, "csv" writes them to tmp_path
new_logger = configure(tmp_path, ["stdout", "csv"])

## creating model
model = A2C("MlpPolicy", env, gamma = the_gamma, verbose=1, n_steps=batch_size, learning_rate = the_learning_rate, seed = 8)
# the logger only takes effect once it is attached to the model
model.set_logger(new_logger)

model.learn(total_timesteps=total_days_GLOBAL)

  "Box bound precision lowered by casting to {}".format(self.dtype)


Using cpu device
-------------------------------------
| time/                 |           |
|    fps                | 42        |
|    iterations         | 100       |
|    time_elapsed       | 37        |
|    total_timesteps    | 1600      |
| train/                |           |
|    entropy_loss       | -11.4     |
|    explained_variance | -3.58e-07 |
|    learning_rate      | 0.001     |
|    n_updates          | 99        |
|    policy_loss        | -3.32e+06 |
|    std                | 1.01      |
|    value_loss         | 7.55e+10  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 50        |
|    iterations         | 200       |
|    time_elapsed       | 63        |
|    total_timesteps    | 3200      |
| train/                |           |
|    entropy_loss       | -11.4     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.001     |
|    n_updates          | 199    

<stable_baselines3.a2c.a2c.A2C at 0x7f47e58e6f50>

In [None]:
# Seeing how it does with Test
# Fresh environment on the held-out rows; the trained model drives it until the
# environment reports done.
env = DummyVecEnv([lambda: StockTradingEnv_2(test_df, days_back)])
obs = env.reset()
for i in range(len(test_df)):
  action, _states = model.predict(obs)
  obs, rewards, done, info = env.step(action)
  if done:
    break

# render() returns a 7-tuple: holdings history, per-security table, profit,
# portfolio-value history, action history, final cash, cash history.
MLP_final_results = env.render()
(MLP_running_totals, MLP_final_df, MLP_final_profit,
 MLP_pfv, MLP_action_list) = MLP_final_results[:5]
MLP_cash_list = MLP_final_results[-1]

  "Box bound precision lowered by casting to {}".format(self.dtype)


Final Profit: 130871.55495346736
Step: 30
Total Percentage Increase/Decrease: 
1.6358944369183421


In [None]:
# Annualised growth of the agent's portfolio-value history next to a buy-and-hold baseline.
# Note the spans differ: the agent's history starts at row days_back of test_df,
# while CAGR_just_hold compares the first and last row of test_df.
MLP_CAGR = CAGR_calc(MLP_pfv)
MLP_CAGR_holding = CAGR_just_hold(test_df)
# per-security bookkeeping table at the end of the test run
MLP_final_df

Compound Annual Growth Rate: 25.657
Compound Annual Growth Rate (Just Holding): 24.213


Unnamed: 0_level_0,total_cash,current_holdings_value,shares_held,cost_basis,total_shares_sold,total_sales_value
security_ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SPY,80000,2272.400055,5,259.590934,22,5071.040009
VFIAX,80000,49724.021729,118,371.349759,669,177595.159653
VTSAX,80000,4405.439964,39,115.130923,1596,113232.199867
AAPL,80000,124863.759094,742,102.473835,6449,383793.381725
KO,80000,6406.400085,112,57.433069,1455,71011.679981
JNJ,80000,16613.189758,99,166.185915,1321,178491.210266
AXP,80000,0.0,0,0.0,0,0.0
LMT,80000,1705.0,5,336.672781,9,3328.109985


In [None]:
# Agent holdings per security, the buy-and-hold lines and the cash balance over the test run.
ploting_results(MLP_running_totals, test_df, MLP_action_list, MLP_cash_list)

In [None]:
# Portfolio allocation (securities + cash) at step index 10 of the test run.
pie_plot_for_day(MLP_running_totals,10, MLP_cash_list)

## Trying LSTM with normalizations

In [None]:
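# Up to here we used stable_baselines3, which does not provide the MlpLnLstmPolicy used below,
# so we switch to the TensorFlow-1.x based stable_baselines. NOTE: these imports rebind
# A2C and DummyVecEnv, so re-running the earlier MlpPolicy cells afterwards uses this library.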
from stable_baselines import A2C 
from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common import make_vec_env

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation.



In [None]:
# Same 70/30 row-order split as for the MlpPolicy run.
train_df, test_df = train_test_split(.7)

In [None]:
## setting up learning:
days_back = 30  # rows of history in every observation
number_of_times_through_training = 10  # passes over the training rows
the_learning_rate = .001
batch_size = 16  # handed to A2C as n_steps
the_gamma = .05  # discount factor
train_df_days = train_df.shape[0] 
total_days_GLOBAL = number_of_times_through_training*train_df_days  # same product as the total_timesteps passed to learn() below


## The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: StockTradingEnv_2(train_df.reset_index(), days_back)]) # 3


## where to save the logger
tmp_path = '/content/the_logs'
# NOTE(review): new_logger is created here but never handed to the model in this cell
new_logger = configure(tmp_path, ["csv"])

## creating model
# A2C is whichever class the most recent import bound; the policy is selected by name
model = A2C("MlpLnLstmPolicy", env, gamma = the_gamma, verbose=1, n_steps=batch_size, learning_rate = the_learning_rate, seed = 8)

model.learn(total_timesteps=train_df.shape[0]*number_of_times_through_training)


[33mWARN: Box bound precision lowered by casting to float64[0m








Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.





Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


---------------------------------
| explained_variance | 8.76e-06 |
| fps                | 6        |
| nupdates           | 1        |
| policy_entropy     | 11.4     |
| total_timesteps    | 16       |
| value_loss         | 7.34e+05 |
---------------------------------
---------------------------------
| explained_variance | 2.44e-06 |
| fps                | 51       |
| nupdates           | 100      |
| policy_entropy     | 11.4     |
| total_timesteps    | 1600     |
| value_loss         | 5.35e+10 |
---------------------------------
----------------------------------
| explained_variance | -1.67e-06 |
| fps                | 53    

<stable_baselines.a2c.a2c.A2C at 0x7f47d95e4910>

In [None]:
# Seeing how it does with Test
# Fresh environment on the held-out rows; the trained model drives it until the
# environment reports done.
env = DummyVecEnv([lambda: StockTradingEnv_2(test_df, days_back)])
obs = env.reset()

# A recurrent policy carries memory between steps: the state returned by predict()
# has to be fed back in, together with the done mask, otherwise every prediction
# starts from an empty LSTM state.
_states = None
done = [False]
for i in range(test_df.shape[0]):
  action, _states = model.predict(obs, state=_states, mask=done)
  obs, rewards, done, info = env.step(action)
  if done:
    break

# render() returns a 7-tuple: holdings history, per-security table, profit,
# portfolio-value history, action history, final cash, cash history.
LSTM_N_final_results = env.render()
(LSTM_N_running_totals, LSTM_N_final_df, LSTM_N_final_profit,
 LSTM_N_pfv, LSTM_N_action_list) = LSTM_N_final_results[:5]
LSTM_N_cash_list = LSTM_N_final_results[-1]


[33mWARN: Box bound precision lowered by casting to float64[0m


invalid value encountered in double_scalars



Final Profit: 149839.5709854641
Step: 30
Total Percentage Increase/Decrease: 
1.872994637318301


In [None]:
# Annualised growth of the LSTM agent's portfolio-value history next to a buy-and-hold baseline.
# Note the spans differ: the agent's history starts at row days_back of test_df,
# while CAGR_just_hold compares the first and last row of test_df.
LSTM_N_CAGR = CAGR_calc(LSTM_N_pfv)
LSTM_N_CAGR_holding = CAGR_just_hold(test_df)
# per-security bookkeeping table at the end of the test run
LSTM_N_final_df

Compound Annual Growth Rate: 28.233
Compound Annual Growth Rate (Just Holding): 24.213


Unnamed: 0_level_0,total_cash,current_holdings_value,shares_held,cost_basis,total_shares_sold,total_sales_value
security_ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SPY,80000,0.0,0,,0,0.0
VFIAX,80000,47195.681641,112,192.881927,0,0.0
VTSAX,80000,42585.919655,377,52.110977,0,0.0
AAPL,80000,110055.119202,654,29.644299,0,0.0
KO,80000,0.0,0,0.0,0,0.0
JNJ,80000,18794.719727,112,100.535508,0,0.0
AXP,80000,785.0,5,73.655073,120,8419.570038
LMT,80000,10230.0,30,222.002612,0,0.0


In [None]:
# Agent holdings per security, the buy-and-hold lines and the cash balance over the test run.
ploting_results(LSTM_N_running_totals, test_df, LSTM_N_action_list, LSTM_N_cash_list)

In [None]:
# Portfolio allocation (securities + cash) at step index 10 of the test run.
pie_plot_for_day(LSTM_N_running_totals,10, LSTM_N_cash_list)