In [1]:
import shutup
shutup.please()

import gym
import src.environments.continuous.stock_trading  

import numpy as np
import pandas as pd 

from tqdm.notebook import trange, tqdm

### Train the agent
* Run training until the agent's running average exceeds the `success_threshold`
* Use a large number of episodes for the running average (1000+), so that even if the agent lands on a favorable sample, the average is not skewed by it

In [2]:
from src.agents.ppo import PpoAgent

In [3]:
def environment(describe=True):
    """Create the stock-trading gym environment for the IBM ticker.

    Args:
        describe: Accepted for interface compatibility; not used in this
            function body.

    Returns:
        The environment returned by ``gym.make`` with its
        ``success_threshold`` attribute set to 0.25.
    """
    # Technical indicators handed to the environment as extra features.
    technical_indicators = [
        "macd",
        "boll_ub",
        "boll_lb",
        "rsi_30",
        "cci_30",
        "dx_30",
        "close_30_sma",
        "close_60_sma",
    ]

    env = gym.make(
        'StockTradingEnvironment-v0',
        ticker="IBM",
        use_technical_indicators=technical_indicators,
    )

    # 0.25 == 25%
    env.success_threshold = 0.25

    return env

# Agent hyper-parameters. `environment` is passed as a callable, not as an
# already-built environment instance.
ppo_settings = dict(
    actor_learning_rate=0.000025,
    critic_learning_rate=0.000025,
    policy="CNN",
    epochs=10,
    n_workers=8,
)
agent = PpoAgent(environment, **ppo_settings)

# Load previously saved models (the recorded cell output reports
# "Models successfully loaded").
agent.load()

# NOTE(review): timesteps=-1 presumably means "no fixed step budget, train
# until the success threshold is met over the lookback window" - confirm
# against PpoAgent.learn.
training_settings = dict(
    timesteps=-1,
    log_every=10,
    success_threshold_lookback=1000,
    success_strict=True,
)
agent.learn(**training_settings)


@TODO
@TODO
* Models successfully loaded *


### Test the results
* Runs a set of episodes with unseen data
* Stores the results in a CSV file for later reference

In [4]:
def _as_percent(fraction):
    """Format a fraction as a percentage string rounded to three decimals."""
    return str(round(fraction*100,3))+'%'


success = 0
n_tests = 10000

scores = []
targets = []
portfolio_target_ratios = []
initial_investments = []

for i in trange(n_tests):
    state = agent.env.reset(visualize=False,mode="test")
    step = 0
    score = 0
    reward = 0
    done = False

    # Target return of this episode, relative to the starting capital.
    targets.append((agent.env.episode_target-agent.env.initial_investment)/agent.env.initial_investment)
    initial_investments.append(agent.env.initial_investment)

    while not done:
        state = agent.reshape_state(state)
        agent.env.render()
        action, action_onehot, prediction = agent.choose_action(state)

        # Advance the environment by one step with the chosen action.
        next_state, reward, done, info = agent.env.step(action)

        step += 1
        state = next_state
        score += reward

    # The episode is over: it counts as a success when the portfolio ended
    # above the initial investment.
    if agent.env.portfolio_value > agent.env.initial_investment:
        success += 1

    # Track the accumulated reward and how far the portfolio ended from the target.
    scores.append(score)
    portfolio_target_ratios.append(info["portfolio_value"]/info["episode_target"] -1)


# Single-row summary; keys are the column labels of the resulting frame.
summary_row = {
    '# Blind tests': n_tests,
    '% Average portfolio return': _as_percent(np.mean(scores)),
    '% Desired portfolio return': _as_percent(np.mean(targets)),
    'Portfolio/Target rate': _as_percent(np.mean(portfolio_target_ratios)),
    '% Historical minimum return': _as_percent(min(scores)),
    '% Historical maximum return': _as_percent(max(scores)),
    '% Episodes concluded with positive outcome': _as_percent(success/n_tests),
}
test_results_dataframe = pd.DataFrame([summary_row], columns=list(summary_row))

# Transposed so each metric is shown on its own row.
test_results_dataframe.T

  0%|          | 0/10000 [00:00<?, ?it/s]

Unnamed: 0,0
# Blind tests,10000
% Average portfolio return,24.809%
% Desired portfolio return,4.192%
Portfolio/Target rate,19.823%
% Historical minimum return,17.361%
% Historical maximum return,32.655%
% Episodes concluded with positive outcome,100.0%


In [5]:
# Hand the one-row blind-test summary to the agent's results writer.
# NOTE(review): the notebook notes say results are stored in a csv file;
# confirm the destination in results_writer.store_test_results.
agent.results_writer.store_test_results(agent,test_results_dataframe)

### Visual test
* Runs a set of episodes with unseen data
* Shows the evolution in real time


In [7]:
success = 0
n_tests = 2

scores = []
targets = []


for i in trange(n_tests):
    state = agent.env.reset(visualize=True,mode="test")
    step, score, reward = 0, 0, 0
    done = False

    # Target return of this episode, relative to the starting capital.
    targets.append((agent.env.episode_target-agent.env.initial_investment)/agent.env.initial_investment)
    initial_portfolio = agent.env.portfolio_value

    while not done:
        # Draw the current frame before acting.
        agent.env.render()

        # Shape the observation and let the agent pick an action.
        state = agent.reshape_state(state)
        action, action_onehot, prediction = agent.choose_action(state)

        # Advance the environment by one step with the chosen action.
        next_state, reward, done, _ = agent.env.step(action)

        step += 1
        state = next_state
        score += reward

    # The episode is over: it counts as a success when the portfolio ended
    # above the initial investment.
    if agent.env.portfolio_value > agent.env.initial_investment:
        success += 1

    scores.append(score)

agent.env.close()

  0%|          | 0/2 [00:00<?, ?it/s]

  img = np.fromstring(self.fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
  img = np.fromstring(self.fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')


### Transactions profit
* Tracks how much the agent gained from its purchases and sales
* Does not track the portfolio value
* Fees are already deducted from the reported profit

In [8]:
def _transaction_profits(trading_history):
    """Compute the relative profit of every completed buy/sell round trip.

    Args:
        trading_history: Sequence of ``[side, price]`` entries in execution
            order, where ``side`` is ``'buy'`` or ``'sell'`` and ``price``
            already includes the fee adjustment.

    Returns:
        A list with one entry per sell that closed at least one tracked
        purchase: ``(sell_price * n_purchases - total_cost) / total_cost``.
    """
    profits = []
    purchases = []
    for side, price in trading_history:
        if side == 'buy':
            purchases.append(price)
            continue
        if not purchases:
            # A sell with no tracked purchase would divide by zero (NaN with
            # numpy floats, ZeroDivisionError with Python floats) and corrupt
            # the episode total, so it is skipped.
            continue
        total_cost = sum(purchases)
        profits.append((price * len(purchases) - total_cost) / total_cost)
        purchases.clear()
    return profits


success = 0
n_tests = 10000

test_results_data = []
test_results_columns = [
    'dataset id',
    'episode start date',
    'episode end date',
    'episode working days',
    'total transactions',
    'total transaction profit',
    "dataframe hash",
    "agent hash"
]

print('NOTE: This test only tracks the transactions profit. ')

for i in trange(n_tests):
    trading_history = []
    state = agent.env.reset(visualize=False,mode="test")
    step = 0
    score = 0
    reward = 0
    done = False

    while not done:
        agent.env.render()

        # Select desired action
        state = agent.reshape_state(state)
        action, action_onehot, prediction = agent.choose_action(state)
        # NOTE(review): reads the close of row current_step-1; confirm this is
        # the price the environment actually trades at.
        current_price = agent.env.df.iloc[agent.env.current_step -1]['close']

        # Record the trade BEFORE stepping, using the pre-step env state.
        # NOTE(review): these conditions presumably mirror the environment's
        # own execution rules - confirm they stay in sync.
        if action == agent.env.ACTIONS.BUY:
            discounted_price = current_price * (1+agent.env.fees.BUY)
            if agent.env.stock_held < agent.env.maximum_stocks_held and agent.env.cash_in_hand >= discounted_price:
                trading_history.append(['buy', discounted_price])
        elif action == agent.env.ACTIONS.SELL:
            if agent.env.stock_held > 0:
                discounted_price = current_price * (1-agent.env.fees.SELL)
                trading_history.append(['sell', discounted_price])

        # Retrieve new state, reward, and whether the state is terminal
        next_state, reward, done, _ = agent.env.step(action)

        step += 1
        state = next_state
        score += reward

    # The episode is over: count it as a success when the portfolio ended
    # above the initial investment, then summarise its transactions.
    if agent.env.portfolio_value > agent.env.initial_investment:
        success += 1

    profits = _transaction_profits(trading_history)

    test_results_data.append([
        agent.env.dataset_idx,
        agent.env.df.iloc[agent.env.lookback].name,
        agent.env.df.iloc[-1].name,
        agent.env.window_size,
        len(profits),
        sum(profits)*100,
        agent.env.df_name,
        agent.hash
    ])


agent.env.close()


pd.DataFrame(test_results_data,columns=test_results_columns)

NOTE: This test only tracks the transactions profit. 


  0%|          | 0/10000 [00:00<?, ?it/s]

Unnamed: 0,dataset id,episode start date,episode end date,episode working days,total transactions,total transaction profit,dataframe hash,agent hash
0,276,2019-04-05,2019-10-18,126,13,39.615811,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
1,282,2019-04-16,2019-10-28,126,14,42.145872,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
2,285,2019-04-22,2019-10-31,126,16,31.218329,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
3,276,2019-04-05,2019-10-18,126,13,39.615811,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
4,319,2019-06-13,2019-12-23,126,16,30.447215,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
...,...,...,...,...,...,...,...,...
9995,283,2019-04-17,2019-10-29,126,16,35.118808,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
9996,297,2019-05-08,2019-11-20,126,13,37.256734,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
9997,322,2019-06-18,2019-12-27,126,18,32.364169,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
9998,318,2019-06-12,2019-12-20,126,16,31.853575,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7


In [9]:
# Build the per-episode transaction table from the collected rows and column labels.
transaction_results_df = pd.DataFrame(test_results_data,columns=test_results_columns)
# Display the episodes ordered by dataset id. sort_values returns a new frame
# here, so transaction_results_df itself keeps its original row order.
transaction_results_df.sort_values(by="dataset id")

Unnamed: 0,dataset id,episode start date,episode end date,episode working days,total transactions,total transaction profit,dataframe hash,agent hash
8696,259,2019-03-13,2019-09-24,126,12,39.620471,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
3060,259,2019-03-13,2019-09-24,126,12,39.620471,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
6385,259,2019-03-13,2019-09-24,126,12,39.620471,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
4077,259,2019-03-13,2019-09-24,126,12,39.620471,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
8867,259,2019-03-13,2019-09-24,126,12,39.620471,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
...,...,...,...,...,...,...,...,...
1153,323,2019-06-19,2019-12-30,126,18,32.248565,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
4190,323,2019-06-19,2019-12-30,126,18,32.248565,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
4188,323,2019-06-19,2019-12-30,126,18,32.248565,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7
4836,323,2019-06-19,2019-12-30,126,18,32.248565,417b30a39b8996e1f8874a02acdbc6e4,0bd195653939daab0c40b62ec35b2ff7


In [10]:
# Episodes whose summed transaction profit was not strictly positive.
non_profitable_mask = transaction_results_df['total transaction profit'] <= 0
failed_episodes_df = transaction_results_df[non_profitable_mask]

# Distinct dataset ids among those episodes, in ascending order.
failed_episodes_idx = np.sort(failed_episodes_df['dataset id'].unique())

In [11]:
# Number of distinct dataset ids with at least one non-profitable episode,
# and with at least one profitable episode, shown as a (failed, profitable) pair.
episode_profit = transaction_results_df['total transaction profit']
episode_dataset_id = transaction_results_df['dataset id']

failed_dataset_count = len(episode_dataset_id[episode_profit <= 0].unique())
profitable_dataset_count = len(episode_dataset_id[episode_profit > 0].unique())

(failed_dataset_count, profitable_dataset_count)

(0, 65)

In [12]:
import matplotlib.pyplot as plt 

# One figure per dataset id that had a non-profitable episode; the title shows
# "#<id> - <non-profitable episodes>/<total episodes>" for that id.
# NOTE(review): every figure plots agent.env.df['close'], i.e. whatever frame
# the environment currently holds, regardless of idx - confirm this is intended.
for idx in failed_episodes_idx:
    failed_runs = len(failed_episodes_df[failed_episodes_df['dataset id'] == idx])
    total_runs = len(transaction_results_df[transaction_results_df['dataset id'] == idx])

    plt.figure(figsize=(16,4))
    plt.plot(agent.env.df['close'])
    plt.title('#{} - {}/{}'.format(idx, failed_runs, total_runs))