<a href="https://colab.research.google.com/github/sugiyama404/ReinfoceLearningForTrading/blob/main/simple_rl_random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from google.colab import drive
import copy

from datetime import datetime
from matplotlib import pyplot as plt


# Run configuration: `mode` selects which sp500_<mode>.csv file is loaded and,
# together with `name`, forms the file name of the exported results CSV.
mode = 'test'
name = 'random'

# Folder layout, relative to the mounted Drive root.
drive_root = '/content/drive/My Drive/'
nov_dir = 'Colab Notebooks/dataset/reinforcement_learning/'
exp_dir = 'Colab Notebooks/workspace/export/'

# Input dataset, model folder and results CSV locations.
nov_path = drive_root + nov_dir + f'sp500_{mode}.csv'
mdl_dir = drive_root + exp_dir + 'models'
csv_path = drive_root + exp_dir + f'csv_data/{name}_{mode}.csv'

# The Drive must be mounted before the dataset can be read from it.
drive.mount('/content/drive/')

# Load the price table and parse the 'Date' column (YYYY-MM-DD) as datetimes.
df = pd.read_csv(nov_path)
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
class Environment:
    """Step-by-step trading simulation over the 'SP500' column of a DataFrame.

    At any time the agent holds either cash only or a whole number of units
    plus leftover cash.  Every call to ``step`` moves one row forward, applies
    the chosen action at the new price and returns the change in total asset
    value as the reward.  On the last row any open position is closed,
    whatever action was passed.

    Actions (see ``action_space``): 0 = buy, 1 = hold, 2 = sell.

    Args:
        df: price table containing an 'SP500' column; rows with missing
            values are dropped.
        initial_money: cash available at the start of every episode.
        mode: when equal to 'test', trade statistics (``trade_time``,
            ``trade_win``) are tracked and reported in ``info``.
    """

    def __init__(self, df, initial_money=100000, mode='test'):
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_money = initial_money
        self.mode = mode
        self.trade_time = None
        self.trade_win = None
        self.brfore_buy_cash = None
        self.action_space = np.array([0, 1, 2])  # buy, hold, sell
        self.hold_a_position = None
        self.now_price = None
        self.cash_in_hand = None

        self.reset()

    def reset(self):
        """Start a new episode at the first row and return the initial state."""
        self.trade_time = 0
        self.trade_win = 0
        self.brfore_buy_cash = 0
        self.end_step = self.df_total_steps
        self.now_step = 0
        self.hold_a_position = 0.0
        self.now_price = self.df.loc[self.now_step, 'SP500']
        self.cash_in_hand = self.initial_money

        return self._get_now_state()

    def step(self, action):
        """Advance one row, apply ``action`` and report the outcome.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``reward`` is the
            change in total asset value caused by this step, ``done`` is True
            on the last row, and ``info`` holds 'cur_revenue' (plus
            'trade_time' and 'trade_win' when mode == 'test').
        """
        # Valued at the previous price, before moving to the next row.
        prev_revenue = self._get_revenue()
        self.now_step += 1
        self.now_price = self.df.loc[self.now_step, 'SP500']

        done = (self.end_step == self.now_step)

        self._trade(action, done)
        cur_revenue = self._get_revenue()

        reward = cur_revenue - prev_revenue

        if self.mode == 'test':
            info = {
                'cur_revenue': cur_revenue,
                'trade_time': self.trade_time,
                'trade_win': self.trade_win,
            }
        else:
            info = {'cur_revenue': cur_revenue}

        return self._get_now_state(), reward, done, info

    def _get_now_state(self):
        """Return ``[units held, current price, cash]`` as a float array."""
        state = np.empty(3)
        state[0] = self.hold_a_position
        state[1] = self.now_price
        state[2] = self.cash_in_hand
        return state

    def _get_revenue(self):
        """Return total asset value: position at the current price plus cash."""
        return self.hold_a_position * self.now_price + self.cash_in_hand

    def _trade(self, action, lastorder=False):
        """Apply ``action`` at the current price.

        When ``lastorder`` is True the action is ignored and any open
        position is closed instead.
        """
        if lastorder:
            self._close_position()
        elif action == self.action_space[0]:  # buy
            self._open_position()
        elif action == self.action_space[2]:  # sell
            self._close_position()

    def _open_position(self):
        """Buy as many whole units as cash allows; no-op if already holding."""
        if self.hold_a_position != 0:
            return
        if self.mode == 'test':
            # Remembered so the later sale can be classified as win or loss.
            self.brfore_buy_cash = self.cash_in_hand
        # One unit at a time, while cash strictly exceeds the price.  The
        # positive-price guard keeps the loop from running forever on a
        # zero or negative price.
        while self.now_price > 0 and self.cash_in_hand > self.now_price:
            self.hold_a_position += 1
            self.cash_in_hand -= self.now_price

    def _close_position(self):
        """Sell the whole position; no-op (and not counted) when nothing is held."""
        if self.hold_a_position == 0:
            # Nothing to sell: counting a trade here would inflate the
            # statistics, e.g. on the forced close at the end of an episode.
            return
        self.cash_in_hand += self.now_price * self.hold_a_position
        self.hold_a_position = 0
        if self.mode == 'test':
            self.trade_time += 1
            if self.cash_in_hand > self.brfore_buy_cash:
                self.trade_win += 1

In [3]:
class Main:
    """Plays episodes with uniformly random actions and records the results.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``; ``step``
            must return ``(state, reward, done, info)`` with ``info``
            containing 'cur_revenue' (and 'trade_time' / 'trade_win' when
            mode == 'test').
        mdl_dir: model directory; stored on the instance, not used here.
        name: run label; stored on the instance, not used here.
        episodes_times: number of episodes to play.
        mode: when equal to 'test', trade statistics are printed and recorded
            in addition to the final revenue.
        csv_path: where the results are written.  When None, the
            module-level ``csv_path`` is used.
    """

    def __init__(self, env, mdl_dir, name, episodes_times=1000, mode='test', csv_path=None):
        self.env = env
        self.mdl_dir = mdl_dir
        self.episodes_times = episodes_times
        self.mode = mode
        self.name = name
        self.csv_path = csv_path

        if self.mode == 'test':
            columns = ['FixedProfit', 'TradeTimes', 'TradeWin']
        else:
            columns = ['FixedProfit']
        # One row per episode; filled in by play_game().
        self.df_rec = pd.DataFrame(index=[], columns=columns)

    def play_game(self):
        """Run all episodes, print a summary line for each and save the CSV.

        After the call ``df_rec`` holds one row per episode of this run.
        """
        records = []

        for episode in range(self.episodes_times):
            self.env.reset()
            done = False
            start_time = datetime.now()

            while not done:
                action = random.randrange(3)
                _, _, done, info = self.env.step(action)

            play_time = datetime.now() - start_time
            if self.mode == 'test':
                records.append([info['cur_revenue'], info['trade_time'], info['trade_win']])
                print("Episode: {}/{} RapTime: {} FixedProfit: {:.0f} TradeTimes: {} TradeWin: {}".format(episode + 1, self.episodes_times, play_time, info['cur_revenue'], info['trade_time'], info['trade_win']))
            else:
                records.append([info['cur_revenue']])
                print("Episode: {}/{} RapTime: {} FixedProfit: {:.0f}".format(episode + 1, self.episodes_times, play_time, info['cur_revenue']))

        # Without this the exported CSV would contain only the header row.
        self.df_rec = pd.DataFrame(records, columns=list(self.df_rec.columns))
        self._save_csv()

    def _save_csv(self):
        """Write the recorded results to the configured CSV path."""
        # Fall back to the module-level path when none was given explicitly.
        target = self.csv_path if self.csv_path is not None else csv_path
        self.df_rec.to_csv(target)

In [4]:
# Experiment settings: starting cash and number of random-policy episodes.
initial_money = 1_000_000
episodes_times = 100

# Build the environment and the episode runner, then play all episodes.
env = Environment(df, initial_money=initial_money, mode=mode)
main = Main(env, mdl_dir, name, episodes_times=episodes_times, mode=mode)
main.play_game()

Episode: 1/100 RapTime: 0:00:00.042816 FixedProfit: 1868846 TradeTimes: 150 TradeWin: 98
Episode: 2/100 RapTime: 0:00:00.034663 FixedProfit: 1302813 TradeTimes: 142 TradeWin: 92
Episode: 3/100 RapTime: 0:00:00.044737 FixedProfit: 1366725 TradeTimes: 156 TradeWin: 101
Episode: 4/100 RapTime: 0:00:00.038188 FixedProfit: 1238767 TradeTimes: 143 TradeWin: 88
Episode: 5/100 RapTime: 0:00:00.037154 FixedProfit: 1536347 TradeTimes: 144 TradeWin: 90
Episode: 6/100 RapTime: 0:00:00.039029 FixedProfit: 1034274 TradeTimes: 148 TradeWin: 95
Episode: 7/100 RapTime: 0:00:00.039620 FixedProfit: 1272652 TradeTimes: 152 TradeWin: 88
Episode: 8/100 RapTime: 0:00:00.040202 FixedProfit: 1546383 TradeTimes: 142 TradeWin: 86
Episode: 9/100 RapTime: 0:00:00.038176 FixedProfit: 1126295 TradeTimes: 143 TradeWin: 91
Episode: 10/100 RapTime: 0:00:00.035224 FixedProfit: 1024584 TradeTimes: 147 TradeWin: 92
Episode: 11/100 RapTime: 0:00:00.037821 FixedProfit: 1224745 TradeTimes: 144 TradeWin: 81
Episode: 12/100 Ra