<a href="https://colab.research.google.com/github/sugiyama404/ReinfoceLearningForTrading/blob/main/m_thread_random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from google.colab import drive
import copy

from datetime import datetime
from concurrent.futures import ThreadPoolExecutor as PoolExecutor

# Mount Google Drive so the dataset and the export folder become reachable.
drive.mount('/content/drive/')

# Folders (relative to "My Drive") for the input data and the exported results.
nov_dir = 'Colab Notebooks/dataset/reinforcement_learning/'
exp_dir = 'Colab Notebooks/workspace/export/'

# Full paths: the price CSV that is read, and the CSV the results are written to.
nov_path = ''.join(('/content/drive/My Drive/', nov_dir, 'sp500_test.csv'))
csv_path = ''.join(('/content/drive/My Drive/', exp_dir, 'm_thread_random_test.csv'))

# Load the data and parse the 'Date' column using the YYYY-MM-DD format.
df = pd.read_csv(nov_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d')
)

Mounted at /content/drive/


In [2]:
class Environment:
    """Single-asset trading simulator driven by the ``SP500`` column of a DataFrame.

    Each call to :meth:`step` advances one row and applies one action from
    ``action_space``: 0 = buy as many units as the cash allows, 1 = hold,
    2 = sell the whole position. On the final row the action is ignored and
    any open position is liquidated.

    The observation returned by :meth:`reset` and :meth:`step` is a float array
    ``[hold_a_position, now_price, cash_in_hand]``.
    """

    def __init__(self, df, initial_money=100000, mode = 'test'):
        """Store the price table and put the account into its initial state.

        Args:
            df: DataFrame with an ``SP500`` column. Rows containing missing
                values are dropped and the index is rebuilt from 0.
            initial_money: Cash available at the start of every episode.
            mode: When ``'test'``, trade statistics (``trade_time``,
                ``trade_win``) are tracked and reported in ``info``.
        """
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df)-1   # index of the last usable row
        self.initial_money = initial_money
        self.mode = mode
        self.trade_time = None                 # number of closed trades (test mode)
        self.trade_win = None                  # closed trades that ended with more cash than before the buy
        self.brfore_buy_cash = None            # cash recorded just before the latest buy (test mode)
        self.action_space = np.array([0, 1, 2]) # buy,hold,sell
        self.hold_a_position = None            # number of units currently held
        self.now_price = None                  # price at the current step
        self.cash_in_hand = None               # uninvested cash

        self.reset()

    def reset(self):
        """Start a new episode at row 0 and return the initial observation."""
        self.trade_time = 0
        self.trade_win = 0
        self.brfore_buy_cash = 0
        self.end_step = self.df_total_steps
        self.now_step = 0
        self.hold_a_position = 0.0
        self.now_price = self.df.loc[self.now_step, 'SP500']
        self.cash_in_hand = self.initial_money

        return self._get_now_state()

    def step(self, action):
        """Advance one row, apply ``action`` and report the outcome.

        Args:
            action: One of the values in ``action_space``. It is ignored on the
                final row, where the position is liquidated instead.

        Returns:
            Tuple ``(state, reward, done, info)``. ``reward`` is the change in
            total account value (cash plus position at the current price)
            caused by this step. ``done`` is True on the last row. ``info``
            always holds ``cur_revenue``; in test mode it also holds
            ``trade_time`` and ``trade_win``.
        """
        prev_revenue = self._get_revenue()
        self.now_step += 1
        self.now_price = self.df.loc[self.now_step, 'SP500']

        done = (self.end_step == self.now_step)

        self._trade(action,done)
        cur_revenue = self._get_revenue()

        reward = cur_revenue - prev_revenue

        if self.mode == 'test':
            info = { 'cur_revenue' : cur_revenue , 'trade_time' : self.trade_time, 'trade_win' : self.trade_win }
        else:
            info = { 'cur_revenue' : cur_revenue }

        return self._get_now_state(), reward, done, info

    def _get_now_state(self):
        """Return the observation ``[hold_a_position, now_price, cash_in_hand]`` as a float array."""
        state = np.empty(3)
        state[0] = self.hold_a_position
        state[1] = self.now_price
        state[2] = self.cash_in_hand
        return state

    def _get_revenue(self):
        """Return the total account value: position valued at the current price plus cash."""
        return self.hold_a_position * self.now_price + self.cash_in_hand

    def _close_position(self):
        """Sell the whole position at the current price; do nothing when flat.

        In test mode a closed position counts as one trade, and as a win when
        the resulting cash exceeds the cash recorded before the matching buy.
        """
        if self.hold_a_position == 0:
            # Nothing to sell, so no trade (and no win) must be recorded.
            return
        self.cash_in_hand += self.now_price * self.hold_a_position
        self.hold_a_position = 0
        if self.mode == 'test':
            self.trade_time += 1
            if self.cash_in_hand > self.brfore_buy_cash:
                self.trade_win += 1

    def _trade(self, action,lastorder = False):
        """Apply ``action`` to the account at the current price.

        Args:
            action: Value from ``action_space`` (0 buy, 1 hold, 2 sell).
            lastorder: When True the action is ignored and any open position
                is liquidated (used on the final step of an episode).
        """
        if lastorder:
            self._close_position()
            return

        if self.action_space[0] == action: # buy
            if self.hold_a_position == 0:
                if self.mode == 'test':
                    # Remember the cash before buying so the later sale can be judged.
                    self.brfore_buy_cash = self.cash_in_hand
                # Buy one unit at a time while cash strictly exceeds the price.
                # The price check prevents a non-terminating loop if the data
                # ever contains a non-positive price.
                while self.now_price > 0 and self.cash_in_hand > self.now_price:
                    self.hold_a_position += 1
                    self.cash_in_hand -= self.now_price
        elif self.action_space[2] == action: # sell
            self._close_position()

In [3]:
def play_game(env, episodes_times = 25, mode = 'test'):
    """Play episodes with uniformly random actions and log one result per episode.

    For every episode a summary line is printed and a row is appended to the
    file at the module-level ``csv_path``.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``; ``step``
            must return ``(state, reward, done, info)`` with
            ``info['cur_revenue']``.
        episodes_times: Number of episodes to play.
        mode: When ``'test'``, ``info`` must also contain ``trade_time`` and
            ``trade_win`` (i.e. the environment must run in the same mode);
            these are added to the printed line and the CSV row.
    """
    for episode in range(episodes_times):
        env.reset()
        done = False
        start_time = datetime.now()

        while not done:
            action = random.randrange(3)
            _, _, done, info = env.step(action)

        play_time = datetime.now() - start_time

        summary = f"Episode: {episode + 1}/{episodes_times} RapTime: {play_time} FixedProfit: {info['cur_revenue']:.0f}"
        row = str(info['cur_revenue'])
        if mode == 'test':
            summary += f" TradeTimes: {info['trade_time']} TradeWin: {info['trade_win']}"
            row += ',' + str(info['trade_time']) + ',' + str(info['trade_win'])

        # Hand the text and its newline to print as a single string: with the
        # default end='\n' the newline is written separately, which lets lines
        # from concurrently running workers run together.
        print(summary + '\n', end='')
        with open(csv_path, 'a') as f:
            f.write(row + '\n')

In [4]:
# Run configuration for this experiment.
initial_money=1000000
episodes_times = 25
mode = 'test'

# Start a fresh result file whose header matches the rows appended for this mode.
row = 'FixedProfit,TradeTimes,TradeWin' if mode == 'test' else 'FixedProfit'
with open(csv_path, 'w') as f:
    print(row, file=f)

# One independent environment per worker thread.
thread_num = 4
datas = [Environment(df, initial_money=initial_money, mode=mode) for _ in range(thread_num)]

# Future objects, one per submitted worker (variable name kept from the original notebook).
features = []
with PoolExecutor(max_workers=thread_num) as executor:
    for env in datas:
        # Pass env as an argument instead of closing over the loop variable:
        # a lambda would look `env` up only when the worker actually starts,
        # by which time the loop may already have moved on to another
        # environment. The configured episode count and mode are forwarded too.
        features.append(executor.submit(play_game, env, episodes_times, mode))

# An exception raised inside a worker is stored on its Future; asking for the
# result re-raises it here instead of letting the failure pass silently.
for future in features:
    future.result()

Episode: 1/25 RapTime: 0:00:00.085028 FixedProfit: 786291 TradeTimes: 147 TradeWin: 82Episode: 1/25 RapTime: 0:00:00.091349 FixedProfit: 1155850 TradeTimes: 152 TradeWin: 85

Episode: 1/25 RapTime: 0:00:00.120924 FixedProfit: 959058 TradeTimes: 150 TradeWin: 88
Episode: 1/25 RapTime: 0:00:00.093377 FixedProfit: 1078848 TradeTimes: 138 TradeWin: 83
Episode: 2/25 RapTime: 0:00:00.074374 FixedProfit: 984651 TradeTimes: 155 TradeWin: 96
Episode: 2/25 RapTime: 0:00:00.068488 FixedProfit: 829898 TradeTimes: 162 TradeWin: 89
Episode: 2/25 RapTime: 0:00:00.116960 FixedProfit: 1260325 TradeTimes: 142 TradeWin: 87
Episode: 2/25 RapTime: 0:00:00.085506 FixedProfit: 1255471 TradeTimes: 153 TradeWin: 92
Episode: 3/25 RapTime: 0:00:00.063014 FixedProfit: 1260505 TradeTimes: 146 TradeWin: 84
Episode: 3/25 RapTime: 0:00:00.092689 FixedProfit: 1370753 TradeTimes: 151 TradeWin: 88
Episode: 3/25 RapTime: 0:00:00.097508 FixedProfit: 1304697 TradeTimes: 163 TradeWin: 102
Episode: 4/25 RapTime: 0:00:00.0835