<a href="https://colab.research.google.com/github/sugiyama404/ReinfoceLearningForTrading/blob/main/m_thread_random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from google.colab import drive
import copy

from datetime import datetime
from concurrent.futures import ThreadPoolExecutor

# Which dataset split to load; the same tag is embedded in the result file name.
mode = 'test'

# Google Drive has to be mounted before any of the paths below can be resolved.
drive.mount('/content/drive/')

# Input CSV: <Drive>/<nov_dir>/sp500_<mode>.csv
nov_dir = 'Colab Notebooks/dataset/reinforcement_learning/'
nov_path = ''.join(('/content/drive/My Drive/', nov_dir, f'sp500_{mode}.csv'))

# Output CSV: <Drive>/<exp_dir>/csv_data/m_thread_random_<mode>.csv
exp_dir = 'Colab Notebooks/workspace/export/'
csv_path = ''.join(('/content/drive/My Drive/', exp_dir, f'csv_data/m_thread_random_{mode}.csv'))

# Read the input file and turn the 'Date' strings (YYYY-MM-DD) into timestamps.
df = pd.read_csv(nov_path)
df['Date'] = pd.to_datetime(df['Date'], format = '%Y-%m-%d')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
class Environment:
    """Single-asset trading simulator that walks through the 'SP500' column of a frame.

    Actions are taken from ``action_space``: ``0`` sells the whole position,
    ``2`` buys as many whole units as the cash allows, and ``1`` does nothing.
    The observation is a length-3 array ``[units held, current price, cash]``.
    The reward of a step is the change in total revenue (position value plus
    cash) caused by that step.
    """

    def __init__(self, df, initial_money=1000, mode = 'test'):
        """Prepare the price data and put the simulator in its initial state.

        Args:
            df: DataFrame holding an 'SP500' column; rows containing NaN are dropped.
            initial_money: Cash available at the start of every episode.
            mode: When equal to 'test', trade statistics are tracked and
                reported in the ``info`` dict returned by ``step``.
        """
        # drop=True discards the old index instead of keeping it as a stray
        # 'index' column; after this the labels are 0..n-1 so .loc[step] is positional.
        self.df = df.dropna().reset_index(drop=True)

        self.df_total_steps  = len(self.df)-1
        self.initial_money   = initial_money
        self.mode            = mode
        self.trade_time      = None
        self.trade_win       = None
        self.brfore_buy_cash = None
        self.action_space    = np.array([0, 1, 2])
        self.hold_a_position = None
        self.now_price       = None
        self.cash_in_hand    = None
        self.now_step        = None
        self.end_step        = None

        self.reset()

    def reset(self):
        """Rewind to the first row with no position and ``initial_money`` in cash.

        Returns:
            The initial observation ``[units held, current price, cash]``.
        """
        self.trade_time      = 0
        self.trade_win       = 0
        self.brfore_buy_cash = 0
        self.now_step        = 0
        self.end_step        = self.df_total_steps
        self.hold_a_position = 0.0
        self.now_price       = self.df.loc[self.now_step, 'SP500']
        self.cash_in_hand    = self.initial_money

        return self._get_now_state()

    def step(self, action):
        """Advance one row, execute ``action`` at the new price and score it.

        Args:
            action: One of the values in ``action_space``.

        Returns:
            Tuple ``(state, reward, done, info)``. ``done`` becomes True on the
            last row, where any open position is liquidated regardless of
            ``action``. ``info`` always holds 'cur_revenue'; in test mode it
            also holds 'trade_time' and 'trade_win'.
        """
        # Revenue is measured at the previous price, before the clock moves.
        prev_revenue = self._get_revenue()

        self.now_step += 1
        self.now_price = self.df.loc[self.now_step, 'SP500']

        done = (self.end_step == self.now_step)

        self._trade(action,done)
        cur_revenue = self._get_revenue()
        reward = cur_revenue - prev_revenue

        info = { 'cur_revenue' : cur_revenue }
        if self.mode == 'test':
            info['trade_time'] = self.trade_time
            info['trade_win'] = self.trade_win

        return self._get_now_state(), reward, done, info

    def _get_now_state(self):
        """Return the observation ``[units held, current price, cash]`` as a float array."""
        return np.array(
            [self.hold_a_position, self.now_price, self.cash_in_hand],
            dtype=float,
        )

    def _get_revenue(self):
        """Return total revenue: position valued at the current price plus cash."""
        return self.hold_a_position * self.now_price + self.cash_in_hand

    def _trade(self, action,lastorder = False):
        """Apply ``action`` at the current price.

        Args:
            action: One of the values in ``action_space``.
            lastorder: When True the position is force-liquidated and ``action``
                is ignored; this forced sale does not touch the trade counters.
        """
        if lastorder:
            self.cash_in_hand += self.now_price * self.hold_a_position
            self.hold_a_position = 0
            return

        if self.action_space[0] == action: # sell
            if self.hold_a_position != 0:
                self.cash_in_hand += self.now_price * self.hold_a_position
                self.hold_a_position = 0
                if self.mode == 'test':
                    self.trade_time += 1
                    # A trade wins when cash after the sale exceeds cash before the buy.
                    if self.cash_in_hand > self.brfore_buy_cash:
                        self.trade_win += 1
        elif self.action_space[2] == action: # buy
            if self.hold_a_position == 0:
                if self.mode == 'test':
                    self.brfore_buy_cash = self.cash_in_hand
                # A non-positive price would never reduce the cash, so the
                # purchase loop below could not terminate; skip the order.
                if self.now_price > 0:
                    # Buy one unit at a time while cash strictly exceeds the price.
                    while self.cash_in_hand > self.now_price:
                        self.hold_a_position += 1
                        self.cash_in_hand -= self.now_price

In [3]:
class Main:
    """Plays episodes with a uniformly random agent and logs each result to a CSV file.

    In 'test' mode every row holds the final revenue, the number of trades and
    the number of winning trades; in any other mode only the final revenue.
    """

    def __init__(self, env, episodes_times = 1000, mode = 'test', out_path = None):
        """Store the settings and (re)create the result file with its header row.

        Args:
            env: Object providing ``reset()`` and ``step(action)``; ``step`` must
                return ``(state, reward, done, info)``.
            episodes_times: Number of episodes ``play_game`` runs.
            mode: 'test' selects the three-column output, anything else the
                single-column output.
            out_path: Result file to write. Defaults to the notebook-level
                ``csv_path`` when omitted.
        """
        self.env = env
        self.episodes_times = episodes_times
        self.mode = mode
        # The notebook-level csv_path is only looked up when no file was given.
        self.csv_path = csv_path if out_path is None else out_path

        if self.mode == 'test':
            header = 'FixedProfit,TradeTimes,TradeWin'
        else:
            header = 'FixedProfit'
        # Mode 'w' truncates: any earlier content of the file is discarded.
        with open(self.csv_path, 'w') as f:
            print(header, file=f)

    def play_game(self):
        """Run ``episodes_times`` episodes; print and append one result line per episode."""
        for episode in range(self.episodes_times):
            self.env.reset()
            done = False
            start_time = datetime.now()

            while not done:
                action = random.randrange(3)
                _, _, done, info = self.env.step(action)

            play_time = datetime.now() - start_time

            # Use the instance's episode count, not a same-named notebook global.
            summary = f"Episode: {episode + 1}/{self.episodes_times} RapTime: {play_time} FixedProfit: {info['cur_revenue']:.0f}"
            row = str(info['cur_revenue'])
            if self.mode == 'test':
                summary += f" TradeTimes: {info['trade_time']} TradeWin: {info['trade_win']}"
                row += ',' + str(info['trade_time']) + ',' + str(info['trade_win'])

            print(summary)
            with open(self.csv_path, 'a') as f:
                print(row, file=f)

In [4]:
# Starting cash per episode and number of episodes each worker plays.
initial_money  = 1000000
episodes_times = 25

# One independent Environment/Main pair per worker thread.
thread_num = 4
datas = []
for i in range(thread_num):
    env  = Environment(df, initial_money=initial_money,mode = mode)
    main = Main(env, episodes_times, mode)
    datas.append(main)

features = []
with ThreadPoolExecutor(max_workers=thread_num) as executor:
    for runner in datas:
        # Submit the bound method itself. A lambda that looks up the loop
        # variable when it is called may run a different runner than intended
        # if the worker starts after the loop has already moved on.
        features.append(executor.submit(runner.play_game))

# Leaving the with-block waits for all workers; result() re-raises any
# exception a worker hit, which would otherwise be silently dropped.
for future in features:
    future.result()

Episode: 1/25 RapTime: 0:00:00.068697 FixedProfit: 1382480 TradeTimes: 148 TradeWin: 87
Episode: 1/25 RapTime: 0:00:00.060751 FixedProfit: 1477933 TradeTimes: 132 TradeWin: 80
Episode: 1/25 RapTime: 0:00:00.117331 FixedProfit: 934277 TradeTimes: 151 TradeWin: 99
Episode: 1/25 RapTime: 0:00:00.149046 FixedProfit: 1129642 TradeTimes: 144 TradeWin: 88
Episode: 2/25 RapTime: 0:00:00.064586 FixedProfit: 1486679 TradeTimes: 143 TradeWin: 79
Episode: 2/25 RapTime: 0:00:00.069079 FixedProfit: 1073802 TradeTimes: 140 TradeWin: 82
Episode: 2/25 RapTime: 0:00:00.106188 FixedProfit: 1462853 TradeTimes: 150 TradeWin: 92
Episode: 3/25 RapTime: 0:00:00.060101 FixedProfit: 1223919 TradeTimes: 136 TradeWin: 83
Episode: 2/25 RapTime: 0:00:00.077206 FixedProfit: 1449642 TradeTimes: 149 TradeWin: 86
Episode: 4/25 RapTime: 0:00:00.065180 FixedProfit: 1100441 TradeTimes: 149 TradeWin: 87
Episode: 3/25 RapTime: 0:00:00.086954 FixedProfit: 1178462 TradeTimes: 147 TradeWin: 87
Episode: 3/25 RapTime: 0:00:00.10