<a href="https://colab.research.google.com/github/sugiyama404/ReinfoceLearningForTrading/blob/main/m_process_random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from google.colab import drive
import copy

from datetime import datetime
from multiprocessing import Process

# Run configuration: `mode` selects which dataset split is read and is embedded
# in the output file name; `name` identifies this experiment in the export path.
mode, name = 'test', 'm_process_random'

# Make Google Drive available under /content/drive/ before touching any path.
drive.mount('/content/drive/')

# Drive-relative folders for the input dataset and for exported results.
nov_dir = 'Colab Notebooks/dataset/reinforcement_learning/'
exp_dir = 'Colab Notebooks/workspace/export/'

# Absolute paths: the price CSV to read and the per-episode result CSV to write.
nov_path = ''.join(['/content/drive/My Drive/', nov_dir, f'sp500_{mode}.csv'])
csv_path = ''.join(['/content/drive/My Drive/', exp_dir, f'csv_data/{name}_{mode}.csv'])

# Load the price table and parse the 'Date' column (YYYY-MM-DD) into datetimes.
df = pd.read_csv(nov_path)
df = df.assign(Date=pd.to_datetime(df['Date'], format='%Y-%m-%d'))

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
class Environment:
    """Single-asset trading simulator driven by the 'SP500' column of a DataFrame.

    The agent holds either cash or a whole number of units. Each call to
    ``step`` advances one row, executes the requested action at that row's
    price and returns the change in total account value as the reward.

    Actions (see ``action_space``):
        0 -- sell the whole position (no-op when nothing is held)
        1 -- do nothing
        2 -- buy as many whole units as the cash allows (no-op when already holding)
    """

    def __init__(self, df, initial_money=1000, mode = 'test'):
        """Store the price table and start a fresh episode.

        Args:
            df: DataFrame with an 'SP500' price column; rows containing NaN
                are dropped and the index is rebuilt as 0..n-1.
            initial_money: cash available at the start of every episode.
            mode: when 'test', trade statistics are tracked and reported in
                the ``info`` dict returned by ``step``.
        """
        self.df = df.dropna().reset_index()

        self.df_total_steps  = len(self.df)-1
        self.initial_money   = initial_money
        self.mode            = mode
        self.trade_time      = None
        self.trade_win       = None
        # NOTE: attribute name (including its spelling) is kept for compatibility.
        self.brfore_buy_cash = None
        self.action_space    = np.array([0, 1, 2])
        self.hold_a_position = None
        self.now_price       = None
        self.cash_in_hand    = None
        self.now_step        = None
        self.end_step        = None

        self.reset()

    def reset(self):
        """Rewind to the first row with no position and return the initial state."""
        self.trade_time      = 0
        self.trade_win       = 0
        self.brfore_buy_cash = 0
        self.now_step        = 0
        self.end_step        = self.df_total_steps
        self.hold_a_position = 0.0
        self.now_price       = self.df.loc[self.now_step, 'SP500']
        self.cash_in_hand    = self.initial_money

        return self._get_now_state()

    def step(self, action):
        """Advance one row, execute ``action`` at the new price and report the result.

        On the final row the action is ignored and any open position is
        liquidated instead. ``step`` is not meant to be called again after
        ``done`` is True without calling ``reset`` first, because there is no
        further price row to read.

        Returns:
            (state, reward, done, info) where ``reward`` is the change in
            total account value across the step and ``info`` always contains
            'cur_revenue' (plus 'trade_time' and 'trade_win' in test mode).
        """
        prev_revenue = self._get_revenue()

        self.now_step += 1
        self.now_price = self.df.loc[self.now_step, 'SP500']

        done = (self.end_step == self.now_step)

        self._trade(action, done)
        cur_revenue = self._get_revenue()
        reward = cur_revenue - prev_revenue

        info = { 'cur_revenue' : cur_revenue }
        if self.mode == 'test':
            info['trade_time'] = self.trade_time
            info['trade_win'] = self.trade_win

        return self._get_now_state(), reward, done, info

    def _get_now_state(self):
        """Return the observation ``[units held, current price, cash]`` as a float array."""
        return np.array(
            [self.hold_a_position, self.now_price, self.cash_in_hand],
            dtype=float,
        )

    def _get_revenue(self):
        """Return total account value: position marked at the current price plus cash."""
        return self.hold_a_position * self.now_price + self.cash_in_hand

    def _liquidate(self):
        """Convert the whole position into cash at the current price."""
        self.cash_in_hand += self.now_price * self.hold_a_position
        self.hold_a_position = 0

    def _trade(self, action, lastorder = False):
        """Apply ``action`` to the account at the current price.

        Args:
            action: one of the values in ``action_space``.
            lastorder: when True the action is ignored and the position is
                closed out; this forced close is not counted in the trade
                statistics.
        """
        if lastorder:
            self._liquidate()
            return

        if self.action_space[0] == action: # sell
            if self.hold_a_position != 0:
                self._liquidate()
                if self.mode == 'test':
                    self.trade_time += 1
                    # A trade "wins" when cash after selling exceeds the cash
                    # recorded just before the matching buy.
                    if self.cash_in_hand > self.brfore_buy_cash:
                        self.trade_win += 1
        elif self.action_space[2] == action: # buy
            if self.hold_a_position == 0:
                if self.mode == 'test':
                    self.brfore_buy_cash = self.cash_in_hand
                # Buy every affordable whole unit in one step. The price guard
                # prevents the endless purchase a zero/negative price would
                # otherwise allow (NaN also fails the comparison), and floor
                # division lets a unit costing exactly the remaining cash be
                # bought.
                if self.now_price > 0:
                    units = int(self.cash_in_hand // self.now_price)
                    if units > 0:
                        self.hold_a_position += units
                        self.cash_in_hand -= units * self.now_price

In [3]:
class Main:
    """Runs episodes with a uniformly random policy and logs each result to a CSV file."""

    def __init__(self, env, episodes_times = 1000, mode = 'test', out_path = None):
        """Remember the run settings and (re)create the result file with its header.

        Args:
            env: object exposing ``reset()`` and ``step(action)`` returning
                ``(state, reward, done, info)``.
            episodes_times: number of episodes ``play_game`` will run.
            mode: 'test' logs profit, trade count and winning trades; any
                other value logs profit only.
            out_path: CSV file to write; when None the notebook-level
                ``csv_path`` is used.
        """
        self.env = env
        self.episodes_times = episodes_times
        self.mode = mode
        self.csv_path = csv_path if out_path is None else out_path

        if self.mode == 'test':
            header = 'FixedProfit,TradeTimes,TradeWin'
        else:
            header = 'FixedProfit'
        # Opening with 'w' truncates any previous results at this path.
        with open(self.csv_path, 'w') as f:
            print(header, file=f)

    def play_game(self):
        """Play ``episodes_times`` episodes, printing and appending one CSV row per episode."""
        for episode in range(self.episodes_times):
            self.env.reset()
            done = False
            start_time = datetime.now()

            while not done:
                action = random.randrange(3)
                _, _, done, info = self.env.step(action)

            play_time = datetime.now() - start_time
            # Use the instance's own episode count rather than a notebook
            # global, so the progress line is correct for every instance.
            if self.mode == 'test':
                print(f"Episode: {episode + 1}/{self.episodes_times} RapTime: {play_time} FixedProfit: {info['cur_revenue']:.0f} TradeTimes: {info['trade_time']} TradeWin: {info['trade_win']}")
                row = str(info['cur_revenue']) + ',' + str(info['trade_time']) + ',' + str(info['trade_win'])
            else:
                print(f"Episode: {episode + 1}/{self.episodes_times} RapTime: {play_time} FixedProfit: {info['cur_revenue']:.0f}")
                row = str(info['cur_revenue'])

            with open(self.csv_path, 'a') as f:
                print(row, file=f)

In [4]:
# Settings shared by every worker: starting cash and episodes per worker.
initial_money, episodes_times = 1000000, 25

# Build four independent Environment/Main pairs, one per worker process.
envs = []
for _ in range(4):
    env = Environment(df, initial_money=initial_money, mode=mode)
    envs.append(Main(env, episodes_times, mode))

# Launch one process per runner, then block until all of them have finished.
worker = [Process(target=runner.play_game) for runner in envs]
for proc in worker:
    proc.start()
for proc in worker:
    proc.join()

Episode: 1/25 RapTime: 0:00:00.058214 FixedProfit: 999072 TradeTimes: 153 TradeWin: 85
Episode: 1/25 RapTime: 0:00:00.070950 FixedProfit: 1471790 TradeTimes: 145 TradeWin: 90
Episode: 1/25 RapTime: 0:00:00.075579 FixedProfit: 1054103 TradeTimes: 150 TradeWin: 93
Episode: 1/25 RapTime: 0:00:00.073610 FixedProfit: 1689535 TradeTimes: 146 TradeWin: 84
Episode: 2/25 RapTime: 0:00:00.072827 FixedProfit: 1459308 TradeTimes: 162 TradeWin: 103
Episode: 2/25 RapTime: 0:00:00.068259 FixedProfit: 851706 TradeTimes: 140 TradeWin: 82
Episode: 2/25 RapTime: 0:00:00.074844 FixedProfit: 1498193 TradeTimes: 146 TradeWin: 95
Episode: 2/25 RapTime: 0:00:00.077759 FixedProfit: 1255050 TradeTimes: 148 TradeWin: 92
Episode: 3/25 RapTime: 0:00:00.070957 FixedProfit: 934226 TradeTimes: 147 TradeWin: 85
Episode: 3/25 RapTime: 0:00:00.074140 FixedProfit: 1062181 TradeTimes: 148 TradeWin: 89
Episode: 3/25 RapTime: 0:00:00.062899 FixedProfit: 940755 TradeTimes: 142 TradeWin: 91
Episode: 3/25 RapTime: 0:00:00.0729