In [3]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import random
import json


In [70]:
# Smoke test of the gym installation: create MountainCar, print its action
# space, and render 99 random steps.
env = gym.make('MountainCar-v0')
env.reset()
print(env.action_space)
try:
    for i in range(1, 100):
        env.step(env.action_space.sample())
        env.render()
finally:
    # Release the environment (including any render window), even if a step
    # or render call fails part-way through the loop.
    env.close()
    

Discrete(3)


In [24]:
class stock_trading_env(gym.Env):
    """Gym environment that trades a single stock over a table of daily prices.

    The environment reads the ``Open``, ``High``, ``Low``, ``Close`` and
    ``Volume`` columns of the data frame passed to the constructor.

    Actions are two-element arrays ``[action_type, amount]``:

    * ``action_type < 1``       -- buy ``amount`` (a fraction) of the shares
      the current balance can afford;
    * ``1 <= action_type < 2``  -- sell ``amount`` (a fraction) of the shares held;
    * otherwise                 -- hold.

    Observations are 6x6 arrays: five rows holding a six-row window of the
    price/volume columns, plus one row of account state, each divided by the
    matching ``MAX_*`` module constant.

    ``reset()`` must be called before ``step()`` or ``render()``: it is the
    method that creates the account-state attributes.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, df):
        """Store the price table and declare the action/observation spaces.

        Args:
            df: pandas DataFrame with ``Open``, ``High``, ``Low``, ``Close``
                and ``Volume`` columns, one row per time step.
        """
        super(stock_trading_env, self).__init__()
        # Every lookup below uses ``.loc`` with an integer step as the row
        # label, so the rows are re-labelled 0..n-1. A frame that was sorted
        # or filtered beforehand keeps its old labels, which would make those
        # lookups hit the wrong rows.
        self.df = df.reset_index(drop=True)
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        self.action_space = spaces.Box(
            low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(6, 6), dtype=np.float16)

    def step(self, action):
        """Apply ``action``, advance one row, and return the gym 4-tuple.

        Args:
            action: sequence ``[action_type, amount]`` (see class docstring).

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the cash balance
            weighted by ``current_step / MAX_STEPS``, ``done`` is True once net
            worth is not positive, and ``info`` is an empty dict.
        """
        self._take_action(action)
        self.current_step += 1
        # An observation needs six rows starting at current_step, so wrap to
        # the first row once fewer than six remain.
        if self.current_step > len(self.df) - 6:
            self.current_step = 0

        delay_modifier = self.current_step / MAX_STEPS

        reward = self.balance * delay_modifier
        done = self.net_worth <= 0
        obs = self._next_observation()
        return obs, reward, done, {}

    def _take_action(self, action):
        """Execute a buy, sell or hold at a random price between Open and Close.

        Updates balance, share count, cost basis, sale totals, net worth and
        the running maximum net worth.
        """
        current_price = random.uniform(
            self.df.loc[self.current_step, "Open"],
            self.df.loc[self.current_step, "Close"])
        action_type = action[0]
        amount = action[1]

        if action_type < 1:
            # Buy: spend `amount` of what the balance can afford.
            total_possible = int(self.balance / current_price)
            shares_bought = int(total_possible * amount)
            prev_cost = self.cost_basis * self.shares_held
            additional_cost = shares_bought * current_price
            total_shares = self.shares_held + shares_bought

            self.balance -= additional_cost
            # A buy that purchases nothing from a flat position would divide
            # by zero here; leave the cost basis untouched in that case.
            if total_shares > 0:
                self.cost_basis = (prev_cost + additional_cost) / total_shares
            self.shares_held = total_shares
        elif action_type < 2:
            # Sell: liquidate `amount` of the shares currently held.
            shares_sold = int(self.shares_held * amount)
            proceeds = shares_sold * current_price
            self.balance += proceeds
            self.shares_held -= shares_sold
            self.total_shares_sold += shares_sold
            self.total_sales_value += proceeds

        # Mark the position to market. This must write `net_worth`, the
        # attribute that step() and render() read.
        self.net_worth = self.balance + self.shares_held * current_price
        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth
        if self.shares_held == 0:
            self.cost_basis = 0

    def render(self, mode='human', close=False):
        """Print the current step, account state and profit to stdout.

        ``mode`` and ``close`` are accepted for gym API compatibility and are
        not used.
        """
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
        print(f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
        print(f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
        print(f'Profit: {profit}')

    def reset(self):
        """Restore the starting account and jump to a random row.

        Returns:
            The observation for the newly chosen starting row.
        """
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0

        # Latest start that still leaves a full six-row observation window.
        self.current_step = random.randint(0, len(self.df) - 6)
        return self._next_observation()

    def _next_observation(self):
        """Build the 6x6 observation for the current step.

        Rows 0-4 are Open, High, Low, Close and Volume for rows
        ``current_step`` .. ``current_step + 5`` (``.loc`` slices include both
        ends, so six rows). Row 5 is the account state. Prices are divided by
        ``MAX_SHARE_PRICE``, volume and share counts by ``MAX_NUM_SHARES``, and
        balances by ``MAX_ACCOUNT_BALANCE``.
        """
        window = self.df.loc[self.current_step: self.current_step + 5]
        frame = np.array([
            window['Open'].values / MAX_SHARE_PRICE,
            window['High'].values / MAX_SHARE_PRICE,
            window['Low'].values / MAX_SHARE_PRICE,
            window['Close'].values / MAX_SHARE_PRICE,
            window['Volume'].values / MAX_NUM_SHARES,
        ])
        # Append the account-state row, normalised the same way.
        obs = np.append(frame, [[
            self.balance / MAX_ACCOUNT_BALANCE,
            self.max_net_worth / MAX_ACCOUNT_BALANCE,
            self.shares_held / MAX_NUM_SHARES,
            self.cost_basis / MAX_SHARE_PRICE,
            self.total_shares_sold / MAX_NUM_SHARES,
            self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE),
        ]], axis=0)
        return obs

      

In [25]:
# Constants read by stock_trading_env.

# Divisors used to normalise observation values.
MAX_ACCOUNT_BALANCE = 2**31 - 1  # 2147483647; also the upper end of reward_range
MAX_NUM_SHARES = 2**31 - 1       # 2147483647
MAX_SHARE_PRICE = 5_000

MAX_OPEN_POSITIONS = 5  # not referenced by the environment code in this notebook section
MAX_STEPS = 20_000      # denominator of the step-based reward weighting

# Cash the account holds after every reset().
INITIAL_ACCOUNT_BALANCE = 10_000


In [26]:
# Build the trading environment and show its action space, one random action,
# and its observation space (one per line).
# NOTE: `df` is assigned by the read_csv cell further down this notebook, so
# that cell has to be run before this one.
env = stock_trading_env(df)
print(
    env.action_space,
    env.action_space.sample(),
    env.observation_space,
    sep='\n',
)

Box(0.0, 3.0, (2,), float16)
[2.531   0.03775]
Box(0.0, 1.0, (6, 6), float16)


In [29]:
# Start a new episode at a random row, then take nine random actions,
# printing the account state after each one.
env.reset()

for i in range(1, 10):
    env.step(action=env.action_space.sample())
    env.render(mode='human')
    
    

Step: 3397
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3398
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3399
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3400
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3401
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3402
Balance: 10000.0
Shares held: 0 (Total sold: 0)
Avg cost for held shares: 0 (Total sales value: 0.0)
Net worth: 10000 (Max net worth: 10000)
Profit: 0
Step: 3403
Balance: 10000.0


In [30]:
# Load the AAPL price table and order it by the 'Date' column.
# reset_index(drop=True) re-labels the rows 0..n-1 after sorting: the code in
# this notebook slices with `.loc` on integer step labels, and sort_values on
# its own keeps the pre-sort labels.
# NOTE(review): 'Date' is not parsed here, so if it loads as text the sort is
# lexicographic -- confirm the file uses a sortable (ISO-like) date format.
df = (
    pd.read_csv('AAPL_stock.csv')
    .sort_values('Date')
    .reset_index(drop=True)
)


In [34]:
# Debug view: rebuild the price/volume part of an observation for the first
# window of `df` without normalisation, then print it next to a four-row slice
# of 'Open'.
current_step = 0

# Divisors of 1 keep the raw values (and still turn Volume into floats).
# They use their own names so the MAX_SHARE_PRICE / MAX_NUM_SHARES constants
# that stock_trading_env reads at run time are not overwritten by this cell.
RAW_PRICE_SCALE = 1
RAW_VOLUME_SCALE = 1

# `.loc` slices include both end labels, so this window has six rows.
window = df.loc[current_step: current_step + 5]
frame = np.array([
    window['Open'].values / RAW_PRICE_SCALE,
    window['High'].values / RAW_PRICE_SCALE,
    window['Low'].values / RAW_PRICE_SCALE,
    window['Close'].values / RAW_PRICE_SCALE,
    window['Volume'].values / RAW_VOLUME_SCALE,
])
print(frame)
print(df.loc[current_step: current_step + 3, 'Open'].values)

[[1.36300e+01 1.65000e+01 1.59400e+01 1.88100e+01 1.74400e+01 1.81200e+01]
 [1.62500e+01 1.65600e+01 2.00000e+01 1.90000e+01 1.86200e+01 1.93700e+01]
 [1.35000e+01 1.51900e+01 1.47500e+01 1.73100e+01 1.69400e+01 1.75000e+01]
 [1.62500e+01 1.58800e+01 1.89400e+01 1.75000e+01 1.81900e+01 1.81900e+01]
 [6.41170e+06 5.82030e+06 1.61828e+07 9.30020e+06 6.91090e+06 7.91560e+06]]
[13.63 16.5  15.94 18.81]
