In [12]:
# Plan: Create a trading bot
# TO DO:
# Agent: the learner or interactor(the sentient)
# Environment: the world the agent lives and interacts in
# Policy: the rules the agent follows in response to changes in the Environment
# Observation: changes in the environment of which are sent to the Agent
# Action: reactions to observations made by the agent of which are sent to the Environment

# Add a method to buy and sell certain amounts not all or nothing
# Create different dataframes in bear or bull markets and differnte intervals(1hr, 1dy, etc) and lengths(1yr, 10yrs, etc)
# create render method
# create a random agent 
# create AI agent

In [13]:
import gym
import pandas as pd
import numpy as np
from gym import spaces
from sklearn import preprocessing
from utils import TradingGraph

import warnings
warnings.filterwarnings('ignore')


In [60]:
MAX_TRADING_SESSION = 4301


class TradingEnv(gym.Env):
    metadata = {'render.modes': ['live', 'file', 'none']}
    scaler = preprocessing.MinMaxScaler()
    viewer = None

    def __init__(self, df, lookback_window_size=50, commission=0.00075, initial_balance=10000, serial=False):
        super(TradingEnv, self).__init__()
        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial

        # actions space 0: buy 100%, 1: hold, 2, sell 100%
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=1, shape=(
            10, lookback_window_size + 1), dtype=np.float16)

    def reset(self):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.asset_held = 0

        self._reset_session()

        self.account_history = np.repeat([
            [self.net_worth],
            [0],
            [0],
            [0],
            [0]],
             self.lookback_window_size + 1, axis=1)

        self.trades = []

        return self._next_observation()

    def step(self, action):
        prev_net_worth = self.net_worth
        current_price = self._get_current_price() + 0.01
        self._take_action(action, current_price)
        self.steps_left -= 1
        self.current_step += 1

        if self.steps_left == 0:
            self.balance += self.asset_held * current_price
            self.asset_held = 0
            self._reset_session()
        
        obs = self._next_observation()
        reward = self.net_worth - prev_net_worth
        done = self.net_worth <= 0

        return obs, reward, done, {}

    def render(self, mode='human', **kwargs):
        if mode == 'human':
            print(self.net_worth)

    def _reset_session(self):
        self.current_step = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            self.steps_left = np.random.randint(1, MAX_TRADING_SESSION)
            self.frame_start = np.random.randint(
                self.lookback_window_size, len(self.df) - self.steps_left)

        self.active_df = self.df[self.frame_start -
                                 self.lookback_window_size:self.frame_start + self.steps_left]

    def _next_observation(self):
        end = self.current_step + self.lookback_window_size + 1

        obs = np.array([
            self.active_df['Open'].values[self.current_step:end],
            self.active_df['High'].values[self.current_step:end],
            self.active_df['Low'].values[self.current_step:end],
            self.active_df['Close'].values[self.current_step:end],
            self.active_df['Volume'].values[self.current_step:end],
        ])

        scaled_history = self.scaler.fit_transform(self.account_history)

        obs = np.append(obs, scaled_history[:, -(self.lookback_window_size + 1):], axis= 0)
    
        return obs

    def _take_action(self, action, current_price):
        action_type = action

        asset_bought = 0
        asset_sold = 0
        cost = 0
        sales = 0

        if action < 1: 
            cost = self.balance
            asset_bought = (self.balance / current_price  * ( 1 - self.commission)) 
            self.asset_held += asset_bought
            self.balance = 0
        elif action > 1:
            sale = self.asset_held * current_price * (1 - self.commission)
            asset_sold = self.asset_held
            self.asset_held = 0
            self.balance += sales

        if asset_sold > 0 or asset_bought > 0:
            self.trades.append({
                'step': self.frame_start+self.current_step,
                'amount': asset_sold if asset_sold > 0 else asset_bought,
                'total': sales if asset_sold > 0 else cost,
                'type': "sell" if asset_sold > 0 else "buy"
            })
        
        self.net_worth = self.balance + self.asset_held * current_price
        self.account_history = np.append(self.account_history, [[self.net_worth], [asset_bought], [cost], [asset_sold], [sales]], axis=1)

    def _get_current_price(self):
        index = self.current_step
        low = self.df.loc[index,'Low']
        high = self.df.loc[index, 'High']
        return (low + high) / 2

In [65]:
filepath = 'data\pricedata.csv'
data = pd.read_csv(filepath)
env = TradingEnv(data, serial=True)


In [85]:
env.reset()
env.net_worth

10000

In [86]:
len(env.active_df)

4299

In [88]:
for i in range(100):
    print(env.net_worth)
    env.step(0)


10516.172748536454
10533.80897816737
10581.765794800767
10559.576924478786
10573.136789675553
10552.495817188534
10575.65124814954
10616.281815301625
10619.55271253659
10521.64292142833
10509.196702185225
10500.798831125865
10518.540121695807
10556.656230373905
10541.933690785268
10540.686967642177
10531.329540006393
10590.997149309722
10588.80487771541
10620.12004160732
10562.140411390985
10560.921704498302
10496.421291999843
10505.204386502293
10438.007409902206
10516.578984167349
10522.315311438088
10527.316212135654
10552.320715623493
10534.509384427534
10563.317093908061
10546.731473667387
10513.693310375475
10510.128242511244
10473.167804162415
10480.697171459173
10502.816001155135
10526.468720560857
10535.73509538282
10570.580306825956
10599.689190998351
10610.657553032512
10577.857527869055
10583.075554507272
10586.465520806463
10595.710883440623
10577.633397865802
10619.825870978053
10610.790630221942
10671.978121109827
10542.487011730798
10572.163224973925
10567.344429904
105