In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf


from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.specs import array_spec
from tf_agents.trajectories import time_step as ts
from sklearn.preprocessing import StandardScaler

In [3]:
# --- Environment configuration -------------------------------------------
# Symbols the portfolio can hold; left empty here for the user to fill in.
stocks = list()

# Length declared for the observation vector; derived from the symbol count.
num_of_feat = len(stocks)

# Location of the CSV price file the environment reads (needs a 'Date' column).
filepath = ""

# Rows per training episode: 5 years at 252 rows per year.
episode_length = 5 * 252

In [8]:

class PortfolioEnv(py_environment.PyEnvironment):
    def __init__(self):
        """Create the action/observation specs and start the first episode.

        The action spec is a 1-D float32 vector with ``len(stocks) + 1``
        entries (one ratio per stock plus cash), each bounded to ``[0, 1]``.
        The observation spec is a 1-D float32 vector of length
        ``num_of_feat``.

        Note that ``reset()`` is called here, so constructing the
        environment already runs ``_reset`` (including its file read).
        """
        # Run the base-class initializer first so any state it sets up
        # exists before reset() is used below.
        super().__init__()

        # Action: Portfolio ratio in stocks and cash.
        # The shape must be a tuple -- ``(n)`` is just the int ``n`` -- and
        # the bounds are passed via ``minimum``/``maximum``.
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(len(stocks) + 1,),
            dtype=np.float32,
            minimum=0,
            maximum=1,
            name='action')
        self._observation_spec = array_spec.ArraySpec(
            shape=(num_of_feat,),
            dtype=np.float32)

        self.reset()
        # A freshly constructed environment is never in the "ended" state.
        self._episode_ended = False
    
    def action_spec(self):
        """Return the action spec stored on this environment by ``__init__``."""
        spec = self._action_spec
        return spec
    
    def observation_spec(self):
        """Return the observation spec stored on this environment by ``__init__``."""
        spec = self._observation_spec
        return spec

    def _reset(self):
        """Start a new episode on a randomly chosen window of the price data.

        Resets the capital, cash and ratio state, reloads the CSV at
        ``filepath``, keeps a random contiguous window of its rows, computes
        the initial share counts and portfolio value through the helper
        methods, and builds the first observation.

        Returns:
            The ``ts.restart`` time step wrapping the initial observation.

        Raises:
            ValueError: if the data file has too few rows to sample an
                episode of ``episode_length`` rows from it.
        """
        # Start new episode
        self._episode_ended = False
        self.index = 0

        # Set timeframe: 1D
        self.timeframe = pd.Timedelta(1, unit='d')

        # Set starting capital and value
        self.capital = 1000
        self.cash = self.capital
        self.portfolio_value = self.capital

        # Price bookkeeping dicts, emptied at the start of every episode.
        # NOTE(review): they are not filled in this method -- document their
        # keys/values where they are populated.
        self.previous_price = {}
        self.old_dict_stock_price_1 = {}
        self.old_dict_stock_price_2 = {}

        # Initiate ratio array: everything starts in cash (index 0).
        self.ratio = np.zeros((len(stocks) + 1))
        self.ratio[0] = 1  # Cash ratio

        # Load data file:
        self.df = pd.read_csv(filepath)
        self.df['Date'] = pd.to_datetime(self.df['Date'])

        # Initiate Standard Scaler
        self.standard_scaler = StandardScaler()

        # Initiate reward
        self.step_reward = 0

        # Randomly slicing an episode out of the raw data for training.
        self.max_index = self.df.shape[0]
        if self.max_index - episode_length <= 3:
            # The candidate range below would be empty; fail with a clear
            # message instead of an opaque numpy error.
            raise ValueError(
                "Data file has {} rows; more than {} are required to sample "
                "an episode".format(self.max_index, episode_length + 3))
        # Start and length are both rounded down to multiples of 3.
        # NOTE(review): presumably the file holds 3 rows per date -- confirm.
        candidate_starts = np.arange(3, self.max_index - episode_length)
        start_point = (np.random.choice(candidate_starts) // 3) * 3
        end_point = start_point + episode_length // 3 * 3
        # .loc slicing is label-based and inclusive on both ends.
        self.df = self.df.loc[start_point:end_point + 2].reset_index(drop=True)

        # First observation
        self.init_time = self.df.loc[0, 'Date']
        self.current_time = self.init_time
        self.df_time_slice = self.df[self.df['Date'] == self.current_time]

        # Calculate num of share for each stock
        self.current_stock_num_distribution = self.calculate_actual_shares_from_money_split()

        # Calculate portfolio value
        self.previous_value = self.portfolio_value
        self.current_stock_money_distribution, self.portfolio_value = self.calculate_money_from_num_stocks()
        self.ratio = self.normalize_money_dist()

        # State/Observation: only the scaled observation is needed to build
        # the state vector returned to the agent.
        scaled_output = self.get_observations()
        self._state = scaled_output[self.df.columns].values.flatten()

        self._episode_ended = self.index == episode_length // 3
        return ts.restart(self._state)
