In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf


from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.specs import ArraySpec, BoundedArraySpec
from tf_agents.trajectories import time_step as ts
from sklearn.preprocessing import StandardScaler

In [3]:
stocks = ['JNJ', 'KO', 'T']
num_of_feat = len(stocks)
filepath = 'raw_data.csv'
episode_length = 365*5*3 #5 years. Multiply 3 for indexing each stocks

In [36]:
# NEED TO CHANGE STOCKS VARIABLE
# NEED TO CHANGE NUM_OF_FEAT VARIABLE
# NEED TO CHANGE EPISODE_LENGTH VARIABLE
# NEED TO CHANGE FILEPATH VARIABLE
# NEED TO CHANGE DF.COL[1:7] IN GET OBS
class PortfolioEnv(py_environment.PyEnvironment):
    def __init__(self):
        # Action: Portfolio ratio in stocks and cash
        self._action_spec = BoundedArraySpec(shape=(len(stocks)+1,), 
                                                       minimum=0, 
                                                       maximum=1, 
                                                       dtype=np.float32, 
                                                       name='action')
        self._observation_spec = ArraySpec(shape=(num_of_feat,),
                                                     dtype=np.float32)
        self.reset()
        self._episode_ended = False
    
    def action_spec(self):
        return self._action_spec
    
    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        # Start new episode
        self._eposode_ended = False
        self.step_index = 0
        self.return_mem = pd.DataFrame(columns=[stock+'_Close' for stock in stocks])

        # Set timeframe: 1D
        self.timeframe = pd.Timedelta(1, unit='d')
        
        # Set starting capital and value
        self.capital = 1000
        self.cash = self.capital
        self.portfolio_value = self.capital

        # Store previous prices to calculate on days market is closed
        self.prev_price_share = {}
        self.prev_price_cash = {}

        # Initiate ratio array
        self.ratio = np.zeros((len(stocks)+1))
        self.ratio[0] = 1 # Cash ratio

        # Load data file:
        self.df = pd.read_csv(filepath)
        self.df['Date'] = pd.to_datetime(self.df['Date'])

        # Initiate Standard Scaler
        self.standard_scaler = StandardScaler()

        # Initiate reward
        self.step_reward = 0

        # Randomly slicing an episode out of the raw data for training
        self.df_max_index = self.df.shape[0]
        start_point = (np.random.choice(np.arange(3,self.df_max_index - episode_length))//3) *3 
        end_point = start_point + episode_length//3 *3
        self.df = self.df.loc[start_point:end_point+2].reset_index(drop=True)

        # First observation
        self.init_time = self.df.loc[0, 'Date']
        self.current_time = self.init_time
        self.df_time_slice = self.df[self.df['Date'] == self.current_time]

        # Calculate num of share for each stock
        self.current_stock_num_distribution = self.calculate_actual_shares_from_money_split()

        # Calculate portfolio value
        self.previous_value = self.portfolio_value
        self.current_stock_money_distribution, self.portfolio_value  = self.calculate_money_from_num_stocks()
        self.ratio = self.normalize_money_dist()
        
        # State/Observation info
        info_ =  {"money_split":self.ratio,"share_num":self.current_stock_num_distribution,
                  "value":self.portfolio_value,"time":self.current_time,
                  "reward":self.step_reward,
                  "raw_output":self.get_observations_unscaled(),
                  "scaled_output":self.get_observations()}
        
        self._state = info_["scaled_output"][self.df.columns].values.flatten()
        
        info_["state"] = self._state
        reward = info_["reward"]
        

        return ts.restart(self._state)

    def _step(self):
        if self._episode_ended:
            self.reset()
        
        if sum(action) <= 0.003:
            self.ratio = [1/len(action) for a in action]
        else:
            self.ratio = [action / sum(action)]
        

        self.current_stock_num_distribution = self.calculate_actual_shares_from_money_split()
        self.step_time()
        self.step_index +=1

        # Step info
        info_ =  {"money_split":self.ratio,"share_num":self.current_stock_num_distribution,
                  "value":self.portfolio_value,"time":self.current_time,
                  "reward":self.step_reward,
                  "raw_output":self.get_observations_unscaled(),
                  "scaled_output":self.get_observations()}
        
        self._state = info_["scaled_output"][self.df.columns].values.flatten()
        info_["state"] = self._state
        reward = info_["reward"]

        self._episode_ended = True if self.index == episode_length//3 else False

        if self._episode_ended:
            reward = 0
            return ts.termination(self._state, reward)
        else:
            try:
                return ts.transition(self._state, reward=reward, discount=1)
            except Exception as e:
                print(e)
                print(self._state)
                print(reward)
                print(action)
                print(self.index)
                print(self.df_time_slice)
        

    def calculate_actual_shares_from_money_split(self):
        price_dict = self.df_time_slice[['Ticker', 'Open']].set_index('Ticker').to_dict()['Open']
        share_num = []

        for idx, stock in enumerate(stocks):
            if stock in price_dict:
                share_num.append(self.ratio[idx+1] * self.portfolio_value // price_dict[stock])
            else:
                share_num.append(self.ratio[idx+1] * self.portfolio_value // self.prev_price_share[stock])
            
        for stock in price_dict:
            self.prev_price_share[stock] = price_dict[stock]
        
        self.cash = self.ratio[0] * self.portfolio_value

        return share_num
    
    def calculate_money_from_num_stocks(self):
        price_dict = self.df_time_slice[['Ticker', 'Open']].set_index('Ticker').to_dict()['Open']
        distribution = []
        
        for idx, stock in enumerate(stocks):
            if stock in price_dict:
                distribution.append(self.current_stock_num_distribution[idx] * price_dict[stock])
            else:
                distribution.append(self.current_stock_num_distribution[i] * self.prev_price_cash[stock])
            
        for stock in stocks:
            self.prev_price_cash[stock] = price_dict[stock]

        return distribution, sum(distribution)
    
    def normalize_money_dist(self):
        normalized = []

        for idx, stock in enumerate(self.current_stock_money_distribution):
            normalized.append(stock/self.portfolio_value)
        
        return normalized
    
    def get_observations_unscaled(self):
        obs = pd.DataFrame()
        for i, frame in self.df_time_slice.groupby('Ticker'):
            temp_df = pd.DataFrame(frame[self.df.columns[1:7]].values)
            temp_df.columns = [i+"_"+c for c in self.df.columns[1:7]]
            if obs.empty:
                obs = temp_df
            else:
                obs = obs.merge(temp_df,right_index=True,left_index=True,how='inner')
        
        self.return_mem = pd.concat([self.return_mem, obs[[stock+'_Close' for stock in stocks]]], ignore_index=True)
        
        return obs

In [37]:
env = PortfolioEnv()

  normalized.append(stock/self.portfolio_value)


AttributeError: 'PortfolioEnv' object has no attribute 'get_observations'