In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
from datetime import datetime
import itertools
import argparse
import re
import os
import pickle

In [3]:
from sklearn.preprocessing import StandardScaler

In [14]:
#Let's use AAPL, MSI and SBUX
def get_data(csv_path=None):
    """Load the stock price history from a CSV file as a NumPy array.

    Parameters
    ----------
    csv_path : str or path-like, optional
        Location of the CSV file to read. When omitted, the original
        hard-coded location of ``aapl_msi_sbux.csv`` is used, so existing
        ``get_data()`` calls keep working unchanged.

    Returns
    -------
    numpy.ndarray
        ``df.values`` of the parsed file: one row per CSV record and one
        column per CSV column.
        NOTE(review): the original comments describe the data as T x 3
        with column 0 == AAPL, 1 == MSI, 2 == SBUX -- confirm against
        the actual file.
    """
    if csv_path is None:
        # Machine-specific fallback kept only for backward compatibility;
        # pass csv_path explicitly to run this anywhere else.
        csv_path = "C:\\Users\\mohan\\Documents\\GitHub\\machine_learning_examples\\tf2.0\\aapl_msi_sbux.csv"
    df = pd.read_csv(csv_path)
    return df.values

In [18]:
def get_scaler(env):
    """Fit a scikit-learn StandardScaler on states collected by random play.

    Takes up to ``env.n_step`` steps in ``env``, choosing each action
    uniformly at random from ``env.action_space``, and stops early as soon
    as the environment reports ``done``. Every state returned by
    ``env.step`` is collected and used to fit the scaler.

    The environment is not reset here, so stepping starts from whatever
    state ``env`` is already in. This loop would also be a natural place to
    populate a replay buffer.

    Returns the fitted ``StandardScaler``.
    """
    observed_states = []

    for _ in range(env.n_step):
        # Only the state and the termination flag are needed; reward and
        # info are unpacked but ignored.
        observation, _reward, episode_over, _info = env.step(
            np.random.choice(env.action_space)
        )
        observed_states.append(observation)
        if episode_over:
            break

    # fit() returns the scaler itself, so it can be returned directly.
    return StandardScaler().fit(observed_states)

In [19]:
def maybe_make_dir(directory):
    '''Create `directory` (including missing parents) if it does not exist yet.

    Calling this on a directory that already exists is a no-op.

    Uses `exist_ok=True` instead of a separate existence check so there is
    no window in which another process can create the directory between
    the check and the creation.

    Raises:
        FileExistsError: if `directory` exists but is not a directory.
    '''
    os.makedirs(directory, exist_ok=True)

In [20]:
class LinearModel:
    '''A linear regression model with multiple outputs, trained by
    gradient descent with momentum.

    Attributes:
        W: weight matrix of shape (input_dim, n_action).
        b: bias vector of shape (n_action,).
        vW, vb: momentum (velocity) terms for W and b; start at 0.
        losses: mean squared error recorded at every call to `sgd`.
    '''

    def __init__(self, input_dim, n_action):
        '''Randomly initialise the weights and zero the biases.

        Args:
            input_dim: number of input features (D).
            n_action: number of outputs (K).
        '''
        # Standard-normal weights scaled down by sqrt(input_dim).
        self.W = np.random.randn(input_dim, n_action) / np.sqrt(input_dim)
        self.b = np.zeros(n_action)

        # momentum terms
        self.vW = 0
        self.vb = 0

        # populated with one MSE value per gradient descent step
        self.losses = []

    def predict(self, X):
        '''Return the model output X.dot(W) + b for a 2-D input X (N x D).

        Raises:
            AssertionError: if X is not 2-dimensional.
        '''
        # make sure X is N * D
        assert len(X.shape) == 2
        return X.dot(self.W) + self.b

    def sgd(self, X, Y, learning_rate=0.01, momentum=0.9):
        '''Perform one momentum gradient descent step on the squared error.

        Args:
            X: 2-D input array (N x D).
            Y: target array matching the shape of `predict(X)` (N x K).
            learning_rate: step size applied to the gradients.
            momentum: decay factor applied to the previous velocity.

        Side effects:
            Updates W, b, vW and vb in place and appends the MSE of the
            pre-update prediction to `self.losses`.

        Raises:
            AssertionError: if X is not 2-dimensional.
        '''
        # make sure X is N * D
        assert len(X.shape) == 2

        # The loss is averaged over every target value (N * K), not just
        # over the N samples.
        num_values = np.prod(Y.shape)

        # Do one step of gradient descent.
        # The factor 2 comes from differentiating the squared error.
        Yhat = self.predict(X)
        residual = Yhat - Y
        gW = 2 * X.T.dot(residual) / num_values
        # Sum over the sample axis only, so each output keeps its own bias
        # gradient (shape (K,)) instead of one scalar shared by all biases.
        gb = 2 * residual.sum(axis=0) / num_values

        # update momentum terms
        self.vW = momentum * self.vW - learning_rate * gW
        self.vb = momentum * self.vb - learning_rate * gb

        # update params
        self.W += self.vW
        self.b += self.vb

        mse = np.mean(residual ** 2)
        self.losses.append(mse)

    def load_weights(self, filepath):
        '''Replace W and b with the arrays stored under keys 'W' and 'b'
        in the .npz file at `filepath`.'''
        # The context manager closes the underlying file handle once the
        # arrays have been read.
        with np.load(filepath) as npz:
            self.W = npz['W']
            self.b = npz['b']

    def save_weights(self, filepath):
        '''Write W and b to `filepath` in NumPy .npz format under the keys
        'W' and 'b'.'''
        np.savez(filepath, W=self.W, b=self.b)
        

In [None]:
class MultiStockEnv:
    """
    A 3 stock trading Environment
    State: vector of size 7 (n_stock * 2 + 1)
        - # shares of stock 1 owned
        - # shares of stock 2 owned
        - # shares of stock 3 owned
        - # price of stock 1(using daily close price)
        - # price of stock 2
        - # price of stock 3
        - cash owned (can be used to purchase more stocks)
    Action: categorical variable with 27(3**3) possiblites
        -for each stock you can:
        - 0 = sell
        - 1 = hold
        - 2 = buy
    """
    
    def __init__(self, data, initial_investment=20000):
        #data
        self.stock_price_history = data
        self.n_step, self.n_stock = self.stock_price_history.shape
        
        #instance attribute
        self.initial_investment = initial_investment