In [66]:
import numpy as np
from market import market
import pandas as pd
from randomAgent import Agent
import itertools
from IPython.display import display

In [67]:
class linearRegression:
    
    def __init__(self, dim):
        self.W = np.random.randn(dim[0],dim[1])
        self.b = np.zeros(dim[1])
        self.historical_cost = []

        
    def train(self, X, Y, learning_rate = 0.01):
        pred = self.predict(X)
        n_val = np.prod(Y.shape)
        
        gW = 2 * X.T.dot(pred - Y) / n_val
        gb = 2 * (pred - Y).sum(axis=0) / n_val
        
        self.W -= learning_rate * gW
        self.b -= learning_rate * gb
        
        self.prediction_error = np.mean((pred - Y)**2)
        self.historical_cost.append(self.prediction_error)
        
        
    def predict(self,X):
        return X.dot(self.W) + self.b



In [68]:
class SmartAgent:
    
    def __init__(self, dim):
        self.model = linearRegression((dim))
    
    def get_action(self, x):
        pred = self.model.predict(x)
        action = np.argmax(pred)
        return action


In [69]:
env = market(['AAPL','MSFT'])
env.total_days
dados_treino = int((env.total_days*0.7))
dados_treino

6352

In [70]:
train_env =  market(['AAPL','MSFT'] , start_row=0, end_row=dados_treino)
test_env =  market(['AAPL','MSFT'] , start_row=dados_treino+1)
train_env.total_days
test_env.total_days

2722

In [71]:
dim = np.array([env.state_size,len(env.action_list)])
smartagent = SmartAgent(dim)
randomagent = Agent(len(test_env.index_actions))

In [72]:
def train(agent, market , itr):

    for i in range(itr):

        state = market.start()
        done = False
        x = np.array([market._get_state()])
        v_capital = market.get_episode_value()

        while not done:
            valores_estimados = agent.model.predict(x)
            action = agent.get_action(x)
            #r = v_capital - valores_estimados[0][action]
            valores_estimados[0][action] = v_capital
            agent.model.train(x, valores_estimados)
            print(f"Day: {market.today} // state: {market._get_state()} // Ação {action} // {market.get_episode_value()}")
            print(f"Day: {market.today} // Ação {action} // {smartagent.model.prediction_error}")
            smartagent.model.historical_cost
            next_state, reward, done = market.new_day(action)
            state = next_state
            v_capital = market.get_episode_value()
            x = np.array([market._get_state()])

    return market.get_episode_value()

In [84]:
train(smartagent,train_env,2)

1694638.1601848975

In [74]:
def play_episode(agent,market):

    state = market.start()
    done = False
    x = np.array([market._get_state()])
  
    while not done:
    
        action = agent.get_action(x)
        #print(f"Day: {market.today} // state: {market._get_state()} // Ação {action} // {market.get_episode_value()}")
        next_state, reward, done = market.new_day(action)
        state = next_state
        x = np.array([market._get_state()])

    return market.get_episode_value()

In [85]:
capital_acoes_smart = play_episode(smartagent,test_env)
capital_acoes_smart

118830.3442401886

In [76]:
def play_random_episode(agent,market):
    state = market.start()
    done = False

    while not done:
        action = agent.act(state)
        next_state, reward, done = market.new_day(action)
        state = next_state
        market.get_episode_value()

    return market.get_episode_value()

In [77]:
resultado=0
loops = 10
for i in range(loops):
    capital_acoes_aleatorias = play_random_episode(randomagent,test_env)
    resultado += capital_acoes_aleatorias
    if capital_acoes_aleatorias>resultado:
        resultado=capital_acoes_aleatorias
  
media_resultado = resultado/loops  

In [78]:
#print(f'Tivemos um resultado {round((capital_acoes_smart-media_resultado)/media_resultado*100,)} % melhor')
print(f'Tivemos um resultado {round((capital_acoes_smart/media_resultado),2)}x melhor (+{round((capital_acoes_smart-media_resultado)/media_resultado*100,)} %)')

Tivemos um resultado 2.4x melhor (+140 %)
