# 1 - Importa bibliotecas

In [None]:
import os
import numpy as np
import torch as th
from torch.nn import Tanh, ELU, ReLU, Sigmoid, Softmax
import websocket
import json
import time
import torch
from torch import nn
import torchvision
import random
import tqdm
from tqdm import tqdm
import gym
import stable_baselines3
from gym import Env
from gym.spaces import Discrete, Box, Tuple, MultiDiscrete
from stable_baselines3 import PPO, DQN, A2C, SAC, TD3, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import CheckpointCallback, CallbackList, ProgressBarCallback, TensorboardCallback, EvalCallback
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from sb3_contrib import RecurrentPPO, TQC, QRDQN, MaskablePPO, TRPO, ARS

# 2 - Cria funções de encoding/decoding em json

In [None]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy scalars and arrays.

    NumPy values are converted to the closest built-in Python type before
    serialisation; every other type is delegated to ``json.JSONEncoder``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Args:
            obj: An object the standard encoder could not serialise.

        Returns:
            ``bool`` for ``np.bool_``, ``int`` for NumPy integers, ``float``
            for NumPy floats and a (nested) ``list`` for ``np.ndarray``.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Module-level placeholders mirroring the encode_json parameters; nothing in
# the visible cells reads them (functions below use their own local values).
identifier = "BoxConveyor_manual"
actions = []
message = ""


def encode_json(identifier, actions):
    """Serialise an identifier/actions pair as a JSON string.

    Args:
        identifier: Value stored under the ``identifier`` key.
        actions: Value stored under the ``actions`` key; NumPy values inside
            it are converted by ``NumpyEncoder``.

    Returns:
        The JSON text of an object with the keys ``identifier`` and
        ``actions``, in that order.
    """
    payload = {'identifier': identifier, 'actions': actions}
    return json.dumps(payload, cls=NumpyEncoder)

# 3 - Define o ambiente (Env) usado pelo agente

In [None]:
class DrillEnv(Env):
    """Gym environment for the box conveyor, driven over a WebSocket.

    Each step sends one discrete action to the ``BoxConveyor_manual`` station
    and waits for the next ``BoxConveyor`` message, which carries the reward,
    the ``done`` flag and four state values. The most recent ``buffer_size``
    state readings are kept in a rolling buffer that forms the observation.

    The class relies on two module-level names that must exist before it is
    used: ``tamanho_buffer`` (read on every reset) and ``ws`` (a connected
    WebSocket, read on every step).
    """
    metadata = {'render.modes': ['human']}

    # Number of values stored per reading: Can, SensorHandling, SensorRobot,
    # ConveyorPosition.
    _FEATURES_PER_STEP = 4

    # Labels printed for each discrete action when printing is enabled. They
    # are used for logging only; the raw action number is what gets sent.
    _ACTION_LABELS = {
        0: {'Forward': False, 'Backward': False},
        1: {'Forward': True, 'Backward': False},
        2: {'Forward': False, 'Backward': True},
    }

    def _reset(self):
        """Restore all episode state and clear the rolling state buffer."""
        self.reward = 0
        self.action_name = ''
        self.Can = False
        self.SensorHandling = False
        self.SensorRobot = False
        self.ConveyorPosition = 0
        self.number_steps = 0
        # The buffer length comes from the module-level `tamanho_buffer`.
        self.buffer_size = tamanho_buffer
        self.state_buffer = np.zeros(self.buffer_size * self._FEATURES_PER_STEP)

    def _obs(self):
        """Return the state buffer in the shape/dtype of ``observation_space``.

        Returns:
            A new ``float32`` array of shape ``(1, buffer_size * 4)``; the
            newest reading occupies the first four columns.
        """
        # The buffer itself stays 1-D float64; convert so the observation
        # matches the Box declared in __init__.
        return self.state_buffer.astype(np.float32).reshape(1, -1)

    def setprint(self, print):
        """Enable or disable the per-step console output."""
        self.print = print

    def setprint2(self, print):
        """Store a second print flag (not read anywhere in this class)."""
        self.print2 = print

    def __init__(self):
        """Initialise episode state, print flags and the action/observation spaces."""
        super().__init__()
        # reset() sets buffer_size, which the observation space below needs.
        self.reset()

        self.print = False
        self.print2 = False
        self.action_space = Discrete(3)
        self.observation_space = Box(
            low=0,
            high=1,
            shape=(1, self.buffer_size * self._FEATURES_PER_STEP),
            dtype=np.float32,
        )

    def buffer(self, Can, SensorHandling, SensorRobot, ConveyorPosition):
        """Push one reading to the front of the rolling state buffer.

        Older readings shift back by one slot of four values; the oldest
        reading wraps to the front and is overwritten by the new one.
        """
        self.state_buffer = np.roll(self.state_buffer, self._FEATURES_PER_STEP)

        self.state_buffer[0] = Can
        self.state_buffer[1] = SensorHandling
        self.state_buffer[2] = SensorRobot
        self.state_buffer[3] = ConveyorPosition

    def step(self, action):
        """Send *action* to the simulator and return the resulting transition.

        Args:
            action: Discrete action (0, 1 or 2) forwarded unchanged to the
                ``BoxConveyor_manual`` station.

        Returns:
            The tuple ``(obs, reward, done, info)`` where ``info`` is an
            empty dict.

        Raises:
            RuntimeError: If receiving or decoding the simulator reply fails.
                (Previously this case returned ``None``, which callers could
                not unpack.)
        """
        actions = action
        self.number_steps += 1
        mensagem = encode_json('BoxConveyor_manual', [actions])

        ws.send(mensagem)
        data = ''
        station_identifier = ''
        # Discard messages from other stations until the conveyor answers.
        while station_identifier != 'BoxConveyor':
            try:
                data = json.loads(ws.recv())
                station_identifier = data['identifier']
            except Exception as err:
                raise RuntimeError(
                    "Failed to receive a valid 'BoxConveyor' message from the simulator"
                ) from err

        self.reward = int(data['reward'])
        done = bool(data['done'])

        self.Can = bool(data['states'][0])
        self.SensorHandling = bool(data['states'][1])
        self.SensorRobot = bool(data['states'][2])
        self.ConveyorPosition = float(data['states'][3])

        self.buffer(self.Can, self.SensorHandling, self.SensorRobot, self.ConveyorPosition)

        # Placeholder: no extra diagnostics are reported.
        info = {}
        obs = self._obs()

        if self.print:
            print((self.number_steps),'recompensa: ',(self.reward),'|  acao --> ', self._ACTION_LABELS[actions],'  done: ', done)
            for name, value in zip(['Can', 'SensorHandling', 'SensorRobot', 'ConveyorPosition'], self.state_buffer[0:4]):
                print(f"{name}: {value}")
            print('\n')

        return obs, self.reward, done, info

    def render(self, mode='human'):
        """Do nothing; rendering happens outside this class, if at all."""
        pass

    def reset(self):
        """Reset the episode state and return the initial (all-zero) observation."""
        self._reset()
        return self._obs()

# 4 - Cria o modelo em Aprendizagem por Reforço

In [None]:
# Training configuration shared by the cells below.
algoritmo = PPO                  # algorithm class used to build the model
nome_do_ficheiro = "PPO"         # base name for saved models and logs
tamanho_buffer = 6               # size of the buffer applied to the algorithm
funcao_ativacao = Tanh           # e.g. Tanh, ELU, ReLU, Sigmoid, Softmax

# Number of neurons per layer in the policy (pi) and value (vf) networks.
rede_neuronal = [{"pi": [60, 60], "vf": [60, 60]}]

# Policy/layer type of the model; for a recurrent PPO model use MlpLstmPolicy.
tipo_camadas = "MlpPolicy"

fator_desconto = 0.95            # reinforcement-learning discount factor

save_path = os.path.join(
    "Training_BoxConveyor",
    "Model_saves",
    f"{nome_do_ficheiro}_B{tamanho_buffer}",
)

In [None]:
# Output locations for the final model and for the training/evaluation logs.
save_path = os.path.join('Training_BoxConveyor', 'Model_saves', f"{nome_do_ficheiro}_B{tamanho_buffer}")
policy_kwargs = dict(activation_fn=funcao_ativacao, net_arch=rede_neuronal)
log_path = os.path.join('Training_BoxConveyor', 'Logs', f"{nome_do_ficheiro}_B{tamanho_buffer}")

# Keep the raw environment under its own name. The factory lambda looks the
# name up when it is called, so reusing `env` for the vectorised wrapper only
# worked because the wrapper calls the factory before `env` is rebound.
drill_env = DrillEnv()
drill_env.setprint(False)  # set to True to print states and actions each step
env = DummyVecEnv([lambda: drill_env])
env.reset()

# NOTE(review): gae_lambda is not accepted by every algorithm imported at the
# top of the notebook (e.g. DQN) — adjust the arguments when changing
# `algoritmo`.
model = algoritmo(
    tipo_camadas,
    env,
    verbose=1,
    gamma=fator_desconto,
    gae_lambda=0.95,
    seed=9,
    policy_kwargs=policy_kwargs,
    tensorboard_log=log_path,
)

print(model.policy)
 

# 5 - Define funções de callback

In [None]:
# Periodic checkpoints: the same tag names both the directory and the files.
checkpoint_tag = f"{nome_do_ficheiro}_B{tamanho_buffer}_checkpoint"
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path=os.path.join('Training_BoxConveyor', 'Model_saves', checkpoint_tag),
    name_prefix=checkpoint_tag,
)

# Periodic evaluation on the training environment itself; the best model seen
# so far is stored under the "<name>_B<buffer>_backup/Best_model" directory.
best_model_dir = os.path.join(
    'Training_BoxConveyor',
    'Model_saves',
    f"{nome_do_ficheiro}_B{tamanho_buffer}_backup",
    'Best_model',
)
eval_callback = EvalCallback(
    env,
    n_eval_episodes=3,
    best_model_save_path=best_model_dir,
    log_path=log_path,
    eval_freq=4096,
    deterministic=False,
    render=False,
)

# NOTE(review): TensorboardCallback is imported from
# stable_baselines3.common.callbacks at the top of the notebook — confirm the
# installed version actually provides it.
callback = CallbackList(
    [
        checkpoint_callback,
        ProgressBarCallback(),
        TensorboardCallback(),
        eval_callback,
    ]
)

# 6 - Inicia treino

In [None]:
n_steps = 15000  # number of training steps passed to model.learn

# DrillEnv.step sends and receives through this module-level socket.
ws = websocket.WebSocket()
ws.connect("ws://127.0.0.1:12000")
time.sleep(1)  # presumably lets the server settle after connecting — TODO confirm

try:
    # Increase the range to repeat model.learn several times in a row.
    for _ in range(1):
        model.learn(total_timesteps=n_steps, log_interval=200, callback=callback)
finally:
    # Always release the connection, even when training fails or is
    # interrupted; the inference cell opens its own socket.
    ws.close()

# 7 - Salva o modelo (caso necessário)

In [None]:
# Persist the current model to `save_path` (built in the configuration cells).
model.save(save_path)

# 8 - Carrega o modelo (caso necessário)

In [None]:
# --- Settings of the model to load -------------------------------------------
nome_do_ficheiro_carregar = "PPO"  # base name the model was saved under
algoritmo_carregar = PPO           # algorithm class used to load the file
tamanho_buffer_carregar = 6        # buffer size that is part of the file name

# --- Load ---------------------------------------------------------------------
# DrillEnv reads the global `tamanho_buffer` on reset, so keep it in sync with
# the model being loaded.
tamanho_buffer = tamanho_buffer_carregar
load_path = os.path.join(
    "Training_BoxConveyor",
    "Model_saves",
    f"{nome_do_ficheiro_carregar}_B{tamanho_buffer_carregar}",
)
model = algoritmo_carregar.load(load_path)

print("Carregando ficheiro: ", nome_do_ficheiro_carregar, "no caminho: ", load_path)
print("\n", "*" * 100, "\n")
print(model.policy)

# 9 - Utiliza modelo treinado no ambiente

In [None]:

# Keep the raw environment under its own name. The factory lambda looks the
# name up when it is called, so reusing `env` for the vectorised wrapper only
# worked because the wrapper calls the factory before `env` is rebound.
drill_env = DrillEnv()
drill_env.setprint(True)  # print states and actions on every step
env = DummyVecEnv([lambda: drill_env])

# DrillEnv.step sends and receives through this module-level socket.
ws = websocket.WebSocket()
ws.connect("ws://127.0.0.1:12000")
time.sleep(1)

try:
    # Put the can conveyor in automatic mode.
    mensagem = encode_json('CanConveyor_automatico', [])
    ws.send(mensagem)
    # Put the can gripper (handling station) in automatic mode.
    mensagem = encode_json('Handling_automatico', [])
    time.sleep(1)
    ws.send(mensagem)

    obs = env.reset()
    # Runs until the cell is interrupted.
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            obs = env.reset()
finally:
    # Release the connection when the loop is interrupted or fails.
    ws.close()