# Dependencies

In [1]:
import os
import random

from selenium.webdriver import Chrome
from matplotlib import pyplot as plt

from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.vec_frame_stack import VecFrameStack

from Tensium.TensiumEnv import TensiumEnv

from Tensium.TrainAndLoggingCallback import TrainAndLoggingCallback

from Tensium.commands.SeleniumSetTextCommand import SeleniumSetTextCommand
from Tensium.commands.SeleniumClickCommand import SeleniumClickCommand

from Tensium.goals.TensiumTextEqualsGoal import TensiumTextEqualsGoal

def discount_error_login(driver: Chrome) -> bool:
    """Report whether the page is currently showing a login error message.

    Looks up the element matching ``.error-message-container`` and checks
    whether it carries any text. This function is handed to ``TensiumEnv``
    through its ``discounts`` argument.

    Args:
        driver: WebDriver session whose current page is inspected.

    Returns:
        True when the element exists and its text is not the empty string;
        False when the text is empty or the lookup fails for any reason.
    """
    try:
        # "css selector" is the locator-strategy string behind
        # By.CSS_SELECTOR. The generic find_element(by, value) call is used
        # because the find_element_by_* helpers were removed in Selenium 4.3.
        error_dialog = driver.find_element(
            "css selector", ".error-message-container")
        return error_dialog.text != ''
    except Exception:
        # Best-effort check: a missing/stale element just means "no error
        # shown". Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt/SystemExit still stop a running notebook cell.
        return False


# Directories

Directories for TensorBoard logs and for saved model checkpoints.

In [None]:
# Passed to PPO as `tensorboard_log`.
LOG_DIR = "./logs/"
# Passed to TrainAndLoggingCallback as `save_path`.
CHECKPOINT_DIR = "./train/"

# Callbacks

In [None]:
# Callback later handed to `model.learn(...)`.
# NOTE(review): presumably writes a checkpoint into `save_path` every
# `check_freq` steps -- confirm against TrainAndLoggingCallback.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10_000,
)

# Possible Actions

List of possible actions the Selenium agent can take.

In [None]:
# The commands the agent can choose from: fill in the user name, fill in the
# password, and click the login button.
actions = [
    SeleniumSetTextCommand('#user-name', 'standard_user'),
    SeleniumSetTextCommand('#password', 'secret_sauce'),
    SeleniumClickCommand('#login-button'),
]

# Randomise the order of the actions (presumably so the list position does not
# give away the correct sequence -- confirm intent). A single shuffle already
# produces a uniformly random permutation; repeating it adds no randomness.
random.shuffle(actions)

# Goal definition

Define the goal that tells the agent when it has reached the desired state.

In [None]:
# Goal checked against the `.title` element with the expected value 'PRODUCTS'.
# NOTE(review): 'lower' is given as the string 'true', not a bool, and the
# expected value is upper-case -- presumably the goal lower-cases both sides
# before comparing; confirm against TensiumTextEqualsGoal.
logged_in_goal = TensiumTextEqualsGoal(
    element_selector=".title",
    value_selector='PRODUCTS',
    config={'lower': 'true'},
)

# Environment

Create and configure our Tensium environment

In [None]:
# The chromedriver binary is expected to sit in the notebook's working directory.
work_dir = os.getcwd()

# os.path.join picks the right separator for the host OS instead of
# hard-coding a Windows backslash; on Windows the resulting path is unchanged.
env = TensiumEnv(
    driver_path=os.path.join(work_dir, 'chromedriver.exe'),
    actions=actions,
    discounts=[discount_error_login],
    goal=logged_in_goal,
)

# Build Model

Build a PPO reinforcement-learning model, train it, and save it to disk.

In [None]:
# PPO agent with an MLP policy, logging to TensorBoard under LOG_DIR.
model = PPO(
    'MlpPolicy',
    env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

# NOTE(review): total_timesteps (100) is smaller than n_steps (512) -- looks
# like a smoke-test budget rather than a real training run; confirm.
model.learn(callback=callback, total_timesteps=100)

# Persist the trained weights under a fixed file name.
model.save('thisisatestmodel')

# Test it out

Test out the model

In [None]:
# Reload the model written by the training cell instead of training a second
# time (this cell used to be a verbatim copy of the training cell, so it
# retrained and overwrote the model rather than testing it).
model = PPO.load('thisisatestmodel', env=env)

# Stable-Baselines3 wraps the environment in a VecEnv; stepping through that
# wrapper keeps the observation/step format consistent with what the model
# was trained on.
vec_env = model.get_env()
obs = vec_env.reset()

# Upper bound on the rollout so the cell always terminates.
MAX_TEST_STEPS = 20

for step in range(MAX_TEST_STEPS):
    # deterministic=True makes the policy return its preferred action
    # instead of sampling one.
    action, _ = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = vec_env.step(action)
    print('step {}: action={} reward={} done={}'.format(
        step, action[0], rewards[0], dones[0]))

    # Only one environment is wrapped, so index 0 is the episode flag.
    # NOTE(review): presumably `done` means the goal was reached or the
    # episode was otherwise ended by TensiumEnv -- confirm.
    if dones[0]:
        break