In [1]:
# widgets documentation: https://medium.com/@jdchipox/how-to-interact-with-jupyter-33a98686f24e 
from IPython.display import display, Markdown, clear_output
import ipywidgets as widgets
import itertools
import random
from enum import Enum
import hashlib

# Module-level participant identifier. Starts as None; the Submit handler inside
# set_study_id() assigns it, and reward_and_alg_selector() refuses to build its
# widgets while it is still None.
study_id = None

In [25]:
# Shared layout for every selector Box in this notebook: a full-width,
# bordered flex column with its children centred.
box_layout = widgets.Layout(
    display='flex',
    flex_flow='column',
    align_items='center',
    border='solid',
    width='100%',
)

In [29]:
class State(Enum):
    """
    One member per combination of the hungry / thirsty flags.
    """
    HUNGRY_AND_THIRSTY         = 0
    HUNGRY_AND_NOT_THIRSTY     = 1
    NOT_HUNGRY_AND_THIRSTY     = 2
    NOT_HUNGRY_AND_NOT_THIRSTY = 3

    def get_label(curr_state):
        """
        Return the caption shown next to the reward slider for ``curr_state``.

        Callable either as ``State.get_label(state)`` or ``state.get_label()``.

        Raises:
            TypeError: if ``curr_state`` is not a member of State. (This used
                to *return* an Exception instance, which callers would then
                have used as if it were a label.)
        """
        if not isinstance(curr_state, State):
            raise TypeError("curr_state may not be a State Enum")

        # Built inside the method on purpose: a plain dict assigned in the
        # Enum class body would itself be turned into an enum member.
        labels = {
            State.HUNGRY_AND_THIRSTY: "Reward for state: hungry AND thirsty",
            State.HUNGRY_AND_NOT_THIRSTY: "Reward for state: hungry AND not thirsty",
            State.NOT_HUNGRY_AND_THIRSTY: "Reward for state: not hungry AND thirsty",
            State.NOT_HUNGRY_AND_NOT_THIRSTY: "Reward for state: not hungry AND not thirsty",
        }
        return labels[curr_state]

In [None]:
# Set up the reward selector
def construct_reward_selector():
    """
    Build the reward-function picker.

    Creates one FloatSlider per State member (range -1 to 1, step 0.05,
    initial value 0, captioned with State.get_label) and returns them
    wrapped in a Box that uses the shared box_layout.
    """
    reward_sliders = [
        widgets.FloatSlider(
            value=0,
            min=-1,
            max=1,
            step=0.05,
            description=State.get_label(member),
            layout=widgets.Layout(width='700px'),
            # 'initial' lets the long caption take the width it needs.
            style={'description_width': 'initial'},
        )
        for member in State
    ]
    return widgets.Box(children=reward_sliders, layout=box_layout)

In [27]:
# Setup the learning alg selector
def construct_alg_selector():
    """
    Build the learning-algorithm picker.

    Returns a Box (shared box_layout) holding one Dropdown described
    "algorithm" whose options are None, "A2C", "DDQN" and "PPO".
    """
    choices_by_field = {
        "algorithm": [None, "A2C", "DDQN", "PPO"],
    }

    dropdowns = [
        widgets.Dropdown(
            options=options,
            description=field,
            style={'description_width': 'initial'},
        )
        for field, options in choices_by_field.items()
    ]
    return widgets.Box(children=dropdowns, layout=box_layout)

In [1]:
# setup the hyperparameter selector
def construct_hyperparam_selector(alg_name):
    """
    Build the hyperparameter pickers for one learning algorithm.

    alg_name is expected to be "A2C", "PPO" or "DDQN". For any other value an
    error message is printed and None is returned. Otherwise the result is a
    Box (shared box_layout) with one Dropdown per hyperparameter of that
    algorithm, in the order listed in ``fields_by_alg``. Every option list
    starts with None.
    """
    # Candidate values for every hyperparameter that any algorithm exposes.
    choices = {
        "gamma": [None, 0, 0.5, 0.8, 0.85, 0.9, 0.99, 1.0],
        "num_episodes": [None, 1000, 2500, 5000],
        "lr": [None, 1e-5, 1e-4, 1e-3, 1e-2],
        "entropy_coeff": [None, 1e-3, 5e-3, 1e-2, 5e-2, 0.1],
        "eps_clip": [None, 0, 0.1, 0.2, 0.3],
        "update_steps": [None, 1, 64, 256, 512, 1024],
        "n_step_update": [None, 1, 32, 64, 128, 256],
        "batch_size": [None, 1, 32, 64, 128, 256],
        "epsilon_min": [None, 0, 0.1, 0.15, 0.2],
        "epsilon_decay": [None, 100, 500, 1000, 5000, 10000, 50000],
        "reward_scaling_factor": [None, 1, 5, 10, 50, 100],
    }

    # Which hyperparameters each algorithm shows, in display order.
    fields_by_alg = (
        ("A2C", ("gamma", "num_episodes", "lr", "entropy_coeff",
                 "n_step_update", "reward_scaling_factor")),
        ("PPO", ("gamma", "num_episodes", "lr", "update_steps", "eps_clip",
                 "entropy_coeff", "reward_scaling_factor")),
        ("DDQN", ("gamma", "num_episodes", "lr", "update_steps", "batch_size",
                  "epsilon_min", "epsilon_decay", "reward_scaling_factor")),
    )

    # Matched with == (not a dict lookup) so any alg_name value, hashable or
    # not, simply falls through to the error branch.
    selected_fields = None
    for known_name, fields in fields_by_alg:
        if alg_name == known_name:
            selected_fields = fields
            break

    if selected_fields is None:
        print ("ERROR; algorithm ''{}'' not recognized. Please select an algorithm!".format(alg_name))
        return

    dropdowns = [
        widgets.Dropdown(
            options=choices[field],
            description=field,
            style={'description_width': 'initial'},
        )
        for field in selected_fields
    ]
    return widgets.Box(children=dropdowns, layout=box_layout)

In [4]:
def get_params(widget_ref):
    """
    Collect the current selections from a container widget.

    Returns a dict mapping each child's ``description`` to its ``value``, or
    None when ``widget_ref`` is None or its ``children`` attribute is None.
    A child that itself exposes ``children`` is read through its second
    child (index 1) instead.
    """
    if widget_ref is None or widget_ref.children is None:
        return None

    collected = {}
    for child in widget_ref.children:
        # NOTE(review): index 1 presumably skips a leading label widget in a
        # nested container - confirm against whatever builds such containers.
        source = child.children[1] if "children" in dir(child) else child
        collected[source.description] = source.value
    return collected

In [None]:
def reward_and_alg_selector():
    """
    Build a single Box containing the reward sliders and the algorithm dropdown.

    Requires the module-level ``study_id`` to have been set; while it is still
    None a reminder is printed and None is returned.

    The reward sliders (one per State member, range -1 to 1, step 0.05,
    initial value 0) and the "Algorithm Choice" dropdown are kept together as
    two groups, and the order of the two groups is randomised with
    random.shuffle on every call.
    """
    # Identity test: None is a singleton, so `is` is the idiomatic check.
    if study_id is None:
        print ("Please create a study ID by providing your name above and try again.")
        return

    reward_sliders = [
        widgets.FloatSlider(
            value=0,
            min=-1,
            max=1,
            step=0.05,
            description=State.get_label(member),
            layout=widgets.Layout(width='700px'),
            style={'description_width': 'initial'},
        )
        for member in State
    ]

    learning_alg_choices = {
        "Algorithm Choice": [None, "A2C", "DDQN", "PPO"],
    }
    alg_dropdowns = [
        widgets.Dropdown(
            options=options,
            description=field,
            style={'description_width': 'initial'},
        )
        for field, options in learning_alg_choices.items()
    ]

    # Shuffle the two groups (not the individual widgets) so the sliders stay
    # adjacent to each other whichever group ends up first.
    groups = [reward_sliders, alg_dropdowns]
    random.shuffle(groups)

    entries = [widget for group in groups for widget in group]
    return widgets.Box(children=entries, layout=box_layout)

In [7]:
def set_study_id():
    """
    Build the name-entry form that assigns the module-level ``study_id``.

    Returns a VBox with a text field and a Submit button on one row and an
    Output area beneath them. Clicking Submit clears the output area and then
    either
      * prints a prompt and leaves ``study_id`` untouched when the name is
        blank (empty or whitespace only), or
      * stores the MD5 hex digest of the entered name in the global
        ``study_id`` and prints it.
    """
    name_field = widgets.Text(
        placeholder='YOUR_NAME',
        description='Your name:',
        disabled=False
    )
    submit_button = widgets.Button(description="Submit")
    out = widgets.Output(layout={'border': '0px solid black'})

    def on_button_clicked(b):
        global study_id
        out.clear_output()
        with out:
            # Whitespace-only input is rejected as well as the empty string;
            # otherwise every such submission would get the same study ID.
            if not name_field.value.strip():
                print ("Please enter your name")
                return
            # The raw (unstripped) value is hashed so IDs for real names are
            # identical to those issued before the blank-input check changed.
            # NOTE(review): an unsalted MD5 of a name can be recovered by
            # hashing candidate names - treat study_id as an identifier, not
            # as anonymisation.
            study_id = hashlib.md5(name_field.value.encode()).hexdigest()
            print ("Your study ID is: {}.".format(study_id))

    submit_button.on_click(on_button_clicked)

    return widgets.VBox([widgets.HBox(children=[name_field, submit_button]), out])
