# Set-up

In [7]:
import numpy as np
import pandas as pd
from tensorforce.agents import Agent

In [8]:
# Seed the legacy global NumPy RNG so the synthetic data below is reproducible.
np.random.seed(0)
n_train = 10

# Synthetic training data. The uniform draws happen in the order A, B, C, D;
# keep that order so the seeded values do not change.
# Later cells pass X_train as actions, Z_train as states and Y_train as rewards.
X_train = pd.DataFrame(np.random.uniform(size=n_train), columns=["A"])
Z_train = pd.DataFrame(
    np.vstack([np.random.uniform(size=n_train), np.random.uniform(size=n_train)]).T,
    columns=["B", "C"],
)
Y_train = pd.DataFrame(np.random.uniform(size=n_train), columns=["D"])

# Agent with a single float state of shape (2,) and a single float action of
# shape (1,), both bounded to [0, 1].
agent = Agent.create(
    agent="tensorforce",
    states={"type": "float", "shape": (2,), "min_value": (0, 0), "max_value": (1, 1)},
    actions={"type": "float", "shape": (1,), "min_value": 0, "max_value": 1},
    max_episode_timesteps=2,
    update=64,
    optimizer={"optimizer": "adam", "learning_rate": 1e-3},
    objective="policy_gradient",
    reward_estimation={"horizon": 1},
    exploration=10.0,
    parallel_interactions=n_train,
)

# 1st Bug

## Bugs in batch mode of `Agent.experience`

In [9]:
# Batch mode, attempt 1: states and actions are lists of per-row dictionaries.
# In the recorded run this call raises a TensorforceError about the actions value.
states = [{"state": np.array(row)} for _, row in Z_train.iterrows()]
actions = [{"action": np.array(row)} for _, row in X_train.iterrows()]
# Flatten the single reward column into a 1-D array, one reward per row.
rewards = np.array(Y_train).reshape(-1)
# Every row is flagged as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)

TensorforceError: Invalid value for Agent.experience actions argument value: ArrayDict(action=[[0.5488135]]).

In [None]:
# Batch mode, attempt 2: states stay a list of dictionaries, while actions are
# passed as one 2-D np.ndarray built from the whole X_train frame.
states = [{"state": np.array(row)} for _, row in Z_train.iterrows()]
actions = np.array(X_train)
# Flatten the single reward column into a 1-D array, one reward per row.
rewards = np.array(Y_train).reshape(-1)
# Every row is flagged as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)

In [None]:
# Batch mode, attempt 3: states stay a list of dictionaries, while actions are
# a Python list holding one 1-D array per row of X_train.
states = [{"state": np.array(row)} for _, row in Z_train.iterrows()]
actions = [action_row for action_row in np.array(X_train)]
# Flatten the single reward column into a 1-D array, one reward per row.
rewards = np.array(Y_train).reshape(-1)
# Every row is flagged as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)

## Working code for experience (one timestep per call)

In [None]:
# Feed the training set to the agent one row at a time, updating after each row.
for i in range(X_train.shape[0]):
    # Wrap the i-th state, action and reward in one-element lists.
    current_state = [np.array(Z_train.iloc[i])]
    current_action = [np.array(X_train.iloc[i])]
    current_reward = list(Y_train.iloc[i])

    # Each row is recorded as a terminal timestep.
    agent.experience(
        states=current_state,
        actions=current_action,
        terminal=[True],
        reward=current_reward,
    )
    agent.update()


# 2nd Bug


## Bugs in input format for `Agent.experience`

In [12]:
# Single episode, attempt 1: the action is wrapped in a dictionary.
# In the recorded run this call raises a TensorforceError about the actions value.
states = dict(state=np.array(Z_train.iloc[0]))
actions = dict(action=np.array(X_train.iloc[0]))
# Collapse the one-element reward row into a 0-d array.
rewards = np.array(Y_train.iloc[0]).reshape(())
# 0-d boolean flag marking the step as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)

TensorforceError: Invalid value for Agent.experience actions argument value: ArrayDict(SINGLETON=ArrayDict(action=[[0.5488135]])).

In [13]:
# Single episode, attempt 2: the action is passed as a bare np.ndarray.
# In the recorded run this call raises a TensorforceError saying that
# numpy.ndarray is not dict.
states = dict(state=np.array(Z_train.iloc[0]))
actions = np.array(X_train.iloc[0])
# Collapse the one-element reward row into a 0-d array.
rewards = np.array(Y_train.iloc[0]).reshape(())
# 0-d boolean flag marking the step as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)

TensorforceError: Invalid type for Agent.experience argument actions: <class 'numpy.ndarray'> is not dict.

## Working code for experience (actions passed as a list)

In [14]:
# Single episode, attempt 3: the action is passed as a plain Python list of the
# values in the first row of X_train.
states = dict(state=np.array(Z_train.iloc[0]))
actions = [value for value in np.array(X_train.iloc[0])]
# Collapse the one-element reward row into a 0-d array.
rewards = np.array(Y_train.iloc[0]).reshape(())
# 0-d boolean flag marking the step as terminal.
terminal = np.ones(rewards.shape, dtype=bool)

agent.experience(
    states=states,
    actions=actions,
    terminal=terminal,
    reward=rewards,
)