## 1. A gentle example of using ReinforcementLearning.jl

In [None]:
import Pkg;
Pkg.add("ReinforcementLearning");
using ReinforcementLearning;

In [None]:
# Instantiate the RandomWalk1D environment with its default arguments.
# `env` is a notebook-level variable that later cells step through and query.
env = RandomWalk1D()

### random policy

In [None]:
# Roll out one episode by hand, taking a randomly sampled action at every step.
# The action space is queried from `env` directly: the shorthand `A` is only
# defined in a later cell, so relying on it here raises an UndefVarError when
# the notebook is executed top to bottom.
reset!(env)  # start from the initial state so this cell can be re-run safely
while true
    env(rand(action_space(env)))
    # Stop as soon as the environment reports a terminal state.
    is_terminated(env) && break
end

In [None]:
# Same experiment through the library's `run` entry point: a RandomPolicy acting
# on a fresh RandomWalk1D, together with StopAfterEpisode(10) and a
# TotalRewardPerEpisode instance.
# NOTE(review): presumably this ends after 10 episodes and records the total
# reward of each one — confirm against the package documentation.
run(
    RandomPolicy(),
    RandomWalk1D(),
    StopAfterEpisode(10),
    TotalRewardPerEpisode())

### tabular policy

In [None]:
# Create a tabular policy that maps every state to the same action.
S = state_space(env);
A = action_space(env);
# NS and NA are the *sizes* of the two spaces. NA has to be a count (not the
# action space object itself) because it is passed as `n_action` when the
# tabular Q approximator is built in a later cell.
NS, NA = length(S), length(A);
# Each state index in 1:NS is mapped to action 2.
tabular_policy = TabularPolicy(; table=Dict(s => 2 for s in 1:NS));

In [None]:
# Run the fixed tabular policy on a fresh RandomWalk1D, using the same
# StopAfterEpisode(10) / TotalRewardPerEpisode arguments as the random-policy run.
run(
   tabular_policy,
   RandomWalk1D(),
   StopAfterEpisode(10),
   TotalRewardPerEpisode()
)

### `QBasedPolicy`

In [None]:
Pkg.add("Flux")
using Flux: InvDecay

In [None]:
# `MonteCarloLearner + EpsilonGreedyExplorer`
# The learner wraps a tabular Q approximator sized by NS and NA and configured
# with an InvDecay(1.0) optimiser; the explorer is constructed with 0.1.
policy = QBasedPolicy(;
    learner=MonteCarloLearner(;
        approximator=TabularQApproximator(;
            n_state=NS,
            n_action=NA,
            opt=InvDecay(1.0),
        ),
    ),
    explorer=EpsilonGreedyExplorer(0.1),
)

In [None]:
# Run the Q-based policy on a fresh RandomWalk1D, using the same
# StopAfterEpisode(10) / TotalRewardPerEpisode arguments as the earlier runs.
run(
   policy,
   RandomWalk1D(),
   StopAfterEpisode(10),
   TotalRewardPerEpisode()
)

### Wrap the policy and a trajectory into an `Agent`

In [None]:
# Bundle the Q-based `policy` with a new VectorSARTTrajectory into an Agent.
# NOTE(review): presumably the trajectory stores the experience the learner
# updates from — confirm against the package documentation.
agent = Agent(policy=policy, trajectory=VectorSARTTrajectory())

In [None]:
# Run the agent on the notebook-level `env` (not a fresh RandomWalk1D as in the
# earlier runs), again with StopAfterEpisode(10) and TotalRewardPerEpisode.
run(agent, env, StopAfterEpisode(10), TotalRewardPerEpisode())