# Sarsa on Gym 

In [None]:
import numpy as np
import gym
from IPython.display import clear_output
import time

## Parameters

In [None]:
# Learning rate: starting value, lower bound, and the factor it is
# multiplied by after each environment step while still above the bound.
lr, lrMin, lrDecay = 0.33, 0.001, 0.9999

# Discount factor applied to the next state-action value in the update.
gamma = 1.0

# Exploration rate for the epsilon-greedy policy: starting value, lower
# bound, and the factor it is multiplied by on steps with a nonzero reward.
epsilon, epsilonMin, epsilonDecay = 1.0, 0.001, 0.97

# Number of training episodes to run.
episodes = 2000

## Setup Gym Env

In [None]:
# Environment the agent is trained and later evaluated on.
env_name = 'FrozenLake-v0'
env = gym.make(env_name)

## Initialize Q Table

In [None]:
# One row per state and one column per action, all values starting at zero.
n_states = env.observation_space.n
n_actions = env.action_space.n
Q = np.zeros((n_states, n_actions))
print(Q)

## Sarsa Learning

In [None]:
def choose_action(s, Q):
    """Select an action for state ``s`` using an epsilon-greedy rule.

    A uniform random number is compared against the module-level
    ``epsilon``. If it is smaller, a random action index in
    ``[0, env.action_space.n)`` is returned; otherwise the index of the
    largest entry in row ``s`` of ``Q`` is returned (``np.argmax`` returns
    the first index when several entries tie).
    """
    explore = np.random.random() < epsilon
    if not explore:
        # Exploit: greedy with respect to the current value estimates.
        return np.argmax(Q[s, :])
    # Explore: ignore the value estimates entirely.
    return np.random.randint(0, env.action_space.n)

In [None]:
# Sarsa training: the update for (s, a) uses the action that is actually
# selected for the next state by the same epsilon-greedy rule.
for episode in range(1, episodes + 1):
    print("Episode {}/{}".format(episode, episodes))
    s = env.reset()
    a = choose_action(s, Q)
    done = False

    while not done:
        next_s, r, done, _ = env.step(a)
        next_a = choose_action(next_s, Q)

        # Move Q[s, a] towards the one-step target by a fraction lr.
        td_target = r + gamma * Q[next_s, next_a]
        Q[s, a] += lr * (td_target - Q[s, a])

        # The chosen next pair becomes the current pair for the next step.
        s, a = next_s, next_a

        # The learning rate shrinks after every step until it reaches lrMin.
        if lr > lrMin:
            lr *= lrDecay

        # Exploration is only reduced on steps that returned a nonzero reward.
        if r != 0 and epsilon > epsilonMin:
            epsilon *= epsilonDecay

## Final Q Table

In [None]:
# Display the current contents of the Q table (the learned values, provided
# the training cell has been run before this one).
print(Q)

## Testing

In [None]:
# Roll out a single episode using the purely greedy policy derived from Q
# (no exploration), redrawing the environment in place before every move.
s = env.reset()
done = False
step_count = 0

while not done:
    # wait=True defers clearing until the next output arrives, which avoids
    # flicker between frames.
    clear_output(wait=True)
    env.render()
    a = np.argmax(Q[s,:])
    s, r, done, _ = env.step(a)
    step_count += 1
    time.sleep(0.1)

# The loop exits immediately after the last step, so draw once more;
# otherwise the state the episode ended in is never displayed.
clear_output(wait=True)
env.render()

print("Total steps: ",step_count)