In [1]:
%matplotlib inline

In [2]:
import gym
import numpy as np
import random as pr
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

def one_hot(x, depth=16):
    """Return row `x` of a `depth` x `depth` identity matrix as a 2-D array.

    The result keeps a leading batch axis, so a valid index yields shape
    `(1, depth)`. Because the row is taken with a slice rather than an
    integer index, an index outside `0..depth-1` does not raise; it yields
    whatever the slice `x:x+1` selects (possibly zero rows).
    """
    basis = np.eye(depth)
    row_window = slice(x, x + 1)
    return basis[row_window]

# Number of uniform samples drawn per batch.
Nrandom = 1000


def _refill_rand_pool():
    """Draw a fresh batch of `Nrandom` samples and point the cursor past its end."""
    global rand_numbers, rand_numbers_idx
    rand_numbers = np.random.rand(Nrandom)
    rand_numbers_idx = len(rand_numbers)


# Populate the pool once at definition time.
_refill_rand_pool()


def get_rand():
    """Return the next pre-drawn random number, refilling the pool when empty.

    Samples are handed out from the end of `rand_numbers` towards the start;
    once the cursor reaches 0 a new batch is drawn with `np.random.rand`.
    """
    global rand_numbers_idx
    if not rand_numbers_idx:
        _refill_rand_pool()
    rand_numbers_idx -= 1
    return rand_numbers[rand_numbers_idx]

def train(env, model,
          num_episodes=2000,
          learning_rate=0.1,
          dis=.99):
    """Train `model` as a Q-function with epsilon-greedy rollouts, one fit per episode.

    Each episode is played to completion. For every step the one-hot encoded
    state and the model's predicted Q-row are recorded, with the entry of the
    action taken overwritten by its target (the reward alone on the final
    step, otherwise reward plus the discounted best next-state Q-value).
    After the episode ends the model is fitted once on all recorded pairs.

    Args:
        env: Environment whose `reset()` returns a state index, whose
            `step(a)` returns `(next_state, reward, done, info)`, and whose
            `action_space.sample()` returns a random action.
        model: Object exposing Keras-style `predict` and `fit`.
        num_episodes: Number of episodes to play.
        learning_rate: Not used inside this function; kept so existing
            callers that pass it keep working.
        dis: Discount factor applied to the best next-state Q-value.

    Returns:
        List holding the summed reward of each episode, in order.
    """
    rList = []
    for i in range(num_episodes):
        s = env.reset()
        # Exploration probability: 1/10 at episode 0, shrinking as i grows.
        e = 1.0 / ((i/50)+10)
        rAll = 0
        done = False

        Qs_all = []
        x_all = []
        while not done:
            # Encode the state once and reuse it for prediction and as the fit input.
            x = one_hot(s)
            # verbose=0 keeps the per-step predict calls from printing progress bars.
            Qs = model.predict(x, verbose=0)
            if get_rand() < e:
                a = env.action_space.sample()
            else:
                a = np.argmax(Qs)

            s1, reward, done, _ = env.step(a)
            if done:
                # No bootstrapping on the final step of the episode.
                Qs[0, a] = reward
            else:
                Qs1 = model.predict(one_hot(s1), verbose=0)
                Qs[0, a] = reward + dis*np.max(Qs1)

            Qs_all.append(Qs[0])
            x_all.append(x[0])

            rAll += reward
            s = s1

        print(f"Episode: {i}")
        model.fit(x=np.array(x_all), y=np.array(Qs_all))
        rList.append(rAll)
    return rList

def run(num_episodes=2000):
    """Train a single-layer Q-network on FrozenLake-v0 and plot per-episode rewards.

    Builds a one-`Dense`-layer Keras model mapping a one-hot state to one
    Q-value per action, trains it with `train`, prints the share of
    episodes' total reward as a percentage, and shows a bar chart of the
    reward obtained in each episode.

    Args:
        num_episodes: Number of training episodes passed to `train`.
    """
    env = gym.make("FrozenLake-v0")
    try:
        learning_rate = 0.1
        input_size = env.observation_space.n
        output_size = env.action_space.n
        dis = .99

        # input: 16, output: 4, bias: 4 ==> 16*4 + 4 = 68 trainable parameters
        model = tf.keras.Sequential([
            Dense(output_size, input_shape=[input_size],
                  kernel_initializer=tf.random_uniform_initializer(minval=0, maxval=0.01))
        ])
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        model.compile(optimizer=opt, loss='mse')
        model.summary()

        rList = train(env, model, num_episodes=num_episodes,
                      learning_rate=learning_rate, dis=dis)
    finally:
        # Release the environment even if model building or training fails.
        env.close()

    # Scale to an actual percentage (the value is followed by a "%" sign) and
    # avoid dividing by zero when no episode was run.
    # NOTE(review): treats the summed reward as the number of successful
    # episodes, as the message implies -- confirm reward is 1 per success.
    success_pct = 100.0 * sum(rList) / len(rList) if rList else 0.0
    print("Percent of successful episode: " + str(success_pct) + "%")
    plt.bar(range(len(rList)), rList, color='blue')
    plt.show()

In [None]:
# Train with run()'s default of 2000 episodes, then plot the per-episode rewards.
run()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 68        
Total params: 68
Trainable params: 68
Non-trainable params: 0
_________________________________________________________________
Episode: 0
Episode: 1
Episode: 2
Episode: 3
Episode: 4
Episode: 5
Episode: 6
Episode: 7
Episode: 8
Episode: 9
Episode: 10
Episode: 11
Episode: 12
Episode: 13
Episode: 14
Episode: 15
Episode: 16
Episode: 17
Episode: 18
Episode: 19
Episode: 20
Episode: 21
Episode: 22
Episode: 23
Episode: 24
Episode: 25
Episode: 26
Episode: 27
Episode: 28
Episode: 29
Episode: 30
Episode: 31
Episode: 32
Episode: 33
Episode: 34
Episode: 35
Episode: 36
Episode: 37
Episode: 38
Episode: 39
Episode: 40
Episode: 41
Episode: 42
Episode: 43
Episode: 44
Episode: 45
Episode: 46
Episode: 47
Episode: 48
Episode: 49
Episode: 50
Episode: 51
Episode: 52
Episode: 53
Episode: 54
Epis