In [1]:
import gym
import numpy as np
from collections import defaultdict
import random




In [2]:
taxi_env = gym.make('Taxi-v3')

In [3]:
print('Action Space {}'.format(taxi_env.action_space))
print('State Space {}'.format(taxi_env.observation_space))

Action Space Discrete(6)
State Space Discrete(500)


In [4]:
initial_state = taxi_env.reset()
print('Initial State {}'.format(initial_state))

Initial State 393


In [5]:
#(taxi row, taxi column, passenger location, destination location)
print('Decoded state ', list(taxi_env.env.decode(initial_state)))
taxi_env.render()

Decoded state  [3, 4, 3, 1]
+---------+
|R: | : :[35mG[0m|
| : | : : |
| : : : : |
| | : | :[43m [0m|
|Y| : |[34;1mB[0m: |
+---------+



In [6]:
# the reward table
taxi_env.env.P[initial_state]

{0: [(1.0, 493, -1, False)],
 1: [(1.0, 293, -1, False)],
 2: [(1.0, 393, -1, False)],
 3: [(1.0, 373, -1, False)],
 4: [(1.0, 393, -10, False)],
 5: [(1.0, 393, -10, False)]}

In [7]:
taxi_env.step(0)

(493, -1, False, {'prob': 1.0})

In [8]:
taxi_env.render()

+---------+
|R: | : :[35mG[0m|
| : | : : |
| : : : : |
| | : | : |
|Y| : |[34;1mB[0m:[43m [0m|
+---------+
  (South)


In [9]:
# Q learning hyperparameters
learning_rate = 0.1
gamma = 0.6
epsilon = 0.8

In [10]:
# initializing the Q learning
qtable = np.zeros([taxi_env.observation_space.n, taxi_env.action_space.n])

In [11]:
def q_learning_update(qtable, env, state, epsilon):
    
    trade_off = random.uniform(0, 1)
    
    if trade_off > epsilon:
        action = env.action_space.sample()
    else:
        action = select_optimal_action(qtable, state)
    
    next_state, reward, done, _ = env.step(action)
    old_q_value = qtable[state][action]
    next_max = np.max(qtable[next_state])
    new_q_value = (1 - learning_rate) * old_q_value + learning_rate * (reward + gamma * next_max) 
    
    # update q table
    qtable[state][action] = new_q_value
    
    return next_state, reward, done

In [12]:
def select_optimal_action(qtable, state):
    
    if np.sum(qtable[state]) == 0:
        return random.randint(0, qtable.shape[1]-1)
    
    return np.argmax(qtable[state])

In [13]:
def train_agent(qtable, env, episodes, epsilon):
    for i in range(episodes):
        state = env.reset()
        epochs = 0
        penalties, total_reward = 0, 0
        done =False
        
        while not done:
            state, reward, done = q_learning_update(qtable, env, state, epsilon)
            total_reward += 1
            
            if reward == -10:
                penalties +=1
            
            epochs += 1
        
        print('\nTraining episode:{}'.format(i+1))
        print('Time steps:{}, Penalties:{}, Reward:{}'.format(epochs, penalties, total_reward))
        
    print('Training finished.\n')
    
    return qtable      

In [14]:
qtable = train_agent(qtable, taxi_env, 5000, epsilon)


Training episode:1
Time steps:200, Penalties:38, Reward:200

Training episode:2
Time steps:200, Penalties:44, Reward:200

Training episode:3
Time steps:200, Penalties:47, Reward:200

Training episode:4
Time steps:200, Penalties:44, Reward:200

Training episode:5
Time steps:200, Penalties:39, Reward:200

Training episode:6
Time steps:200, Penalties:43, Reward:200

Training episode:7
Time steps:200, Penalties:36, Reward:200

Training episode:8
Time steps:200, Penalties:40, Reward:200

Training episode:9
Time steps:200, Penalties:40, Reward:200

Training episode:10
Time steps:22, Penalties:3, Reward:22

Training episode:11
Time steps:200, Penalties:30, Reward:200

Training episode:12
Time steps:200, Penalties:18, Reward:200

Training episode:13
Time steps:200, Penalties:24, Reward:200

Training episode:14
Time steps:200, Penalties:15, Reward:200

Training episode:15
Time steps:200, Penalties:10, Reward:200

Training episode:16
Time steps:200, Penalties:35, Reward:200

Training episode:17


Training episode:155
Time steps:200, Penalties:13, Reward:200

Training episode:156
Time steps:200, Penalties:13, Reward:200

Training episode:157
Time steps:189, Penalties:20, Reward:189

Training episode:158
Time steps:200, Penalties:14, Reward:200

Training episode:159
Time steps:200, Penalties:5, Reward:200

Training episode:160
Time steps:108, Penalties:4, Reward:108

Training episode:161
Time steps:200, Penalties:10, Reward:200

Training episode:162
Time steps:96, Penalties:5, Reward:96

Training episode:163
Time steps:200, Penalties:12, Reward:200

Training episode:164
Time steps:144, Penalties:6, Reward:144

Training episode:165
Time steps:200, Penalties:20, Reward:200

Training episode:166
Time steps:164, Penalties:6, Reward:164

Training episode:167
Time steps:200, Penalties:14, Reward:200

Training episode:168
Time steps:200, Penalties:18, Reward:200

Training episode:169
Time steps:84, Penalties:7, Reward:84

Training episode:170
Time steps:200, Penalties:13, Reward:200

T


Training episode:296
Time steps:188, Penalties:18, Reward:188

Training episode:297
Time steps:200, Penalties:9, Reward:200

Training episode:298
Time steps:53, Penalties:5, Reward:53

Training episode:299
Time steps:200, Penalties:11, Reward:200

Training episode:300
Time steps:64, Penalties:5, Reward:64

Training episode:301
Time steps:100, Penalties:9, Reward:100

Training episode:302
Time steps:200, Penalties:15, Reward:200

Training episode:303
Time steps:83, Penalties:5, Reward:83

Training episode:304
Time steps:195, Penalties:10, Reward:195

Training episode:305
Time steps:191, Penalties:15, Reward:191

Training episode:306
Time steps:75, Penalties:4, Reward:75

Training episode:307
Time steps:200, Penalties:8, Reward:200

Training episode:308
Time steps:114, Penalties:5, Reward:114

Training episode:309
Time steps:200, Penalties:12, Reward:200

Training episode:310
Time steps:200, Penalties:12, Reward:200

Training episode:311
Time steps:140, Penalties:4, Reward:140

Training


Training episode:456
Time steps:121, Penalties:10, Reward:121

Training episode:457
Time steps:114, Penalties:4, Reward:114

Training episode:458
Time steps:200, Penalties:17, Reward:200

Training episode:459
Time steps:7, Penalties:0, Reward:7

Training episode:460
Time steps:82, Penalties:7, Reward:82

Training episode:461
Time steps:46, Penalties:1, Reward:46

Training episode:462
Time steps:200, Penalties:16, Reward:200

Training episode:463
Time steps:81, Penalties:5, Reward:81

Training episode:464
Time steps:200, Penalties:8, Reward:200

Training episode:465
Time steps:137, Penalties:8, Reward:137

Training episode:466
Time steps:200, Penalties:15, Reward:200

Training episode:467
Time steps:200, Penalties:17, Reward:200

Training episode:468
Time steps:64, Penalties:8, Reward:64

Training episode:469
Time steps:47, Penalties:5, Reward:47

Training episode:470
Time steps:188, Penalties:16, Reward:188

Training episode:471
Time steps:64, Penalties:5, Reward:64

Training episode:


Training episode:615
Time steps:164, Penalties:14, Reward:164

Training episode:616
Time steps:200, Penalties:15, Reward:200

Training episode:617
Time steps:75, Penalties:4, Reward:75

Training episode:618
Time steps:26, Penalties:2, Reward:26

Training episode:619
Time steps:58, Penalties:3, Reward:58

Training episode:620
Time steps:97, Penalties:7, Reward:97

Training episode:621
Time steps:200, Penalties:12, Reward:200

Training episode:622
Time steps:23, Penalties:1, Reward:23

Training episode:623
Time steps:8, Penalties:0, Reward:8

Training episode:624
Time steps:200, Penalties:11, Reward:200

Training episode:625
Time steps:88, Penalties:5, Reward:88

Training episode:626
Time steps:42, Penalties:5, Reward:42

Training episode:627
Time steps:74, Penalties:3, Reward:74

Training episode:628
Time steps:69, Penalties:5, Reward:69

Training episode:629
Time steps:30, Penalties:3, Reward:30

Training episode:630
Time steps:8, Penalties:0, Reward:8

Training episode:631
Time steps


Training episode:779
Time steps:116, Penalties:12, Reward:116

Training episode:780
Time steps:17, Penalties:1, Reward:17

Training episode:781
Time steps:199, Penalties:14, Reward:199

Training episode:782
Time steps:108, Penalties:8, Reward:108

Training episode:783
Time steps:102, Penalties:7, Reward:102

Training episode:784
Time steps:92, Penalties:9, Reward:92

Training episode:785
Time steps:107, Penalties:6, Reward:107

Training episode:786
Time steps:72, Penalties:5, Reward:72

Training episode:787
Time steps:35, Penalties:1, Reward:35

Training episode:788
Time steps:28, Penalties:2, Reward:28

Training episode:789
Time steps:97, Penalties:9, Reward:97

Training episode:790
Time steps:70, Penalties:1, Reward:70

Training episode:791
Time steps:79, Penalties:8, Reward:79

Training episode:792
Time steps:43, Penalties:4, Reward:43

Training episode:793
Time steps:15, Penalties:1, Reward:15

Training episode:794
Time steps:182, Penalties:11, Reward:182

Training episode:795
Tim


Training episode:980
Time steps:65, Penalties:1, Reward:65

Training episode:981
Time steps:34, Penalties:0, Reward:34

Training episode:982
Time steps:42, Penalties:3, Reward:42

Training episode:983
Time steps:106, Penalties:7, Reward:106

Training episode:984
Time steps:103, Penalties:9, Reward:103

Training episode:985
Time steps:25, Penalties:0, Reward:25

Training episode:986
Time steps:38, Penalties:1, Reward:38

Training episode:987
Time steps:101, Penalties:4, Reward:101

Training episode:988
Time steps:195, Penalties:18, Reward:195

Training episode:989
Time steps:34, Penalties:1, Reward:34

Training episode:990
Time steps:48, Penalties:4, Reward:48

Training episode:991
Time steps:50, Penalties:4, Reward:50

Training episode:992
Time steps:28, Penalties:0, Reward:28

Training episode:993
Time steps:84, Penalties:7, Reward:84

Training episode:994
Time steps:75, Penalties:5, Reward:75

Training episode:995
Time steps:33, Penalties:0, Reward:33

Training episode:996
Time step

Training episode:1237
Time steps:33, Penalties:2, Reward:33

Training episode:1238
Time steps:25, Penalties:1, Reward:25

Training episode:1239
Time steps:21, Penalties:0, Reward:21

Training episode:1240
Time steps:14, Penalties:3, Reward:14

Training episode:1241
Time steps:22, Penalties:0, Reward:22

Training episode:1242
Time steps:11, Penalties:1, Reward:11

Training episode:1243
Time steps:37, Penalties:2, Reward:37

Training episode:1244
Time steps:30, Penalties:2, Reward:30

Training episode:1245
Time steps:83, Penalties:5, Reward:83

Training episode:1246
Time steps:37, Penalties:0, Reward:37

Training episode:1247
Time steps:92, Penalties:8, Reward:92

Training episode:1248
Time steps:15, Penalties:0, Reward:15

Training episode:1249
Time steps:19, Penalties:0, Reward:19

Training episode:1250
Time steps:66, Penalties:4, Reward:66

Training episode:1251
Time steps:16, Penalties:1, Reward:16

Training episode:1252
Time steps:15, Penalties:1, Reward:15

Training episode:1253
Ti


Training episode:1399
Time steps:108, Penalties:4, Reward:108

Training episode:1400
Time steps:80, Penalties:9, Reward:80

Training episode:1401
Time steps:15, Penalties:0, Reward:15

Training episode:1402
Time steps:38, Penalties:4, Reward:38

Training episode:1403
Time steps:14, Penalties:1, Reward:14

Training episode:1404
Time steps:96, Penalties:4, Reward:96

Training episode:1405
Time steps:88, Penalties:5, Reward:88

Training episode:1406
Time steps:19, Penalties:0, Reward:19

Training episode:1407
Time steps:35, Penalties:4, Reward:35

Training episode:1408
Time steps:15, Penalties:1, Reward:15

Training episode:1409
Time steps:69, Penalties:4, Reward:69

Training episode:1410
Time steps:15, Penalties:1, Reward:15

Training episode:1411
Time steps:26, Penalties:1, Reward:26

Training episode:1412
Time steps:45, Penalties:1, Reward:45

Training episode:1413
Time steps:14, Penalties:1, Reward:14

Training episode:1414
Time steps:20, Penalties:1, Reward:20

Training episode:1415


Training episode:1561
Time steps:68, Penalties:6, Reward:68

Training episode:1562
Time steps:14, Penalties:1, Reward:14

Training episode:1563
Time steps:22, Penalties:0, Reward:22

Training episode:1564
Time steps:22, Penalties:2, Reward:22

Training episode:1565
Time steps:19, Penalties:0, Reward:19

Training episode:1566
Time steps:34, Penalties:4, Reward:34

Training episode:1567
Time steps:55, Penalties:8, Reward:55

Training episode:1568
Time steps:30, Penalties:2, Reward:30

Training episode:1569
Time steps:30, Penalties:1, Reward:30

Training episode:1570
Time steps:23, Penalties:1, Reward:23

Training episode:1571
Time steps:45, Penalties:3, Reward:45

Training episode:1572
Time steps:17, Penalties:0, Reward:17

Training episode:1573
Time steps:16, Penalties:1, Reward:16

Training episode:1574
Time steps:17, Penalties:1, Reward:17

Training episode:1575
Time steps:18, Penalties:1, Reward:18

Training episode:1576
Time steps:27, Penalties:0, Reward:27

Training episode:1577
T


Training episode:1754
Time steps:45, Penalties:3, Reward:45

Training episode:1755
Time steps:65, Penalties:4, Reward:65

Training episode:1756
Time steps:30, Penalties:2, Reward:30

Training episode:1757
Time steps:20, Penalties:2, Reward:20

Training episode:1758
Time steps:18, Penalties:0, Reward:18

Training episode:1759
Time steps:23, Penalties:1, Reward:23

Training episode:1760
Time steps:17, Penalties:0, Reward:17

Training episode:1761
Time steps:16, Penalties:1, Reward:16

Training episode:1762
Time steps:31, Penalties:1, Reward:31

Training episode:1763
Time steps:7, Penalties:0, Reward:7

Training episode:1764
Time steps:16, Penalties:0, Reward:16

Training episode:1765
Time steps:17, Penalties:0, Reward:17

Training episode:1766
Time steps:32, Penalties:1, Reward:32

Training episode:1767
Time steps:32, Penalties:1, Reward:32

Training episode:1768
Time steps:14, Penalties:2, Reward:14

Training episode:1769
Time steps:56, Penalties:3, Reward:56

Training episode:1770
Tim

Training episode:1948
Time steps:27, Penalties:0, Reward:27

Training episode:1949
Time steps:16, Penalties:1, Reward:16

Training episode:1950
Time steps:33, Penalties:2, Reward:33

Training episode:1951
Time steps:24, Penalties:1, Reward:24

Training episode:1952
Time steps:10, Penalties:0, Reward:10

Training episode:1953
Time steps:18, Penalties:2, Reward:18

Training episode:1954
Time steps:8, Penalties:1, Reward:8

Training episode:1955
Time steps:38, Penalties:2, Reward:38

Training episode:1956
Time steps:29, Penalties:1, Reward:29

Training episode:1957
Time steps:22, Penalties:2, Reward:22

Training episode:1958
Time steps:17, Penalties:3, Reward:17

Training episode:1959
Time steps:106, Penalties:14, Reward:106

Training episode:1960
Time steps:34, Penalties:1, Reward:34

Training episode:1961
Time steps:16, Penalties:1, Reward:16

Training episode:1962
Time steps:11, Penalties:0, Reward:11

Training episode:1963
Time steps:7, Penalties:0, Reward:7

Training episode:1964
Tim

Training episode:2153
Time steps:18, Penalties:0, Reward:18

Training episode:2154
Time steps:25, Penalties:2, Reward:25

Training episode:2155
Time steps:21, Penalties:1, Reward:21

Training episode:2156
Time steps:37, Penalties:3, Reward:37

Training episode:2157
Time steps:10, Penalties:0, Reward:10

Training episode:2158
Time steps:66, Penalties:3, Reward:66

Training episode:2159
Time steps:13, Penalties:0, Reward:13

Training episode:2160
Time steps:13, Penalties:0, Reward:13

Training episode:2161
Time steps:10, Penalties:0, Reward:10

Training episode:2162
Time steps:20, Penalties:0, Reward:20

Training episode:2163
Time steps:34, Penalties:3, Reward:34

Training episode:2164
Time steps:40, Penalties:2, Reward:40

Training episode:2165
Time steps:42, Penalties:3, Reward:42

Training episode:2166
Time steps:8, Penalties:0, Reward:8

Training episode:2167
Time steps:21, Penalties:0, Reward:21

Training episode:2168
Time steps:17, Penalties:1, Reward:17

Training episode:2169
Time


Training episode:2371
Time steps:52, Penalties:1, Reward:52

Training episode:2372
Time steps:34, Penalties:3, Reward:34

Training episode:2373
Time steps:48, Penalties:1, Reward:48

Training episode:2374
Time steps:13, Penalties:2, Reward:13

Training episode:2375
Time steps:49, Penalties:5, Reward:49

Training episode:2376
Time steps:18, Penalties:1, Reward:18

Training episode:2377
Time steps:22, Penalties:0, Reward:22

Training episode:2378
Time steps:13, Penalties:1, Reward:13

Training episode:2379
Time steps:21, Penalties:2, Reward:21

Training episode:2380
Time steps:9, Penalties:1, Reward:9

Training episode:2381
Time steps:8, Penalties:0, Reward:8

Training episode:2382
Time steps:11, Penalties:0, Reward:11

Training episode:2383
Time steps:34, Penalties:2, Reward:34

Training episode:2384
Time steps:41, Penalties:1, Reward:41

Training episode:2385
Time steps:12, Penalties:0, Reward:12

Training episode:2386
Time steps:15, Penalties:1, Reward:15

Training episode:2387
Time 

Training episode:2597
Time steps:18, Penalties:0, Reward:18

Training episode:2598
Time steps:28, Penalties:2, Reward:28

Training episode:2599
Time steps:13, Penalties:1, Reward:13

Training episode:2600
Time steps:17, Penalties:0, Reward:17

Training episode:2601
Time steps:14, Penalties:2, Reward:14

Training episode:2602
Time steps:32, Penalties:3, Reward:32

Training episode:2603
Time steps:16, Penalties:0, Reward:16

Training episode:2604
Time steps:26, Penalties:3, Reward:26

Training episode:2605
Time steps:13, Penalties:1, Reward:13

Training episode:2606
Time steps:25, Penalties:3, Reward:25

Training episode:2607
Time steps:26, Penalties:0, Reward:26

Training episode:2608
Time steps:17, Penalties:0, Reward:17

Training episode:2609
Time steps:34, Penalties:4, Reward:34

Training episode:2610
Time steps:17, Penalties:0, Reward:17

Training episode:2611
Time steps:11, Penalties:0, Reward:11

Training episode:2612
Time steps:13, Penalties:0, Reward:13

Training episode:2613
Ti

Time steps:17, Penalties:0, Reward:17

Training episode:2741
Time steps:10, Penalties:0, Reward:10

Training episode:2742
Time steps:41, Penalties:2, Reward:41

Training episode:2743
Time steps:22, Penalties:2, Reward:22

Training episode:2744
Time steps:20, Penalties:0, Reward:20

Training episode:2745
Time steps:17, Penalties:0, Reward:17

Training episode:2746
Time steps:18, Penalties:1, Reward:18

Training episode:2747
Time steps:15, Penalties:1, Reward:15

Training episode:2748
Time steps:34, Penalties:5, Reward:34

Training episode:2749
Time steps:12, Penalties:0, Reward:12

Training episode:2750
Time steps:28, Penalties:3, Reward:28

Training episode:2751
Time steps:19, Penalties:2, Reward:19

Training episode:2752
Time steps:35, Penalties:4, Reward:35

Training episode:2753
Time steps:24, Penalties:3, Reward:24

Training episode:2754
Time steps:15, Penalties:1, Reward:15

Training episode:2755
Time steps:21, Penalties:2, Reward:21

Training episode:2756
Time steps:20, Penalties


Training episode:2883
Time steps:15, Penalties:2, Reward:15

Training episode:2884
Time steps:15, Penalties:2, Reward:15

Training episode:2885
Time steps:32, Penalties:3, Reward:32

Training episode:2886
Time steps:10, Penalties:0, Reward:10

Training episode:2887
Time steps:14, Penalties:1, Reward:14

Training episode:2888
Time steps:49, Penalties:2, Reward:49

Training episode:2889
Time steps:20, Penalties:2, Reward:20

Training episode:2890
Time steps:18, Penalties:0, Reward:18

Training episode:2891
Time steps:24, Penalties:2, Reward:24

Training episode:2892
Time steps:15, Penalties:1, Reward:15

Training episode:2893
Time steps:30, Penalties:3, Reward:30

Training episode:2894
Time steps:26, Penalties:1, Reward:26

Training episode:2895
Time steps:22, Penalties:3, Reward:22

Training episode:2896
Time steps:28, Penalties:1, Reward:28

Training episode:2897
Time steps:24, Penalties:3, Reward:24

Training episode:2898
Time steps:31, Penalties:1, Reward:31

Training episode:2899
T


Training episode:3018
Time steps:19, Penalties:1, Reward:19

Training episode:3019
Time steps:16, Penalties:1, Reward:16

Training episode:3020
Time steps:11, Penalties:0, Reward:11

Training episode:3021
Time steps:26, Penalties:1, Reward:26

Training episode:3022
Time steps:31, Penalties:3, Reward:31

Training episode:3023
Time steps:22, Penalties:1, Reward:22

Training episode:3024
Time steps:11, Penalties:0, Reward:11

Training episode:3025
Time steps:14, Penalties:1, Reward:14

Training episode:3026
Time steps:26, Penalties:3, Reward:26

Training episode:3027
Time steps:16, Penalties:0, Reward:16

Training episode:3028
Time steps:21, Penalties:0, Reward:21

Training episode:3029
Time steps:26, Penalties:4, Reward:26

Training episode:3030
Time steps:36, Penalties:5, Reward:36

Training episode:3031
Time steps:14, Penalties:1, Reward:14

Training episode:3032
Time steps:16, Penalties:0, Reward:16

Training episode:3033
Time steps:19, Penalties:1, Reward:19

Training episode:3034
T


Training episode:3284
Time steps:21, Penalties:1, Reward:21

Training episode:3285
Time steps:9, Penalties:0, Reward:9

Training episode:3286
Time steps:16, Penalties:1, Reward:16

Training episode:3287
Time steps:12, Penalties:0, Reward:12

Training episode:3288
Time steps:33, Penalties:3, Reward:33

Training episode:3289
Time steps:11, Penalties:0, Reward:11

Training episode:3290
Time steps:16, Penalties:0, Reward:16

Training episode:3291
Time steps:16, Penalties:1, Reward:16

Training episode:3292
Time steps:19, Penalties:1, Reward:19

Training episode:3293
Time steps:6, Penalties:0, Reward:6

Training episode:3294
Time steps:20, Penalties:2, Reward:20

Training episode:3295
Time steps:16, Penalties:0, Reward:16

Training episode:3296
Time steps:10, Penalties:1, Reward:10

Training episode:3297
Time steps:15, Penalties:1, Reward:15

Training episode:3298
Time steps:60, Penalties:7, Reward:60

Training episode:3299
Time steps:18, Penalties:1, Reward:18

Training episode:3300
Time 


Training episode:3552
Time steps:17, Penalties:2, Reward:17

Training episode:3553
Time steps:23, Penalties:1, Reward:23

Training episode:3554
Time steps:21, Penalties:2, Reward:21

Training episode:3555
Time steps:10, Penalties:0, Reward:10

Training episode:3556
Time steps:21, Penalties:2, Reward:21

Training episode:3557
Time steps:14, Penalties:1, Reward:14

Training episode:3558
Time steps:24, Penalties:0, Reward:24

Training episode:3559
Time steps:17, Penalties:1, Reward:17

Training episode:3560
Time steps:16, Penalties:2, Reward:16

Training episode:3561
Time steps:12, Penalties:0, Reward:12

Training episode:3562
Time steps:12, Penalties:0, Reward:12

Training episode:3563
Time steps:13, Penalties:0, Reward:13

Training episode:3564
Time steps:15, Penalties:1, Reward:15

Training episode:3565
Time steps:8, Penalties:0, Reward:8

Training episode:3566
Time steps:18, Penalties:2, Reward:18

Training episode:3567
Time steps:22, Penalties:1, Reward:22

Training episode:3568
Tim

Time steps:25, Penalties:1, Reward:25

Training episode:3714
Time steps:11, Penalties:0, Reward:11

Training episode:3715
Time steps:21, Penalties:2, Reward:21

Training episode:3716
Time steps:13, Penalties:0, Reward:13

Training episode:3717
Time steps:15, Penalties:1, Reward:15

Training episode:3718
Time steps:17, Penalties:3, Reward:17

Training episode:3719
Time steps:10, Penalties:0, Reward:10

Training episode:3720
Time steps:13, Penalties:0, Reward:13

Training episode:3721
Time steps:16, Penalties:0, Reward:16

Training episode:3722
Time steps:15, Penalties:1, Reward:15

Training episode:3723
Time steps:17, Penalties:1, Reward:17

Training episode:3724
Time steps:14, Penalties:0, Reward:14

Training episode:3725
Time steps:37, Penalties:1, Reward:37

Training episode:3726
Time steps:16, Penalties:3, Reward:16

Training episode:3727
Time steps:20, Penalties:1, Reward:20

Training episode:3728
Time steps:14, Penalties:0, Reward:14

Training episode:3729
Time steps:11, Penalties

Time steps:16, Penalties:1, Reward:16

Training episode:3854
Time steps:13, Penalties:0, Reward:13

Training episode:3855
Time steps:11, Penalties:2, Reward:11

Training episode:3856
Time steps:12, Penalties:1, Reward:12

Training episode:3857
Time steps:29, Penalties:2, Reward:29

Training episode:3858
Time steps:14, Penalties:1, Reward:14

Training episode:3859
Time steps:23, Penalties:4, Reward:23

Training episode:3860
Time steps:27, Penalties:1, Reward:27

Training episode:3861
Time steps:35, Penalties:2, Reward:35

Training episode:3862
Time steps:13, Penalties:0, Reward:13

Training episode:3863
Time steps:8, Penalties:0, Reward:8

Training episode:3864
Time steps:11, Penalties:0, Reward:11

Training episode:3865
Time steps:10, Penalties:0, Reward:10

Training episode:3866
Time steps:25, Penalties:4, Reward:25

Training episode:3867
Time steps:19, Penalties:0, Reward:19

Training episode:3868
Time steps:35, Penalties:3, Reward:35

Training episode:3869
Time steps:13, Penalties:0


Training episode:3999
Time steps:25, Penalties:1, Reward:25

Training episode:4000
Time steps:26, Penalties:3, Reward:26

Training episode:4001
Time steps:17, Penalties:0, Reward:17

Training episode:4002
Time steps:11, Penalties:1, Reward:11

Training episode:4003
Time steps:19, Penalties:2, Reward:19

Training episode:4004
Time steps:16, Penalties:0, Reward:16

Training episode:4005
Time steps:20, Penalties:1, Reward:20

Training episode:4006
Time steps:19, Penalties:1, Reward:19

Training episode:4007
Time steps:17, Penalties:0, Reward:17

Training episode:4008
Time steps:20, Penalties:1, Reward:20

Training episode:4009
Time steps:15, Penalties:3, Reward:15

Training episode:4010
Time steps:22, Penalties:3, Reward:22

Training episode:4011
Time steps:18, Penalties:0, Reward:18

Training episode:4012
Time steps:28, Penalties:4, Reward:28

Training episode:4013
Time steps:10, Penalties:0, Reward:10

Training episode:4014
Time steps:17, Penalties:0, Reward:17

Training episode:4015
T


Training episode:4136
Time steps:10, Penalties:0, Reward:10

Training episode:4137
Time steps:18, Penalties:3, Reward:18

Training episode:4138
Time steps:17, Penalties:2, Reward:17

Training episode:4139
Time steps:19, Penalties:1, Reward:19

Training episode:4140
Time steps:14, Penalties:0, Reward:14

Training episode:4141
Time steps:13, Penalties:1, Reward:13

Training episode:4142
Time steps:22, Penalties:0, Reward:22

Training episode:4143
Time steps:24, Penalties:2, Reward:24

Training episode:4144
Time steps:23, Penalties:4, Reward:23

Training episode:4145
Time steps:29, Penalties:2, Reward:29

Training episode:4146
Time steps:13, Penalties:0, Reward:13

Training episode:4147
Time steps:19, Penalties:1, Reward:19

Training episode:4148
Time steps:28, Penalties:2, Reward:28

Training episode:4149
Time steps:15, Penalties:2, Reward:15

Training episode:4150
Time steps:13, Penalties:0, Reward:13

Training episode:4151
Time steps:10, Penalties:0, Reward:10

Training episode:4152
T


Training episode:4275
Time steps:18, Penalties:0, Reward:18

Training episode:4276
Time steps:33, Penalties:4, Reward:33

Training episode:4277
Time steps:18, Penalties:1, Reward:18

Training episode:4278
Time steps:16, Penalties:2, Reward:16

Training episode:4279
Time steps:10, Penalties:1, Reward:10

Training episode:4280
Time steps:14, Penalties:0, Reward:14

Training episode:4281
Time steps:11, Penalties:0, Reward:11

Training episode:4282
Time steps:15, Penalties:1, Reward:15

Training episode:4283
Time steps:15, Penalties:1, Reward:15

Training episode:4284
Time steps:21, Penalties:3, Reward:21

Training episode:4285
Time steps:20, Penalties:2, Reward:20

Training episode:4286
Time steps:24, Penalties:4, Reward:24

Training episode:4287
Time steps:15, Penalties:2, Reward:15

Training episode:4288
Time steps:17, Penalties:2, Reward:17

Training episode:4289
Time steps:19, Penalties:0, Reward:19

Training episode:4290
Time steps:17, Penalties:2, Reward:17

Training episode:4291
T

Time steps:13, Penalties:0, Reward:13

Training episode:4553
Time steps:19, Penalties:4, Reward:19

Training episode:4554
Time steps:21, Penalties:0, Reward:21

Training episode:4555
Time steps:19, Penalties:2, Reward:19

Training episode:4556
Time steps:15, Penalties:1, Reward:15

Training episode:4557
Time steps:23, Penalties:2, Reward:23

Training episode:4558
Time steps:30, Penalties:1, Reward:30

Training episode:4559
Time steps:11, Penalties:0, Reward:11

Training episode:4560
Time steps:27, Penalties:4, Reward:27

Training episode:4561
Time steps:17, Penalties:1, Reward:17

Training episode:4562
Time steps:14, Penalties:1, Reward:14

Training episode:4563
Time steps:15, Penalties:1, Reward:15

Training episode:4564
Time steps:13, Penalties:1, Reward:13

Training episode:4565
Time steps:20, Penalties:0, Reward:20

Training episode:4566
Time steps:15, Penalties:2, Reward:15

Training episode:4567
Time steps:14, Penalties:0, Reward:14

Training episode:4568
Time steps:15, Penalties

Time steps:17, Penalties:2, Reward:17

Training episode:4702
Time steps:23, Penalties:2, Reward:23

Training episode:4703
Time steps:20, Penalties:1, Reward:20

Training episode:4704
Time steps:11, Penalties:0, Reward:11

Training episode:4705
Time steps:14, Penalties:1, Reward:14

Training episode:4706
Time steps:17, Penalties:1, Reward:17

Training episode:4707
Time steps:15, Penalties:0, Reward:15

Training episode:4708
Time steps:19, Penalties:0, Reward:19

Training episode:4709
Time steps:11, Penalties:0, Reward:11

Training episode:4710
Time steps:24, Penalties:0, Reward:24

Training episode:4711
Time steps:22, Penalties:1, Reward:22

Training episode:4712
Time steps:18, Penalties:3, Reward:18

Training episode:4713
Time steps:16, Penalties:2, Reward:16

Training episode:4714
Time steps:25, Penalties:3, Reward:25

Training episode:4715
Time steps:19, Penalties:0, Reward:19

Training episode:4716
Time steps:23, Penalties:2, Reward:23

Training episode:4717
Time steps:25, Penalties


Training episode:4849
Time steps:21, Penalties:1, Reward:21

Training episode:4850
Time steps:20, Penalties:0, Reward:20

Training episode:4851
Time steps:20, Penalties:0, Reward:20

Training episode:4852
Time steps:17, Penalties:3, Reward:17

Training episode:4853
Time steps:12, Penalties:0, Reward:12

Training episode:4854
Time steps:16, Penalties:0, Reward:16

Training episode:4855
Time steps:26, Penalties:3, Reward:26

Training episode:4856
Time steps:17, Penalties:3, Reward:17

Training episode:4857
Time steps:13, Penalties:0, Reward:13

Training episode:4858
Time steps:23, Penalties:1, Reward:23

Training episode:4859
Time steps:20, Penalties:0, Reward:20

Training episode:4860
Time steps:19, Penalties:2, Reward:19

Training episode:4861
Time steps:19, Penalties:2, Reward:19

Training episode:4862
Time steps:22, Penalties:1, Reward:22

Training episode:4863
Time steps:16, Penalties:0, Reward:16

Training episode:4864
Time steps:19, Penalties:1, Reward:19

Training episode:4865
T

In [15]:
initial_state = taxi_env.reset()
print(initial_state)
taxi_env.render()

348
+---------+
|[35mR[0m: | : :G|
| : | : : |
| : : : : |
| | :[43m [0m| : |
|[34;1mY[0m| : |B: |
+---------+



In [16]:
qtable[188]

array([-2.44411698, -2.44594249, -2.44495452, -2.44397586, -9.35370518,
       -9.34124151])