In [1]:
import gym
import gym_pacman
import os 
import numpy as np

In [2]:
# All coordinates are (row, col) pairs on the 10x10 board.
ghosts = [(0, 3), (2, 5), (3, 0), (4, 9), (8, 4)]

# Wall cells, grouped one board row per line (row 2 has no walls).
walls = [
    (0, 0), (0, 9),
    (1, 0), (1, 1), (1, 3), (1, 4), (1, 5), (1, 6), (1, 8), (1, 9),
    (3, 1), (3, 3), (3, 4), (3, 5), (3, 6), (3, 8),
    (4, 1), (4, 3), (4, 6), (4, 8),
    (5, 3), (5, 4), (5, 5), (5, 6),
    (6, 0), (6, 1), (6, 8), (6, 9),
    (7, 0), (7, 4), (7, 5), (7, 9),
    (8, 0), (8, 2), (8, 7), (8, 9),
    (9, 0), (9, 2), (9, 3), (9, 4), (9, 5), (9, 6), (9, 7), (9, 9),
]

# goal=55 is meant to be the number of cells carrying a +1 reward, so the
# agent reaches the goal once it has eaten everything in the world.
# NOTE(review): 100 cells - 44 walls - 5 ghosts leaves 51 free cells, so
# please confirm that 55 is really the intended count.
env_kwargs = dict(
    rows=10,
    cols=10,
    step_cost=1,
    goal=55,
    ghosts=ghosts,
    walls=walls,
)
env = gym.make('pacman-v0', **env_kwargs)

In [3]:
# Perform 5 random actions and render the board after each one
# (X = pacman, G = ghosts, <=> = walls).
env.reset(rows=10, cols=10, step_cost=1, ghosts=ghosts, goal=55)
print('The initial state is:')
env.render()
print()

# Number of ghost hits the agent can take before the demo stops.
n_lives = 3

# The step index is bound to `t` because the termination messages below
# report `t+1`. With a throwaway `_` index those prints raised a NameError on
# a fresh kernel (or silently reported a stale `t` left over from another cell).
for t in range(5):
    a = np.random.choice(env.actions)
    print('We take the action: '+a)
    
    transition = env.step(a)
    print('The transition we get is:')
    print(transition)
    print()
    
    env.render()
    print('\nThe total reward is: ' + str(env._total_reward))
    print()
    
    if transition.termination:
        if (env._total_reward == env._goal):
            # Goal reached: the accumulated reward equals the configured goal.
            print("Episode finished after {} timesteps".format(t+1))
            print('\nThe agent ate everything\n')
            break
        elif (n_lives == 1):
            # Last life lost: stop the demo.
            print("Episode finished after {} timesteps".format(t+1))
            print('\nThe agent hit a ghost\n')
            break
        else :
            # Lives remain: consume one and carry on after cool_reset
            # (presumably this puts pacman back on its start cell, as the
            # message below says; verify against gym_pacman).
            print('The agent hit a ghost\n')
            n_lives -= 1
            print('Going to the initial position...\n')
            print('Pacman still has ' + str(n_lives) + ' lives\n')
            env.cool_reset(ghosts=ghosts)

The initial state is:
[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' ' X ' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]

We take the action: left
The transition we get is:
Transition(state=(6, 5), action='left', next_state=(6, 4), reward=1.0, termination=False)

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '

In [4]:
# Run 5 episodes of at most 100 random steps each. An episode stops early,
# with a message, when the agent either reaches the goal (everything eaten)
# or loses its last life.
# The agent starts every episode with three lives and only dies on the third
# ghost hit. Each hit still adds the ghost's negative reward to the total, so
# that a learning agent can take it into account.

for i_episode in range(5):
    n_lives = 3
    print('\nSTART: The initial state is:')
    observation = env.reset(rows=10, cols=10, step_cost=1, ghosts=ghosts, goal=55)

    for t in range(100):
        # Show the board and the running total before acting.
        print(f'\nStep number {t}\n')
        env.render()
        print('\n')
        print('Total reward is:', env._total_reward)

        action = np.random.choice(env.actions)
        print(f'\nWe now take the action: {action}\n')

        transition = env.step(action)
        print('The transition we get is:')
        print(transition)

        # Nothing more to do for an ordinary, non-terminal step.
        if not transition.termination:
            continue

        ate_everything = env._total_reward == env._goal
        if ate_everything or n_lives == 1:
            # Either the goal was reached or the last life was just lost.
            print(f"Episode finished after {t + 1} timesteps")
            print('\nThe agent ate everything\n' if ate_everything
                  else '\nThe agent hit a ghost\n')
            break

        # A ghost was hit but lives remain: use one up and keep playing.
        print('The agent hit a ghost\n')
        n_lives -= 1
        print('Going to the initial position...\n')
        print(f'Pacman still has {n_lives} lives\n')
        env.cool_reset(ghosts=ghosts)


START: The initial state is:

Step number 0

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' ' X ' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: 0.0

We now take the action: left

The transition we get is:
Transition(state=(6, 5), action='left', next_state=(6, 4), reward=1.0, termination=False)

Step number 1

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' ' X ' '0.0' '0.0' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: 3.0

We now take the action: left

The transition we get is:
Transition(state=(6, 3), action='left', next_state=(6, 2), reward=1.0, termination=False)

Step number 8

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '

The transition we get is:
Transition(state=(8, 3), action='right', next_state=(8, 4), reward=-100.0, termination=True)
The agent hit a ghost

Going to the initial position...

Pacman still has 2 lives


Step number 58

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' ' X ' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: -89.0

We now take the action: left

The transition we get is:
Transition(state=(6, 5), action='left', next_state=(6, 4), reward=0.

Transition(state=(9, 1), action='left', next_state=(9, 1), reward=0.0, termination=False)

Step number 38

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' ' X ' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: -87.0

We now take the action: left

The transition we get is:
Transition(state=(9, 1), action='left', next_state=(9, 1), reward=0.0, termination=False)

Step number 39

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<

 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: -86.0

We now take the action: right

The transition we get is:
Transition(state=(7, 2), action='right', next_state=(7, 3), reward=0.0, termination=False)

Step number 84

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' ' X ' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: -86.0

We now take the action: down

The transition we get is:
Transition(state=(7, 3), action=


Step number 90

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' '0.0' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' ' X ' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: -87.0

We now take the action: up

The transition we get is:
Transition(state=(7, 8), action='up', next_state=(7, 8), reward=0.0, termination=False)

Step number 91

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.



Total reward is: 11.0

We now take the action: left

The transition we get is:
Transition(state=(4, 7), action='left', next_state=(4, 7), reward=0.0, termination=False)

Step number 43

[['<=>' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '<=>']
 ['0.0' '0.0' '0.0' '0.0' '0.0' ' G ' '0.0' '0.0' '0.0' '0.0']
 [' G ' '<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>' '0.0']
 ['0.0' '<=>' '0.0' '<=>' '0.0' '0.0' '<=>' ' X ' '<=>' ' G ']
 ['0.0' '0.0' '0.0' '<=>' '<=>' '<=>' '<=>' '0.0' '0.0' '0.0']
 ['<=>' '<=>' '0.0' '0.0' '0.0' '0.0' '0.0' '0.0' '<=>' '<=>']
 ['<=>' '0.0' '0.0' '0.0' '<=>' '<=>' '0.0' '0.0' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '0.0' ' G ' '0.0' '0.0' '<=>' '0.0' '<=>']
 ['<=>' '0.0' '<=>' '<=>' '<=>' '<=>' '<=>' '<=>' '0.0' '<=>']]


Total reward is: 11.0

We now take the action: down

The transition we get is:
Transition(state=(4, 7), action='down', next_state=(5, 7), reward=0.0, termination=False)

Step numb