Import mario modules

In [1]:
#! /usr/bin/python3
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import *

Import plotting modules

In [2]:
import matplotlib.pyplot as plt
from matplotlib import animation, rc
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to echo the value of every top-level expression statement in a
# cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import random

Set up plotting

In [4]:
# NOTE(review): two backends are selected back to back; presumably only the
# later `notebook` magic ends up in effect -- confirm which one is intended.
%matplotlib inline
%matplotlib notebook
# Switch pyplot's interactive mode off.
plt.ioff()

Set up environment

In [5]:
# Build the environment registered under the id 'SuperMarioBros-1-1-v0'.
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
# Wrap it in JoypadSpace so the agent chooses from the RIGHT_ONLY action list.
env = JoypadSpace(env, RIGHT_ONLY)

Set up parameters

In [6]:
# Animation
save_animation = 0 # When truthy, handle_frame stores rendered frames on each agent for later animation
frames_skipped = 1 # A frame is captured only when the step index is a multiple of this value

# Generation
generation_amount = 10 # Number of generations the main loop plays

# Population
population_amount = 30 # Amount of agents per generation

# Agent
moves_amount = 5000 # Number of random moves generated for an agent that has no moves yet
moves_to_check = 100 # How many moves without increase in fitness until termination
mutation_rate = 0.001 # Chance of any move changing to a random move; play_generation overwrites this global


In [7]:
# Number of print_info calls made so far.
t = 0
def print_info(info, reward):
    """Print ``info`` and ``reward`` on every 100th call.

    Each call bumps the module-level counter ``t``; output is produced only
    when the counter is a multiple of 100.
    """
    global t
    t = t + 1
    if t % 100 == 0:
        print(info, reward)

In [8]:
def handle_frame(step, player):
    """Render the current frame and optionally keep it for the animation.

    When ``save_animation`` is truthy and ``step`` is a multiple of
    ``frames_skipped``, the frame is rendered as an RGB array, drawn with
    ``plt.imshow`` and the resulting image is appended (wrapped in a list)
    to ``player.images``. ``env.render()`` is always called afterwards.
    """
    # Short-circuits exactly like the nested ifs: the modulo is only
    # evaluated when animation saving is switched on.
    capture = save_animation and not step % frames_skipped
    if capture:
        frame = env.render(mode='rgb_array')
        player.images.append([plt.imshow(frame)])
    env.render()

In [9]:
def display_animation(player):
    """Build and return an ArtistAnimation from the frames stored on ``player``.

    The animation is drawn on ``player.fig`` from ``player.images`` with an
    interval of ``15 * frames_skipped`` between frames. The ``animation``
    rc group is then set to ``html='jshtml'`` before the animation object
    is returned.
    """
    movie = animation.ArtistAnimation(
        player.fig,
        player.images,
        interval=15*frames_skipped,
        blit=True,
    )
    rc('animation', html='jshtml')
    return movie

In [10]:
class Agent:
    """One member of a generation: a list of moves plus bookkeeping.

    Attributes set by ``__init__``:
        fitness: starts at 0; raised by check_fitness as the agent progresses.
        images:  frames collected by handle_frame for the animation.
        fig:     a new matplotlib figure created for this agent.
        moves:   a copy of ``fittest.moves`` when a truthy ``fittest`` is
                 given, otherwise an empty list.
    """

    def __init__(self, fittest):
        self.fitness = 0
        self.images = []
        self.fig = plt.figure()
        # Copy so that mutating this agent never alters the parent's moves.
        self.moves = fittest.moves.copy() if fittest else []

In [11]:
def check_fitness(player, fitness, past_fitness, window=None):
    """Record the latest fitness and report whether the agent has stalled.

    Args:
        player: object with a ``fitness`` attribute; it is raised to
            ``fitness`` when the new value is higher.
        fitness: fitness value observed on the current move.
        past_fitness: list used as a sliding window of recent fitness
            values; it is modified in place.
        window: number of recent moves to inspect. Defaults to the
            module-level ``moves_to_check``.

    Returns:
        True when the window is full and none of its values exceeds the
        oldest one (no progress for ``window`` moves); False otherwise.
    """
    if window is None:
        window = moves_to_check
    if player.fitness < fitness:
        player.fitness = fitness
    # Still filling the window: nothing to judge yet.
    if len(past_fitness) < window:
        past_fitness.append(fitness)
        return False
    # Slide the window forward by one move.
    past_fitness.pop(0)
    past_fitness.append(fitness)
    oldest = past_fitness[0]
    # Kill the player if no value in the window improves on the oldest one.
    return not any(value > oldest for value in past_fitness[:window])

In [12]:
def mutate_moves(player):
    """Randomly replace moves of ``player`` in place.

    Every entry of ``player.moves`` is independently replaced by a freshly
    sampled action with probability ``mutation_rate``.
    """
    # Walk the agent's actual move list rather than range(moves_amount):
    # if moves_amount was changed after the moves were generated, the old
    # bound would either raise IndexError or leave the tail unmutated.
    for index in range(len(player.moves)):
        if random.random() < mutation_rate:
            player.moves[index] = env.action_space.sample()

def agent_get_moves(player, population):
    """Give ``player`` its moves for this run.

    An agent with no moves receives ``moves_amount`` randomly sampled
    actions. An agent that already has moves is mutated, except when
    ``population`` is still empty: the first agent of a generation keeps
    its inherited moves untouched.
    """
    if player.moves:
        if population:
            mutate_moves(player)
        return
    player.moves.extend(env.action_space.sample() for _ in range(moves_amount))

In [13]:
def get_fittest(population):
    """Return the agent with the highest ``fitness`` in ``population``.

    On a tie the earliest agent wins. An empty population raises
    IndexError.
    """
    best = population[0]
    for candidate in population[1:]:
        # Strict comparison keeps the first of several equally fit agents.
        if candidate.fitness > best.fitness:
            best = candidate
    return best

def play_generation(fittest):
    """Play one generation of agents and return them as a list.

    Each of the ``population_amount`` agents is created from ``fittest``
    (which may be None), given its moves via agent_get_moves and stepped
    through the environment until the episode ends, the agent stalls
    according to check_fitness, or its moves run out.

    Side effects: resets and steps the global ``env``, prints one line per
    agent, closes all matplotlib figures after each agent, and rewrites the
    global ``mutation_rate`` (raised by 0.1, capped at 1.0, when the best
    fitness equals that of ``fittest``; otherwise set to 0.01).

    Args:
        fittest: best agent of the previous generation, or None.

    Returns:
        The list of agents that were played.
    """
    global mutation_rate
    population = []
    for i in range(population_amount):
        player = Agent(fittest)
        env.reset()
        past_fitness = []
        agent_get_moves(player, population)
        for move, action in enumerate(player.moves):
            _, _, done, info = env.step(action)
            if done:
                # The episode ended on this step; still count the position
                # it reached so the final move is not lost from the fitness.
                player.fitness = max(player.fitness, info['x_pos'])
                break
            stalled = check_fitness(player, info['x_pos'], past_fitness)
            # The frame of the stalling move is still rendered before stopping.
            handle_frame(move, player)
            if stalled:
                break
        population.append(player)
        print("Player {} achieved a fitness of {}".format(i, player.fitness))
        # TODO: print the reason why the game ended here.
        plt.close("all")
    if fittest and fittest.fitness == get_fittest(population).fitness:
        print("Increasing mutation chance by 0.1")
        # mutation_rate is compared against random.random(), so values above
        # 1.0 carry no extra meaning; cap it there.
        mutation_rate = min(1.0, mutation_rate + 0.1)
        print("Mutation chance is now {}%".format(mutation_rate*100))
    else:
        # NOTE(review): this branch also runs for the first generation
        # (fittest is None), and 0.01 differs from the initial 0.001 set in
        # the parameters cell -- confirm which value is intended.
        print("Fitness increased succesfully, restore to 0.01")
        mutation_rate = 0.01
    return population

In [14]:
# Evolve: each generation is seeded with the fittest agent of the previous one
# (None for the very first generation).
fittest = None
for generation in range(generation_amount):
    population = play_generation(fittest)
    fittest = get_fittest(population)
    summary = "GENERATION {} HIGHEST FITNESS ACHIEVED: {}".format(
        generation, fittest.fitness)
    print(summary)
    # Close the figure created for every agent of this generation.
    for agent in population:
        plt.close(agent.fig)

  return (self.ram[0x86] - self.ram[0x071c]) % 256


Player 0 achieved a fitness of 434
Player 1 achieved a fitness of 305
Player 2 achieved a fitness of 435
Player 3 achieved a fitness of 594
Player 4 achieved a fitness of 596
Player 5 achieved a fitness of 596
Player 6 achieved a fitness of 306
Player 7 achieved a fitness of 595
Player 8 achieved a fitness of 595
Player 9 achieved a fitness of 594
Player 10 achieved a fitness of 305
Player 11 achieved a fitness of 594
Player 12 achieved a fitness of 596
Player 13 achieved a fitness of 722
Player 14 achieved a fitness of 436
Player 15 achieved a fitness of 595
Player 16 achieved a fitness of 596
Player 17 achieved a fitness of 435
Player 18 achieved a fitness of 594
Player 19 achieved a fitness of 307
Player 20 achieved a fitness of 594
Player 21 achieved a fitness of 307
Player 22 achieved a fitness of 595
Player 23 achieved a fitness of 435
Player 24 achieved a fitness of 723
Player 25 achieved a fitness of 294
Player 26 achieved a fitness of 306
Player 27 achieved a fitness of 435
Pl

Player 4 achieved a fitness of 594
Player 5 achieved a fitness of 309
Player 6 achieved a fitness of 437
Player 7 achieved a fitness of 595
Player 8 achieved a fitness of 596
Player 9 achieved a fitness of 595
Player 10 achieved a fitness of 594
Player 11 achieved a fitness of 596
Player 12 achieved a fitness of 309
Player 13 achieved a fitness of 311
Player 14 achieved a fitness of 596
Player 15 achieved a fitness of 594
Player 16 achieved a fitness of 435
Player 17 achieved a fitness of 595
Player 18 achieved a fitness of 594
Player 19 achieved a fitness of 435
Player 20 achieved a fitness of 594
Player 21 achieved a fitness of 596
Player 22 achieved a fitness of 307
Player 23 achieved a fitness of 435
Player 24 achieved a fitness of 436
Player 25 achieved a fitness of 293
Player 26 achieved a fitness of 594
Player 27 achieved a fitness of 595
Player 28 achieved a fitness of 308
Player 29 achieved a fitness of 435
Increasing mutation chance by 0.1
GENERATION 7 HIGHEST FITNESS ACHIEVE

KeyboardInterrupt: 

Animate results

In [None]:
if save_animation:
    # Pick the first agent with the highest fitness in the last population.
    best_player = get_fittest(population)
    player_num = population.index(best_player)
    print("Displaying player {} with fitness score {}.".format(
        player_num, best_player.fitness))
    display_animation(best_player)
# Always show the move list of the overall fittest agent.
print(fittest.moves)

In [None]:
# Close the environment and every matplotlib figure that is still open.
env.close()
plt.close('all')

In [None]:
# Debug output: print SIMPLE_MOVEMENT and the environment object.
# NOTE(review): the environment was wrapped with RIGHT_ONLY, not
# SIMPLE_MOVEMENT, and the previous cell has already closed it -- confirm
# these prints are still wanted.
print(SIMPLE_MOVEMENT)
print(env)