In [1]:
import torch.nn as nn
import torch
import numpy as np
import gym
import cv2
import torch.nn.functional as F


import ipynb.fs.full.misc as misc
import ipynb.fs.full.breeder as br
import ipynb.fs.full.mutation_functions as mf
import ipynb.fs.full.selection_functions as sf
import ipynb.fs.full.crossover_functions as cf
from ipynb.fs.full.agent_class import Agent
import ipynb.fs.full.net_class as nm

In [2]:
class RiverraidAgentNet(nm.NetModule):
    def __init__(self):
        super(RiverraidAgentNet, self).__init__()
                
        # restrict action space
        # total number is 18
        self.num_outputs = 17
        self.input_len = 4
        
        #  define net structure here
        self.model = nn.Sequential(
            nn.Conv2d(self.input_len, 16, 8, stride=2, padding=0),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(16),
            
            nn.Conv2d(16, 32, 4, stride=2, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(32)            
        ).cuda()
        
        self.classifier = nn.Sequential(
            nn.Linear(1728, 256),
            nn.ReLU(),            
            nn.Linear(256, self.num_outputs),
            nn.Softmax()
        ).cuda()

        self.parameters_count = self.count_parameters()

    def forward(self, inputs):
        c = self.model(inputs)
        c = c.view(c.size(0), -1)        
        c = self.classifier(c)
        
        return c

In [None]:
class RiverraidAgent(Agent):
    def __init__(self, dna=None):
        super(RiverraidAgent, self).__init__(dna, RiverraidAgentNet())
        
        self.state = [np.zeros((1, 85, 60), dtype=np.float32)] * self.net.input_len
        self.beta = 0.999       
        
    def preprocess(self, img):
        def to_grayscale(img):
            return np.mean(img, axis=2, keepdims=True)

        def downsample(img):
            img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST)
            return img[:85, 10:70]

        def normalize(img):
            return (img - 127.5) / 127.5
    
        def to_chw(img):
            return img.transpose(2, 0, 1)
        
        
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = downsample(img)
        img = to_grayscale(img)        
        
        cv2.imshow("gray", img.astype(np.uint8))
        cv2.waitKey(1)        
        
        img = normalize(img)
        return to_chw(img)
                
    def step(self, observation):
        observation = self.preprocess(np.asarray(observation, dtype=np.float32))
        
        self.state.pop(0)
        self.state.append(observation)
        
        state = np.concatenate(self.state, axis=0)
        
        torch_state = misc.single_to_torch(state, cuda=self.net.is_cuda())        
        torch_action = self.net(torch_state)
        action = misc.single_from_torch(torch_action, cuda=self.net.is_cuda())

        self.add_experience(state, action)        
        
        return np.argmax(action) + 1 # ignore noop action for now

    def evaluate(self, environment, render=False, sleep=False, idx=0):
        done = False
        beta = self.beta
        fitness = 0.0
        unadjusted_fitness = 0.0
        
        obs = environment.reset()
        
        n = 1
        while not done:
            action = self.step(obs)            
            obs, reward, done, info = environment.step(action)
            fitness += (beta**n) * reward           
            unadjusted_fitness += reward
            
            n += 1
            if render:
                environment.render()
                if sleep:
                    time.sleep(0.0125)

        self.fitness = fitness
        self.unadjusted_fitness = unadjusted_fitness
        
        return self.fitness
    
    @staticmethod
    def produce_offspring(dna):
        return RiverraidAgent(dna=dna)

misc.init_random(1337)

env = gym.make("Riverraid-v0")

print(env.unwrapped.get_action_meanings())

population_size = 100
population = [RiverraidAgent()] * population_size

breeder = br.Breeder(population,
                     env,
                     20000.,
                     generations=2000,
                     elite_percent=0.1,
                     mutation_rate=0.1,
                     selection_pressure=0.05,
                     selection_func=sf.tournament,
                     crossover_func=cf.none,
                     mutation_func=mf.sm_g_sum_r,
                     render=False)

breeder.load_checkpoint()
# agent = breeder.population[0]
agent = breeder.evolve()


for _ in range(10):
    print(agent.evaluate(env, render=True, sleep=True))

['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'DOWN', 'UPRIGHT', 'UPLEFT', 'DOWNRIGHT', 'DOWNLEFT', 'UPFIRE', 'RIGHTFIRE', 'LEFTFIRE', 'DOWNFIRE', 'UPRIGHTFIRE', 'UPLEFTFIRE', 'DOWNRIGHTFIRE', 'DOWNLEFTFIRE']


  input = module(input)


Gen: 0 , Unadjusted max fitness: 1620.0 , Max fitness: 338.85074434914327 , Min fitness: 108.62535971135883 , Age: 0 , Mean fitness: 220.8618387546391
en 10
Gen: 1 , Unadjusted max fitness: 2150.0 , Max fitness: 1508.0781952211232 , Min fitness: 120.1279869698321 , Age: 0 , Mean fitness: 960.891739550227
en 10
Gen: 2 , Unadjusted max fitness: 2610.0 , Max fitness: 1658.4454711051762 , Min fitness: 481.2043854409887 , Age: 0 , Mean fitness: 1083.6242684705703
en 10
Gen: 3 , Unadjusted max fitness: 2240.0 , Max fitness: 1514.882684973492 , Min fitness: 429.19249384982606 , Age: 0 , Mean fitness: 1092.1920380095085
en 10
Gen: 4 , Unadjusted max fitness: 2550.0 , Max fitness: 1613.4077097897857 , Min fitness: 444.1650760529541 , Age: 0 , Mean fitness: 1081.769999779831
en 10
Gen: 5 , Unadjusted max fitness: 2730.0 , Max fitness: 1738.9520760576904 , Min fitness: 633.8446266163331 , Age: 0 , Mean fitness: 1105.5832705355078
en 10
Gen: 6 , Unadjusted max fitness: 2370.0 , Max fitness: 1598.2