In [1]:
import cv2
import numpy as np
def _process_frame42(frame):
    """Reduce a frame to a normalised 42x42x1 float32 array.

    The frame is cropped to rows 34..193 and the first 160 columns, shrunk
    in two steps (80x80, then 42x42), averaged over axis 2 (presumably the
    colour channels -- confirm against the caller) and scaled by 1/255.

    Returns:
        A float32 array of shape (42, 42, 1).
    """
    top, side = 34, 160
    cropped = frame[top:top + side, :side]
    # Shrink in two stages (essentially mipmapping): resizing straight to
    # 42x42 loses pixels that do not map close enough to a pixel boundary.
    half_size = cv2.resize(cropped, (80, 80))
    small = cv2.resize(half_size, (42, 42))
    gray = small.mean(2).astype(np.float32)
    gray *= (1.0 / 255.0)
    return np.reshape(gray, [42, 42, 1])

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from scipy.signal import convolve2d
import theano
class NeuralNetwork:
    """Small fixed-architecture network evolved by random weight mutation.

    The forward pass applies two rounds of 5x5 'valid' convolution, each
    followed by 2x2 max-pooling, and then two sigmoid dense layers with a
    constant bias input appended before each. ``dense1_weights`` has 50
    rows, so the flattened convolution output must hold 49 values; a 42x42
    input produces exactly that (42 -> 38 -> 19 -> 15 -> 7, and 7 * 7 = 49).
    """

    # Side length of the square convolution filters.
    conv1_size = 5
    conv2_size = 5
    # Threshold compared against a standard-normal draw in ``_mutate``: a
    # weight is perturbed when the draw exceeds it, i.e. for roughly 17% of
    # the weights (P(Z > 0.97)), despite the "probability" in the name.
    evolution_probability = 0.97
    # Multiplier applied to the Gaussian mutation noise.
    scale_factor = 1

    def __init__(self):
        """Draw every weight from a standard normal distribution."""
        self.conv1_filtr = np.random.standard_normal((self.conv1_size, self.conv1_size))
        self.conv2_filtr = np.random.standard_normal((self.conv2_size, self.conv2_size))
        # 50 rows = 49 flattened convolution outputs + 1 bias input.
        self.dense1_weights = np.random.standard_normal((50, 100))
        # 101 rows = 100 hidden activations + 1 bias input; 3 output scores.
        self.dense2_weights = np.random.standard_normal((101, 3))

    @staticmethod
    def _max_pool_2x2(feature_map):
        """Max-pool over non-overlapping 2x2 windows, dropping a trailing odd row/column."""
        rows = feature_map.shape[0] // 2
        cols = feature_map.shape[1] // 2
        trimmed = np.asarray(feature_map, dtype=np.float64)[:rows * 2, :cols * 2]
        # Expose each 2x2 window on its own pair of axes and reduce over them.
        return trimmed.reshape(rows, 2, cols, 2).max(axis=(1, 3))

    @staticmethod
    def _sigmoid(values):
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        # exp() overflows to inf for very negative inputs; the quotient is
        # then 0.0, which is the correct limit, so only the warning is muted.
        with np.errstate(over='ignore'):
            return 1 / (1 + np.exp(- values))

    @staticmethod
    def _append_bias(row):
        """Return ``row`` (shape (1, n)) with a constant 1 appended as column n."""
        return np.append(row, np.array([1]).reshape(1, -1), axis=1)

    def _mutate(self, weights):
        """Return a perturbed copy of ``weights``; the input array is not modified.

        Two standard-normal arrays are drawn, in this order: one that selects
        which entries change (draw > ``evolution_probability``) and one that
        supplies the noise, which is scaled by ``scale_factor``.
        """
        mask = (np.random.standard_normal(weights.shape) - self.evolution_probability) > 0
        noise = np.random.standard_normal(weights.shape)
        return weights + mask * noise * self.scale_factor

    def Convolve(self, compressed_observation):
        """Run the convolution / pooling stack on a 2-D observation.

        Pooling is done directly in NumPy, so nothing is compiled per call
        and no Theano names have to be in scope.

        Args:
            compressed_observation: 2-D array; 42x42 yields a 7x7 result.

        Returns:
            2-D float64 array of pooled feature values.
        """
        first_stage = self._max_pool_2x2(
            convolve2d(compressed_observation, self.conv1_filtr, mode='valid'))
        return self._max_pool_2x2(
            convolve2d(first_stage, self.conv2_filtr, mode='valid'))

    def ForwardPropogate(self, compressed_observation):
        """Compute the three output activations for one 2-D observation.

        Returns:
            Array of shape (1, 3) with sigmoid outputs.
        """
        features = self.Convolve(compressed_observation).reshape(1, -1)
        hidden = self._sigmoid(self._append_bias(features).dot(self.dense1_weights))
        return self._sigmoid(self._append_bias(hidden).dot(self.dense2_weights))

    def Evolution(self):
        """Return a new network whose weights are mutated copies of this one's."""
        new_network = NeuralNetwork()
        new_network.conv1_filtr = self._mutate(self.conv1_filtr)
        new_network.conv2_filtr = self._mutate(self.conv2_filtr)
        new_network.dense1_weights = self._mutate(self.dense1_weights)
        new_network.dense2_weights = self._mutate(self.dense2_weights)
        return new_network

Using gpu device 0: GeForce GTX 980 (CNMeM is disabled, cuDNN 5105)


In [5]:
def predict_action(observation, network):
    """Return the index of the largest network output for one raw observation.

    The observation is preprocessed with ``_process_frame42`` and its single
    channel is passed to ``network.ForwardPropogate`` as a 2-D array.
    """
    frame = _process_frame42(observation)
    scores = network.ForwardPropogate(frame[:, :, 0])
    return np.argmax(scores)

In [6]:
import gym
from gym import wrappers

In [7]:
# Skiing environment shared by the cells below. The Monitor wrapper writes
# its results under /tmp/gym-results; force=True lets it clear monitor files
# left there by a previous run.
env = gym.make("Skiing-v0")
env = wrappers.Monitor(env, "/tmp/gym-results", force=True)
def PlayGame(network, render=True, min_reward=-15000, max_iterations=4000):
    """Play one episode on the module-level ``env`` and return its total reward.

    Args:
        network: Object passed to ``predict_action`` to choose each action.
        render: Call ``env.render()`` every step. Pass False on machines
            without a display, where rendering raises.
        min_reward: The episode is abandoned once the accumulated reward
            drops below this value.
        max_iterations: The episode is abandoned after this many steps.

    Returns:
        The accumulated reward of the (possibly truncated) episode.

    Note:
        ``env.close()`` is called at the end of every game.
        NOTE(review): whether the Monitor-wrapped env can be reset again
        after ``close()`` depends on the gym version -- confirm.
    """
    observation = env.reset()
    done = False
    iteration, all_reward = 0, 0
    while not done:
        iteration += 1
        if render:
            env.render()
        action = predict_action(observation, network)
        observation, reward, done, _ = env.step(action)
        all_reward += reward
        # Give up on hopeless or endless episodes.
        if all_reward < min_reward or iteration >= max_iterations:
            break

    print("Reward:", all_reward)
    env.close()
    return all_reward

[2017-02-06 13:23:27,078] Making new env: Skiing-v0
[2017-02-06 13:23:27,236] Creating monitor directory /tmp/gym-results


In [8]:
import theano
import theano.tensor as T
import lasagne

In [9]:
# Randomly initialised network used for the smoke tests in the next cells.
network = NeuralNetwork()

In [10]:
# Grab one initial observation from the environment to feed the checks below.
observation = env.reset()

[2017-02-06 13:23:40,056] Starting new video recorder writing to /tmp/gym-results/openaigym.video.0.22446.video000000.mp4


In [11]:
# Sanity check: largest value in the preprocessed frame (which
# _process_frame42 scales by 1/255).
np.max(_process_frame42(observation)[:,:,0])

0.92549026

In [12]:
# Sanity check: one forward pass on the preprocessed frame; the result has
# one row with three activations.
network.ForwardPropogate(_process_frame42(observation)[:,:,0])

array([[  4.02072524e-03,   9.52654287e-01,   8.95029217e-04]])

In [13]:
# Close the environment used for the smoke tests above.
env.close()

[2017-02-06 13:23:44,528] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/gym-results')


# Training

In [14]:
# Hill climbing: each iteration mutates the current network
# `num_evolution_try` times, plays one game per candidate, and adopts the
# best candidate when it scores at least as well as the current network.
network = NeuralNetwork()
reward = PlayGame(network)
num_evolution_try = 3
iteration = 0
while reward < -6000:
    iteration += 1
    print("_____________________________________Iteration:", iteration)

    evolution_rewards = []
    evolution_networks = []

    for i in range(num_evolution_try):
        new_network = network.Evolution()
        evolution_networks.append(new_network)
        print("Playing evolution game", i)
        evolution_rewards.append(PlayGame(new_network))

    i_max = np.argmax(evolution_rewards)
    if evolution_rewards[i_max] >= reward:
        network = evolution_networks[i_max]
        # Track the adopted network's score: it is both the baseline that
        # later candidates must match and the loop's stopping criterion.
        reward = evolution_rewards[i_max]

NoSuchDisplayException: Cannot connect to "None"

In [28]:
# Play one game with the current `network`.
# NOTE(review): this cell's execution count (28) precedes cells 19-25 below,
# so the saved outputs come from an out-of-order run -- re-run top to bottom
# to confirm they still reproduce.
PlayGame(network)

[2017-01-26 22:27:55,931] Starting new video recorder writing to /tmp/gym-results/openaigym.video.6.12171.video000000.mp4
[2017-01-26 22:28:14,441] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/gym-results')


Evolution
Reward: -9013.0


-9013.0

In [19]:
from six.moves import cPickle
def save_to_cPickle(file_name, obj):
    """Pickle ``obj`` to ``<file_name>.save`` with the highest protocol.

    Args:
        file_name: Path without extension; '.save' is appended.
        obj: Any picklable object.

    The file is opened in a ``with`` block so the handle is closed even when
    pickling raises.
    """
    with open(file_name + '.save', 'wb') as f:
        cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)

def load_from_cPickle(file_name):
    """Load and return the object pickled in ``<file_name>.save``.

    Args:
        file_name: Path without extension; '.save' is appended.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.
    """
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # this notebook (or another trusted source) produced.
    with open(file_name + '.save', 'rb') as f:
        return cPickle.load(f)

In [20]:
# Persist the current network to best_network.save in the working directory.
save_to_cPickle("best_network", network)

In [21]:
# Reload the network from best_network.save to check the save/load round trip.
new_net = load_from_cPickle("best_network")

In [25]:
# Play one game with the reloaded network.
PlayGame(new_net)

[2017-01-26 17:52:30,845] Making new env: Skiing-v0
[2017-01-26 17:52:30,932] Clearing 4 monitor files from previous run (because force=True was provided)
[2017-01-26 17:52:30,966] Starting new video recorder writing to /tmp/gym-results/openaigym.video.68.7488.video000000.mp4
[2017-01-26 17:52:49,579] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/gym-results')


Reward: -9011.0


-9011.0