In [1]:
import numpy as np
from sklearn.utils import shuffle as skshuffle
import copy
import torch
from torch.autograd import Variable

import ipynb.fs.full.misc as misc

In [2]:
def none(parent, rate):
    """Identity mutation operator: hand the parent back untouched.

    Args:
        parent: Individual to "mutate". It is returned as-is (same object,
            no copy is made).
        rate: Ignored. Accepted only so the signature lines up with the other
            ``(parent, rate)`` mutation operators in this notebook.

    Returns:
        The very same ``parent`` object that was passed in.
    """
    return parent

In [3]:
def sm_r(parent, target_divergence):
    """Gaussian mutation whose step is halved until behaviour changes little enough.

    One noise vector is drawn; candidate genomes ``parent.dna + noise * scale``
    are then tried with ``scale`` = 1, 1/2, 1/4, ... for at most 10 trials.
    Each candidate is loaded into a scratch copy of the parent's network,
    evaluated on a random sample of up to 100 recorded observations, and scored
    with ``misc.squared_loss`` against the matching recorded actions. The first
    candidate whose score is not greater than ``target_divergence`` is kept; if
    none qualifies, the last (smallest-step) candidate is kept.

    Args:
        parent: Individual exposing ``observation_buffer``, ``action_buffer``,
            ``size``, ``dna``, ``net`` and ``produce_offspring``.
        target_divergence: Largest acceptable divergence score.

    Returns:
        Whatever ``parent.produce_offspring`` returns for the chosen genome.

    Raises:
        AssertionError: If the observation and action buffers differ in length.
    """
    assert len(parent.observation_buffer) == len(parent.action_buffer)

    observations = np.asarray(parent.observation_buffer, dtype=np.float32)
    actions = np.asarray(parent.action_buffer, dtype=np.float32)

    # Joint shuffle keeps each observation paired with its recorded action.
    observations, actions = skshuffle(
        observations, actions, n_samples=min(100, observations.shape[0])
    )

    noise = np.random.normal(0.0, 1.0, parent.size)

    # Probe candidates on a private copy so the weights of the parent's own
    # network are never overwritten (sm_g_sum_r takes the same precaution).
    net = copy.deepcopy(parent.net)

    # The evaluation batch is the same for every trial, so convert it once.
    inp = misc.batch_to_torch(observations, cuda=net.is_cuda())

    max_trials = 10
    scale = 1.0
    new_dna = None
    for _ in range(max_trials):
        new_dna = parent.dna + noise * scale
        net.inject_parameters(new_dna)

        out = misc.batch_from_torch(net(inp), cuda=net.is_cuda())
        divergence = misc.squared_loss(out, actions)

        # Same stop test as `while divergence > target_divergence`, but checked
        # after the trial so at least one candidate genome always exists.
        if not divergence > target_divergence:
            break
        scale /= 2.0

    return parent.produce_offspring(new_dna)

In [4]:
def sm_g_sum(parent, rate):
    """Gaussian mutation rescaled per parameter by summed output-gradient sensitivity.

    For every network output, the gradient of that output (summed over a random
    sample of up to 100 recorded observations) with respect to the parameters
    is collected into a Jacobian. The per-parameter sensitivity is the
    Euclidean norm of the Jacobian column. Gaussian noise with standard
    deviation ``rate`` is divided by that sensitivity, clipped to
    ``[-0.2, 0.2]`` and added to the current parameters.

    Side effect: the backward passes overwrite the gradient buffers of
    ``parent.net`` (its weights are not modified).

    Args:
        parent: Individual exposing ``observation_buffer``, ``action_buffer``,
            ``size``, ``net`` and ``produce_offspring``.
        rate: Standard deviation of the raw Gaussian perturbation.

    Returns:
        Whatever ``parent.produce_offspring`` returns for the new parameters.

    Raises:
        AssertionError: If the observation and action buffers differ in length.
    """
    assert len(parent.observation_buffer) == len(parent.action_buffer)
    net = parent.net
    is_cuda = net.is_cuda()

    # Only observations are needed here; recorded actions play no role.
    observations = np.asarray(parent.observation_buffer, dtype=np.float32)
    observations = skshuffle(observations, n_samples=min(100, observations.shape[0]))
    observations = misc.batch_to_torch(observations, cuda=is_cuda)

    # One row per output, one column per parameter; kept on the CPU because
    # net.extract_grad() hands back a NumPy array.
    jacobian = torch.zeros(net.num_outputs, net.parameters_count)

    # Forward pass whose graph is reused for every per-output backward pass.
    old_policy = net(observations)

    grad_output = torch.zeros(*old_policy.size())
    if is_cuda:
        grad_output = grad_output.cuda()

    for i in range(net.num_outputs):
        net.zero_grad()
        # Select output i for every sample in the batch, nothing else.
        grad_output.zero_()
        grad_output[:, i] = 1.0

        # retain_graph=True: the same graph is differentiated once per output.
        old_policy.backward(grad_output, retain_graph=True)
        jacobian[i] = torch.from_numpy(net.extract_grad())

    # Sensitivity of each parameter: L2 norm over the output axis. Converted
    # to NumPy explicitly so the arithmetic below is plain ndarray math instead
    # of an ndarray/tensor mix whose result type depends on operator dispatch.
    scaling = torch.sqrt((jacobian ** 2).sum(0)).numpy()

    # Parameters with exactly zero sensitivity keep the raw noise (divide by 1);
    # remaining tiny sensitivities are floored so the division cannot explode.
    scaling[scaling == 0] = 1.0
    scaling[scaling < 0.01] = 0.01

    noise = np.random.normal(0.0, rate, parent.size)
    delta = noise / scaling

    # Hard cap on the per-parameter step.
    final_delta = np.clip(delta, -0.2, 0.2)

    # assumes net.extract_parameters() returns a flat array of length
    # parent.size -- TODO confirm
    params = net.extract_parameters() + final_delta

    return parent.produce_offspring(params)

In [5]:
def sm_g_so(parent, rate):
    """Gaussian mutation rescaled by a second-order sensitivity estimate.

    A squared-difference loss between the network output and a constant
    snapshot of that same output is built on a random sample of up to 256
    recorded observations. Its gradient with respect to the parameters is
    projected onto a random unit direction and differentiated again, giving a
    per-parameter second-derivative term along that direction. The raw
    perturbation is divided by the square root of the absolute value of that
    term, clipped to ``[-1, 1]`` and added to the current parameters.

    Args:
        parent: Individual exposing ``observation_buffer``, ``action_buffer``,
            ``size``, ``net`` and ``produce_offspring``.
        rate: Standard deviation of the raw Gaussian perturbation.

    Returns:
        Whatever ``parent.produce_offspring`` returns for the new parameters.

    Raises:
        AssertionError: If the observation and action buffers differ in length.
    """
    assert len(parent.observation_buffer) == len(parent.action_buffer)
    net = parent.net
    is_cuda = net.is_cuda()

    # Only observations are needed here; recorded actions play no role.
    observations = np.asarray(parent.observation_buffer, dtype=np.float32)
    observations = skshuffle(observations, n_samples=min(256, observations.shape[0]))
    observations = misc.batch_to_torch(observations, cuda=is_cuda)

    old_policy = net(observations)

    # Constant snapshot of the current outputs (round-trip through NumPy cuts
    # it off from the autograd graph).
    np_copy = np.array(old_policy.data.cpu().numpy(), dtype=np.float32)
    _old_policy_cached = Variable(torch.from_numpy(np_copy), requires_grad=False)
    if is_cuda:
        _old_policy_cached = _old_policy_cached.cuda()

    # Squared divergence from the snapshot: summed over outputs, averaged over
    # the batch. Its value is zero here; only its derivatives are of interest.
    loss = ((old_policy - _old_policy_cached) ** 2).sum(1).mean(0)

    # First derivative; create_graph=True keeps it differentiable.
    loss_gradient = torch.autograd.grad(loss, net.parameters(), create_graph=True)
    flat_gradient = torch.cat([grads.view(-1) for grads in loss_gradient])
    if is_cuda:
        flat_gradient = flat_gradient.cuda()

    # Raw perturbation and its unit-length direction.
    delta = np.random.normal(0.0, rate, parent.size)
    direction = delta / np.sqrt((delta ** 2).sum())

    direction = np.asarray(direction, dtype=np.float32)
    direction_t = Variable(torch.from_numpy(direction), requires_grad=False)
    if is_cuda:
        direction_t = direction_t.cuda()

    # Differentiating (gradient . direction) yields the second derivative
    # along the perturbation direction.
    grad_v_prod = (flat_gradient * direction_t).sum()
    second_deriv = torch.autograd.grad(grad_v_prod, net.parameters())
    sensitivity = torch.cat([g.contiguous().view(-1) for g in second_deriv])

    # Bring the scaling back to host memory as an ndarray before mixing it with
    # the NumPy perturbation: a tensor living on the GPU cannot be consumed by
    # NumPy arithmetic directly.
    scaling = torch.sqrt(torch.abs(sensitivity).data).cpu().numpy()

    # Exactly-zero sensitivity -> leave the noise unscaled; other tiny values
    # are floored so the division cannot explode.
    scaling[scaling == 0] = 1.0
    scaling[scaling < 0.01] = 0.01

    delta = delta / scaling
    final_delta = np.clip(delta, -1.0, 1.0)

    # assumes net.extract_parameters() returns a flat array of length
    # parent.size -- TODO confirm
    params = net.extract_parameters() + final_delta

    return parent.produce_offspring(params)

In [6]:
def sm_g_sum_r(parent, target_divergence):
    """Gradient-scaled mutation whose step is halved until behaviour changes little enough.

    The perturbation direction is unit Gaussian noise divided by the summed
    output-gradient sensitivity of each parameter (L2 norm over outputs of the
    Jacobian, computed on a random sample of up to 100 recorded observations)
    and clipped to ``[-1, 1]``. Candidate genomes ``parent.dna + delta * scale``
    are tried with ``scale`` = 1, 1/2, 1/4, ... for at most 10 trials on a deep
    copy of the parent's network. Each is scored with ``misc.squared_loss``
    against the recorded actions. The first candidate whose score is not
    greater than ``target_divergence`` is kept; otherwise the last one is kept.

    Side effect: the backward passes overwrite the gradient buffers of
    ``parent.net`` (its weights are not modified).

    Args:
        parent: Individual exposing ``observation_buffer``, ``action_buffer``,
            ``size``, ``dna``, ``net`` and ``produce_offspring``.
        target_divergence: Largest acceptable divergence score.

    Returns:
        Whatever ``parent.produce_offspring`` returns for the chosen genome.

    Raises:
        AssertionError: If the observation and action buffers differ in length.
    """
    assert len(parent.observation_buffer) == len(parent.action_buffer)
    net = parent.net

    observations = np.asarray(parent.observation_buffer, dtype=np.float32)
    actions = np.asarray(parent.action_buffer, dtype=np.float32)

    # Joint shuffle keeps each observation paired with its recorded action and
    # already limits the sample to at most 100 rows.
    observations, actions = skshuffle(
        observations, actions, n_samples=min(100, observations.shape[0])
    )

    observations = misc.batch_to_torch(observations, cuda=net.is_cuda())

    # One row per output, one column per parameter; kept on the CPU because
    # net.extract_grad() hands back a NumPy array.
    jacobian = torch.zeros(net.num_outputs, net.parameters_count)
    old_policy = net(observations)
    grad_output = torch.zeros(*old_policy.size())
    if net.is_cuda():
        grad_output = grad_output.cuda()

    # One backward pass per output, selecting that output for every sample.
    for i in range(net.num_outputs):
        net.zero_grad()
        grad_output.zero_()
        grad_output[:, i] = 1.0

        # retain_graph=True: the same graph is differentiated once per output.
        old_policy.backward(grad_output, retain_graph=True)
        jacobian[i] = torch.from_numpy(net.extract_grad())

    # Summed-gradient sensitivity, converted to NumPy explicitly so the
    # arithmetic below is plain ndarray math rather than an ndarray/tensor mix.
    scaling = torch.sqrt((jacobian ** 2).sum(0)).numpy()

    # Exactly-zero sensitivity -> leave the noise unscaled; other tiny values
    # are floored so the division cannot explode.
    scaling[scaling == 0] = 1.0
    scaling[scaling < 0.01] = 0.01

    noise = np.random.normal(0.0, 1.0, parent.size)
    delta = np.clip(noise / scaling, -1.0, 1.0)

    # Candidates are probed on a copy so the parent's network keeps its weights.
    trial_net = copy.deepcopy(parent.net)

    max_trials = 10
    scale = 1.0
    new_dna = None
    for _ in range(max_trials):
        new_dna = parent.dna + delta * scale
        trial_net.inject_parameters(new_dna)

        res = trial_net(observations)
        out = misc.batch_from_torch(res, cuda=trial_net.is_cuda())
        divergence = misc.squared_loss(out, actions)

        # Same stop test as `while divergence > target_divergence`, but checked
        # after the trial so at least one candidate genome always exists.
        if not divergence > target_divergence:
            break
        scale /= 2.0

    return parent.produce_offspring(new_dna)

In [7]:
def gaussian(parent, variance):
    """Plain Gaussian mutation: add independent normal noise to every gene.

    Args:
        parent: Individual exposing ``size``, ``get_dna()`` and
            ``produce_offspring()``.
        variance: Forwarded as the ``scale`` argument of ``np.random.normal``,
            i.e. it is used as the standard deviation of the noise despite the
            parameter's name.

    Returns:
        Whatever ``parent.produce_offspring`` returns for the perturbed genome.
    """
    perturbation = np.random.normal(loc=0.0, scale=variance, size=parent.size)
    return parent.produce_offspring(parent.get_dna() + perturbation)