In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import random

import gym
from gym import spaces
import keras
from keras.layers import *
from keras.models import *
import numpy
import rl
import scipy.sparse
import skimage.io

import sys
print(sys.version)

%matplotlib inline

Using TensorFlow backend.


3.6.5 (default, Apr  1 2018, 05:46:30) 
[GCC 7.3.0]


In [None]:
# input_neurons = 8
# inter_neurons = 128
# output_neurons = 8
# max_history = 16
# hidden_size = 32

In [15]:
def sigmoid(x):
    return 1 / (1 + numpy.exp(-x))

In [3]:
def hebbian(history):
    return numpy.tanh(numpy.vectorize(lambda x: -1/5*(x-5))(history))

In [41]:
class SNN(gym.Env):
    
    def __init__(self, specification):
        self.specification = specification
        self.total_neurons = self.specification['input_neurons'] + self.specification['inter_neurons'] + self.specification['output_neurons']
        self.observation_space = gym.spaces.Box(-1, float('inf'), ((self.specification['inter_neurons'] + self.specification['output_neurons']) * (1 + 2 * (self.specification['input_neurons'] + self.specification['inter_neurons'])),), dtype=float)
        self.action_space = gym.spaces.Box(-1, 1, ((self.specification['inter_neurons'] + self.specification['output_neurons']) * (2 * (self.specification['input_neurons'] + self.specification['inter_neurons'])),), dtype=float)
#         self.observation_space = gym.spaces.Tuple((gym.spaces.Box(-1, 1, (self.specification['input_neurons'] + self.specification['inter_neurons'],)), gym.spaces.Box(-1, 1, (self.specification['input_neurons'] + self.specification['inter_neurons'],))))
        self.potential_matrix = numpy.zeros((self.total_neurons,))
        self.weight_matrix = numpy.zeros((self.specification['inter_neurons'] + self.specification['output_neurons'], self.specification['input_neurons'] + self.specification['inter_neurons']))
        self.weight_backup = self.weight_matrix.copy()
        self.weight_mask = numpy.ones_like(self.weight_matrix, dtype=numpy.uint8)
        self.weight_mask[-self.specification['output_neurons']:, :self.specification['input_neurons']] = 0
        numpy.fill_diagonal(self.weight_mask[:self.specification['inter_neurons'], -self.specification['inter_neurons']:], 0)
        self.history_matrix = numpy.zeros((self.specification['inter_neurons'] + self.specification['output_neurons'], 1 + 2 * (self.specification['input_neurons'] + self.specification['inter_neurons'])))
        self.random_seed = None
        self.next_input = None
        self.previous_reward = None
    
    def interconnect(self):
        self.weight_matrix = numpy.zeros_like(self.weight_matrix)
        probabilities = numpy.vectorize(lambda i, j: 1 / abs(i - j) if i != j else 0.)(*numpy.meshgrid(range(self.specification['inter_neurons']), range(self.specification['inter_neurons'])))
        mask = numpy.zeros((self.weight_matrix.shape[0], self.weight_matrix.shape[1]))
        mask[:-self.specification['output_neurons'], self.specification['input_neurons']:] = numpy.random.binomial(1, probabilities, (self.specification['inter_neurons'], self.specification['inter_neurons']))
        mask[:self.specification['inter_neurons'], :self.specification['input_neurons']] = numpy.eye(self.specification['inter_neurons'])[numpy.random.choice(self.specification['inter_neurons'], self.specification['input_neurons'])].swapaxes(0, 1)
        mask[self.specification['inter_neurons']:, self.specification['input_neurons']:] = numpy.eye(self.specification['inter_neurons'])[numpy.random.choice(self.specification['inter_neurons'], self.specification['output_neurons'])]
        rand1 = numpy.random.uniform(0, 1, (self.weight_matrix.shape[0], self.weight_matrix.shape[1]))
        rand1[:-self.specification['output_neurons'], self.specification['input_neurons']:] = rand1[:-self.specification['output_neurons'], self.specification['input_neurons']:] * 2 - 1
        rand2 = numpy.random.uniform(0, 1, (self.weight_matrix.shape[0], self.weight_matrix.shape[1]))
        self.weight_matrix = mask * rand1 * rand2 * self.weight_mask
        self.weight_backup = self.weight_matrix.copy()
#         self.sign_matrix = numpy.sign(self.weight_matrix)
    
    def load_weights(self, weights):
        self.weight_matrix[:, :] = weights
        self.sign_matrix[:, :] = numpy.sign(self.weight_matrix)
    
    def close(self):
        self.specification['environment'].close()
        
    def reset(self):
        self.next_input = self.specification['environment'].reset()
        self.potential_matrix = numpy.zeros_like(self.potential_matrix)
        self.weight_matrix = self.weight_backup
        self.history_matrix = numpy.zeros_like(self.history_matrix)
        return self.history_matrix.flatten()
    
    def render(self, mode='human'):
        return self.specification['environment'].render(mode)
    
    def seed(self, seed):
        numpy.random.seed(seed)
        self.random_seed = seed
        return self.specification['environment'].seed(self.random_seed)
    
    def step(self, action):
#         self.next_input = np.eye(self.specification['input_neurons'])[np.random.choice(self.specification['input_neurons'], 1)]
        if self.specification['neuroplasticity']:
            action = action.reshape((self.specification['inter_neurons'] + self.specification['output_neurons'], 2 * (self.specification['input_neurons'] + self.specification['inter_neurons'])))
            action[self.specification['input_neurons'] + self.specification['inter_neurons']:] = numpy.round(sigmoid(action[self.specification['input_neurons'] + self.specification['inter_neurons']:]))
            action[:self.specification['input_neurons'] + self.specification['inter_neurons']] = numpy.tanh(action[:self.specification['input_neurons'] + self.specification['inter_neurons']])
            self.weight_matrix += self.specification['learning_rate'] * action[:, self.specification['input_neurons'] + self.specification['inter_neurons']:] * action[:, :self.specification['input_neurons'] + self.specification['inter_neurons']]
            self.weight_matrix = numpy.clip(numpy.multiply(self.weight_matrix, self.weight_mask), -1, 1)
#         state = numpy.zeros_like(self.history_matrix[self.neuron_idx, :])
#         reward = 0
#         terminal = False
        self.potential_matrix[:self.specification['input_neurons']] = numpy.add(self.potential_matrix[:self.specification['input_neurons']], self.next_input)
        firing_matrix = numpy.vectorize(lambda x: x >= 1)(self.potential_matrix)
        for i in range(self.specification['inter_neurons'] + self.specification['output_neurons']):
            pos = self.specification['input_neurons'] + i
            deltas = numpy.multiply(firing_matrix[:-self.specification['output_neurons']], self.weight_matrix[i])
            delta = numpy.sum(deltas)
            if self.specification['neuroplasticity']:
                self.history_matrix[i, self.specification['input_neurons'] + self.specification['inter_neurons']:] += 1
                self.history_matrix[i, self.specification['input_neurons'] + self.specification['inter_neurons']:-1] *= firing_matrix[:-self.specification['output_neurons']]
                self.history_matrix[i, -1] *= firing_matrix[pos]
                self.history_matrix[i, :self.specification['input_neurons'] + self.specification['inter_neurons']] = self.weight_matrix[i]
#                 self.history_matrix[i, self.weight_idx, self.specification['max_history'] - 1, :] = numpy.array([deltas[self.weight_idx], delta, self.potential_matrix[pos], firing_matrix[pos]])
            self.potential_matrix[pos] += delta
        self.potential_matrix = numpy.clip(numpy.multiply(self.potential_matrix, numpy.invert(firing_matrix)), -1, 1)
#         if self.specification['neuroplasticity']:
#             self.history_matrix = numpy.roll(self.history_matrix, 2, axis=1)
        state = self.history_matrix.flatten()
        self.next_input, reward, terminal, _ = self.specification['environment'].step(firing_matrix[-self.specification['output_neurons']:].astype(int))
        return state, reward, terminal, {}

In [5]:
class Test(gym.Env):
    
    def __init__(self):
        self.action_space = gym.spaces.Box(0, 1, (8,))
        self.observation_space = gym.spaces.Box(0, float('inf'), (8,))

    def close(self):
        pass

    def reset(self):
        self.state = numpy.zeros((8,))
        self.idx = 0
        return self.state

    def step(self, action):
        self.state = numpy.add(self.state, action)
        reward = ((self.state[1] - self.state[0]) * (self.state[2] - self.state[3])) * ((self.state[4] - self.state[5]) * (self.state[6] - self.state[7]))
        terminal = reward < 0
        self.idx += 1
        return numpy.ones(self.state.shape), reward, terminal, {}

In [32]:
class Test2(gym.Env):
    
    def __init__(self):
        self.action_space = gym.spaces.Discrete(1)
        self.observation_space = gym.spaces.Box(0, 1, (1,))
        self.state = None
        self.idx = None
        self.random_seed = None
    
    def seed(self, seed):
        self.random_seed = seed
        random.seed(seed)
        return seed
    
    def close(self):
        pass
    
    def reset(self):
        self.state = numpy.ones((1,))
        self.idx = 0
        return self.state
    
    def step(self, action):
        self.idx += 1
        terminal = self.idx == 10000
        if action[0] == 1:
            reward = 1.0
        else:
            reward = 0.0
        return self.state, reward, terminal, {}

In [None]:
env = SNN({'environment': Test2(), 'input_neurons': 1, 'inter_neurons': 10, 'output_neurons': 1,  'neuroplasticity': True, 'learning_rate': 0.1})
numpy.random.seed(0)
env.seed(0)
env.interconnect()
env.reset()

In [None]:
skimage.io.imshow(env.weight_matrix)

In [None]:
action = numpy.zeros(11 * (2 * 11))

In [None]:
for i in range(1):
    stuff = env.step(action)

In [None]:
skimage.io.imshow(env.potential_matrix.reshape((3, 4)))

In [None]:
gym.undo_logger_setup()

In [None]:
input_neurons = 1
inter_neurons = 10
output_neurons = 1

In [None]:
neurons = input_neurons + inter_neurons + output_neurons

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor

In [None]:
# Get the environment and extract the number of actions.
env = SNN({
    'environment': Test2(),
    'input_neurons': input_neurons,
    'inter_neurons': inter_neurons,
    'output_neurons': output_neurons,
    'neuroplasticity': True,
    'learning_rate': 0.1})
numpy.random.seed(0)
env.seed(0)
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

In [None]:
(1,) + env.observation_space.shape

In [None]:
nb_actions

In [None]:
# Build all necessary models: V, mu, and L networks.
V_model = Sequential()
V_model.add(Reshape(input_shape=(1,) + env.observation_space.shape, target_shape=(inter_neurons + output_neurons, 1 + 2 * (input_neurons + inter_neurons))))
V_model.add(TimeDistributed(Dense(neurons, activation='relu')))
V_model.add(TimeDistributed(Dense(neurons, activation='relu')))
V_model.add(TimeDistributed(Dense(neurons, activation='relu')))
V_model.add(Flatten())
V_model.add(Dense(1, activation='linear'))
print(V_model.summary())

mu_model = Sequential()
mu_model.add(Reshape(input_shape=(1,) + env.observation_space.shape, target_shape=(inter_neurons + output_neurons, 1 + 2 * (input_neurons + inter_neurons))))
mu_model.add(TimeDistributed(Dense(neurons, activation='relu')))
mu_model.add(TimeDistributed(Dense(neurons, activation='relu')))
mu_model.add(TimeDistributed(Dense(neurons, activation='relu')))
mu_model.add(TimeDistributed(Dense(2 * (input_neurons + inter_neurons), activation='tanh')))
mu_model.add(Flatten())
print(mu_model.summary())

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(2 * neurons, activation='relu')(x)
x = Dense(2 * neurons, activation='relu')(x)
x = Dense(2 * neurons, activation='relu')(x)
x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
x = Activation('linear')(x)
L_model = Model(inputs=[action_input, observation_input], outputs=x)
print(L_model.summary())

In [None]:
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                 memory=memory, nb_steps_warmup=100, random_process=random_process,
                 gamma=.99, target_model_update=1e-3, processor=None)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

In [None]:
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=200)

In [None]:
env.weight_matrix

In [None]:
# After training is done, we save the final weights.
# agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=10, visualize=False, nb_max_episode_steps=200)

In [13]:
input_neurons = 1
inter_neurons = 10
output_neurons = 1
neurons = input_neurons + inter_neurons + output_neurons

In [7]:
import numpy as np
import gym

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

In [8]:
gym.undo_logger_setup()



In [42]:
# Get the environment and extract the number of actions.
env = SNN({
    'environment': Test2(),
    'input_neurons': input_neurons,
    'inter_neurons': inter_neurons,
    'output_neurons': output_neurons,
    'neuroplasticity': True,
    'learning_rate': 0.1})
np.random.seed(123)
env.seed(123)
env.interconnect()
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [43]:
# Next, we build a very simple model.
actor = Sequential()
actor.add(Reshape(input_shape=(1,) + env.observation_space.shape, target_shape=(inter_neurons + output_neurons, 1 + 2 * (input_neurons + inter_neurons))))
actor.add(TimeDistributed(Dense(neurons, activation='relu')))
actor.add(TimeDistributed(Dense(neurons, activation='relu')))
actor.add(TimeDistributed(Dense(neurons, activation='relu')))
actor.add(TimeDistributed(Dense(2 * (input_neurons + inter_neurons), activation='tanh')))
actor.add(Flatten())
print(actor.summary())

action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
reshaped_action = Reshape((inter_neurons + output_neurons, 2 * (input_neurons + inter_neurons)))(action_input)
reshaped_observation = Reshape((inter_neurons + output_neurons, 1 + 2 * (input_neurons + inter_neurons)))(observation_input)
x = Concatenate()([reshaped_action, reshaped_observation])
x = TimeDistributed(Dense(2 * neurons, activation='relu'))(x)
x = TimeDistributed(Dense(2 * neurons, activation='relu'))(x)
x = TimeDistributed(Dense(2 * neurons, activation='relu'))(x)
x = Flatten()(x)
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_22 (Reshape)         (None, 11, 23)            0         
_________________________________________________________________
time_distributed_49 (TimeDis (None, 11, 12)            288       
_________________________________________________________________
time_distributed_50 (TimeDis (None, 11, 12)            156       
_________________________________________________________________
time_distributed_51 (TimeDis (None, 11, 12)            156       
_________________________________________________________________
time_distributed_52 (TimeDis (None, 11, 22)            286       
_________________________________________________________________
flatten_15 (Flatten)         (None, 242)               0         
Total params: 886
Trainable params: 886
Non-trainable params: 0
_________________________________________________________________
None
_______

In [44]:
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

In [45]:
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=200)

Training for 50000 steps ...
Interval 1 (0 steps performed)
50 episodes - episode_reward: 47.420 [0.000, 92.000] - loss: 1.730 - mean_absolute_error: 0.449 - mean_q: -11.083

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 80.020 [18.000, 93.000] - loss: 1.709 - mean_absolute_error: 0.643 - mean_q: 19.343

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 84.580 [54.000, 99.000] - loss: 30.146 - mean_absolute_error: 1.708 - mean_q: 90.070

Interval 4 (30000 steps performed)
  567/10000 [>.............................] - ETA: 1:21 - reward: 0.4832done, took 259.931 seconds


<keras.callbacks.History at 0x7f745327e4e0>

In [None]:
# After training is done, we save the final weights.
# agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)

In [40]:
env.weight_matrix

array([[-1.        , -0.        ,  1.        , -1.        , -1.        ,
         1.        , -1.        , -1.        , -0.35841848,  1.        ,
         1.        ],
       [-1.        , -1.        ,  0.        , -0.99418762, -1.        ,
         1.        , -1.        , -1.        , -0.00848113,  1.        ,
         1.        ],
       [-1.        , -1.        ,  1.        , -0.        , -1.        ,
         1.        , -1.        , -1.        ,  0.08202426,  1.        ,
         1.        ],
       [-1.        , -1.        ,  1.        , -0.69117713, -0.        ,
         1.        , -1.        , -1.        ,  0.3561765 ,  1.        ,
         1.        ],
       [-1.        , -1.        ,  1.        , -1.        , -1.        ,
         0.        , -1.        , -1.        ,  0.3951716 ,  1.        ,
         1.        ],
       [-1.        , -1.        ,  1.        , -0.99180608, -1.        ,
         1.        , -0.        , -1.        , -0.16146544,  1.        ,
         1.   