In [None]:
import copy
import time
import numpy as np
np.set_printoptions(precision=8, suppress=True, linewidth=400, threshold=100)
import gym



In [None]:
class SensorimotorAutoencoderAgents(object):
    '''
    a group of autoencoders, each with the ability to encode one transition
    that work together to form a predictive sensorimotor inference engine.
    basically they map the space, distributedly, so that they can find a path
    from any observation to any other observation - they know how to manipulate
    the environment.

    they have overlapping input bits, but no two have the same inputs. some
    have no inputs from the environment at all, and instead get inputs only
    from other autoencoders. There are typically many autoencoders. they
    automatically wire themselves up (inefficiently, but successfully).

    NOTE: the above is the design goal. As implemented here, the autoencoders
    are built but not wired together or trained, and `step` returns a random
    action sampled from the environment's action space.
    '''

    def __init__(self, env, encoders_n=12):
        '''
        env: the environment to act in; must expose `action_space.sample()`.
        encoders_n: how many autoencoders to build.
        '''
        self.env = env
        self.encoders = self.generate_encoders(encoders_n)

    def generate_encoders(self, n):
        '''
        build and return a list of `n` independent, uncompiled autoencoders
        (input -> relu bottleneck -> sigmoid reconstruction), following
        https://blog.keras.io/building-autoencoders-in-keras.html
        '''
        # imported lazily so the class can be defined without keras installed
        from keras.layers import Input, Dense
        from keras.models import Model
        # sizes taken from the tutorial: 784 input floats compressed to 32
        # (a compression factor of 24.5).
        # TODO: derive the input size from the environment's observation space.
        input_dim = 784
        encoding_dim = 32
        encoders = []
        for _ in range(n):
            # input placeholder
            input_img = Input(shape=(input_dim,))
            # "encoded" is the encoded representation of the input
            encoded = Dense(encoding_dim, activation='relu')(input_img)
            # "decoded" is the lossy reconstruction of the input
            # TODO: the decoder target should become the *next* timestep so
            # the autoencoder is trained on the transition from one
            # observation+action to a new observation.
            decoded = Dense(input_dim, activation='sigmoid')(encoded)
            # this model maps an input to its reconstruction; it is returned
            # uncompiled (the tutorial compiles with optimizer='adadelta',
            # loss='binary_crossentropy')
            encoders.append(Model(input_img, decoded))
        # TODO: wire them up so they share latents to each other's inputs (at
        # random); also wire them up at random to the environment, and the
        # action space...
        return encoders

    def step(self, obs):
        '''
        choose the next action for observation `obs`.

        currently `obs` is ignored: the action is sampled at random from
        `self.env.action_space`, printed, and returned.
        '''
        # they are predicting what action they will take. at first the observation
        # stands in as a random seed to activate the network, but soon they
        # wire up in a hierarchy and take actions to achieve what they think
        # they will see, instead of providing goals, you provide an image of
        # what you want them to see at the top layer of the hierarchy...
        # use the env handed to the constructor, not a module-level global
        sampled = self.env.action_space.sample()
        print(f'action sampled: {sampled}')
        return sampled



In [None]:

class SimpleCube(gym.Env):
    '''
    a Rubik's cube with only two colors, so that every face can be binary.

    the 48 movable stickers are numbered 1..48 and stored in `cube_state`, a
    flat list where sticker `s` lives at index `s - 1`. each `do_<move>` table
    maps a destination sticker number to the sticker number whose value it
    receives when that move is applied.

    actions are the integers 0..5 (left, right, top, under, front, back) or
    the move names themselves; `None` leaves the cube untouched.
    '''
    metadata = {'render.modes': ['human']}

    # integer action -> move name (the suffix of the matching `do_*` table)
    _ACTION_NAMES = {
        0: 'left', 1: 'right',
        2: 'top', 3: 'under',
        4: 'front', 5: 'back'}

    def __init__(self):
        super().__init__()
        self.action_space = self._action_space()
        self.observation_space = self._observation_space()
        # TODO: should change the state to be a list of np array?
        self.cube_state = [
            1, 1, 1, 1,  # stickers 1-4
            1, 1, 1, 1,  # stickers 5-8
            0,           # sticker 9
            0, 0, 0,     # stickers 10-12
            1, 1, 1,     # stickers 13-15
            1, 1, 1,     # stickers 16-18
            0, 0, 0,     # stickers 19-21
            0, 0,        # stickers 22-23
            1, 1,        # stickers 24-25
            1, 1,        # stickers 26-27
            0, 0,        # stickers 28-29
            0, 0, 0,     # stickers 30-32
            1, 1, 1,     # stickers 33-35
            1, 1, 1,     # stickers 36-38
            0, 0,        # stickers 39-40
            0, 0, 0, 0,  # stickers 41-44
            0, 0, 0, 0]  # stickers 45-48
        # pristine copy, restored by reset()
        self.solved_state = list(self.cube_state)
        # move tables: {destination sticker: source sticker}, 1-based numbers
        self.do_right = {
            3: 16, 16: 45, 45: 32, 32: 3,
            4: 26, 26: 44, 44: 23, 23: 4,
            5: 36, 36: 43, 43: 12, 12: 5,
            14: 25, 25: 34, 34: 24, 24: 14,
            35: 33, 33: 13, 13: 15, 15: 35, }
        self.do_left = {
            7: 10, 10: 41, 41: 38, 38: 7,
            8: 22, 22: 48, 48: 27, 27: 8,
            1: 30, 30: 47, 47: 18, 18: 1,
            19: 9, 9: 29, 29: 39, 39: 19,
            20: 21, 21: 40, 40: 28, 28: 20, }
        self.do_top = {
            9: 12, 12: 15, 15: 18, 18: 9,
            10: 13, 13: 16, 16: 19, 19: 10,
            11: 14, 14: 17, 17: 20, 20: 11,
            1: 3, 3: 5, 5: 7, 7: 1,
            2: 4, 4: 6, 6: 8, 8: 2, }
        self.do_under = {
            30: 33, 33: 36, 36: 39, 39: 30,
            31: 34, 34: 37, 37: 40, 40: 31,
            32: 35, 35: 38, 38: 29, 29: 32,
            41: 43, 43: 45, 45: 47, 47: 41,
            42: 44, 44: 46, 46: 48, 48: 42, }
        self.do_front = {
            1: 13, 13: 43, 43: 29, 29: 1,
            2: 24, 24: 42, 42: 21, 21: 2,
            3: 33, 33: 41, 41: 9, 9: 3,
            10: 12, 12: 32, 32: 30, 30: 10,
            11: 23, 23: 31, 31: 22, 22: 11, }
        self.do_back = {
            7: 15, 15: 45, 45: 39, 39: 7,
            6: 25, 25: 46, 46: 28, 28: 6,
            5: 35, 35: 47, 47: 19, 19: 5,
            18: 16, 16: 36, 36: 38, 38: 18,
            17: 26, 26: 37, 37: 27, 27: 17, }
        # last (action, obs, reward, done, info); set up front so render()
        # works even before the first reset()/step()
        self.state = (None, list(self.cube_state), np.float64(0.0), False, {})

    def step(self, action):
        ''' apply `action` and return (obs, reward, done, info). '''
        return self._request(action)

    def reset(self):
        ''' restore the solved cube and return its observation. '''
        self.cube_state = list(self.solved_state)
        return self._request(None)[0]

    def render(self, mode='human', close=False):
        ''' print the most recent transition (or just the observation when
        the last request carried no action). '''
        action, obs, reward, done, info = self.state
        if action is None:
            print("{}\n".format(obs))
        else:
            print("{}\t\t--> {:.18f}{}\n{}\n".format(action, reward, (' DONE!' if done else ''), obs))

    def _action_space(self):
        '''
        left, right, top, under, front, back
        this is a deterministic env, it doesn't change unless you change it,
        therefore, a no-operation action isn't available.
        '''
        return gym.spaces.Discrete(6)

    def _observation_space(self):
        ''' 48 unbounded float64 values, one per sticker. '''
        # -np.inf rather than np.NINF: the alias was removed in NumPy 2.0
        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(48,), dtype=np.float64)

    def _request(self, action):
        '''
        apply one move to the cube and record the resulting transition.

        action: an integer 0..5 (Python or NumPy integer), a move name such
            as 'left', or None for "no move". an integer outside 0..5 is
            treated as None.
        returns: (obs, reward, done, info) where obs is a copy of the
            48-element sticker list, reward is always 0.0, done is always
            False and info is an empty dict.
        raises: AttributeError if `action` is a name with no `do_<name>` table.
        '''
        # snapshot so every sticker is read from the pre-move state
        before = list(self.cube_state)
        # spaces sample NumPy integers, which are not instances of `int`
        if isinstance(action, (int, np.integer)):
            action = self._ACTION_NAMES.get(int(action), None)
        if action is not None:
            moves = getattr(self, f'do_{action}')
            for dst, src in moves.items():
                # tables hold 1-based sticker numbers; the list is 0-based
                self.cube_state[dst - 1] = before[src - 1]
        # hand out a copy so later moves don't mutate the caller's observation
        obs = list(self.cube_state)
        reward = np.float64(0.0)  # real AGI doesn't need spoonfed 'rewards'
        done = False
        info = {}
        self.state = (action, obs, reward, done, info)
        return obs, reward, done, info

In [None]:
# --- driver: roll out the agent on the two-color cube ---
N_EPISODES = 1
N_TIMESTEPS = 1000

env = SimpleCube()
env.seed(0)
print("agent: env.action_space {}".format(env.action_space))
agent = SensorimotorAutoencoderAgents(env)

for i_episode in range(N_EPISODES):
    # only the initial observation of each episode is rendered
    obs = env.reset()
    env.render()
    for t_timesteps in range(N_TIMESTEPS):
        # the agent picks an action, the environment applies it
        action = agent.step(obs)
        obs, reward, done, info = env.step(action)

env.close()
