# Install dependencies

In [None]:
# Notebook shell commands: install the third-party packages for this notebook.
# NOTE(review): tensorflow is pinned to 1.14 while keras-rl2 is installed next to it;
# keras-rl2 presumably targets TensorFlow 2.x - confirm this pin is compatible.
# NOTE(review): pygame and stable-baselines are not imported by any visible cell -
# confirm they are still needed.
!pip install tensorflow==1.14
!pip install gym
!pip install keras
!pip install keras-rl2
!pip install pygame
!pip install stable-baselines[mpi]

# Test a random environment from OpenAI Gym

In [None]:
import gym
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np

In [None]:
class SquaresEnv(Env):
    """Gym environment for a block-placement puzzle on a 9x9 board.

    State
        ``self.state['board']`` is a 9x9 ``uint8`` grid (1 = occupied) and
        ``self.state['items']`` holds three shape ids in
        ``1..number_of_shapes``; ``0`` marks a slot that was already used.

    Action
        Three floats in ``[0, 1]``: the item slot (scaled to ``0..2``) and the
        board row / column of the shape's top-left cell (each scaled to
        ``0..8``).

    Observation
        A flat array of 84 floats: the 81 board cells in row-major order
        followed by the three item ids divided by ``number_of_shapes``.

    Reward
        ``+3`` for a successful placement, ``-3`` for picking an empty slot or
        a placement that does not fit, plus ``+10`` for every completely
        filled 3x3 block (which is then cleared).
    """

    def __init__(self):
        self.number_of_shapes = 37
        self.action_space = Box(low=0, high=1, shape=(3,), dtype=np.float32)
        self.observation_space = Box(low=0, high=1, shape=(84,), dtype=np.float32)

        self.state = {
            'board': np.zeros((9, 9), dtype=np.uint8),
            'items': np.zeros(3, dtype=np.uint8),
        }
        self.time = 1
        # step() increments this counter, so it has to exist even when step()
        # is called before the first reset().
        self.counter = 0
        self._dealItems()

    def _dealItems(self):
        """Fill every item slot with a random shape id in 1..number_of_shapes."""
        for slot in range(len(self.state['items'])):
            self.state['items'][slot] = random.randint(1, self.number_of_shapes)

    def _observation(self):
        """Return the flat 84-element observation (board cells, then scaled items)."""
        flatState = np.zeros(84)
        flatState[:81] = self.state['board'].ravel()
        flatState[81:] = self.state['items'] / self.number_of_shapes
        return flatState

    @staticmethod
    def _scaleAction(value, maxIndex):
        """Map an action component onto an integer index in 0..maxIndex.

        The value is clipped to [0, 1] first. The explicit float()/int()
        conversions guarantee a plain Python int that is safe to use as an
        array index regardless of how NumPy scalars implement round().
        """
        return int(round(float(np.clip(value, 0, 1)) * maxIndex))

    def checkFull(self, minX, maxX, minY, maxY):
        """Clear the 3x3 block ``board[minY:maxY, minX:maxX]`` if it is full.

        Returns 10 when the block consisted only of ones (it is zeroed as a
        side effect), otherwise 0.
        """
        bingo = np.ones((3, 3), dtype=np.uint8)
        if np.array_equal(self.state['board'][minY:maxY, minX:maxX], bingo):
            self.state['board'][minY:maxY, minX:maxX] = 0
            print('yay')
            return 10
        else:
            return 0

    def getShape(self, shapeType):
        """Return the 0/1 mask of the shape with id ``shapeType``.

        Ids 1..37 yield ``uint8`` masks; any other id falls through to an
        all-zero 2x1 array.
        """
        shape = np.zeros((2,1))
        if shapeType == 1: # X
            shape = np.ones((1,1), dtype = np.uint8)
        elif shapeType == 2: # XX
            shape = np.ones((2,1), dtype = np.uint8)
        elif shapeType == 3:
            shape = np.ones((1,2), dtype = np.uint8)
        elif shapeType == 4: # XXX
            shape = np.ones((3,1), dtype = np.uint8)
        elif shapeType == 5:
            shape = np.ones((1,3), dtype = np.uint8)
        elif shapeType == 6: # XXXX
            shape = np.ones((4,1), dtype = np.uint8)
        elif shapeType == 7:
            shape = np.ones((1,4), dtype = np.uint8)
        elif shapeType == 8: # square corner
            shape = np.ones((2,2), dtype = np.uint8)
            shape[0,0] = 0
        elif shapeType == 9:
            shape = np.ones((2,2), dtype = np.uint8)
            shape[0,1] = 0
        elif shapeType == 10:
            shape = np.ones((2,2), dtype = np.uint8)
            shape[1,0] = 0
        elif shapeType == 11:
            shape = np.ones((2,2), dtype = np.uint8)
            shape[1,1] = 0
        elif shapeType == 12: # square one sided long vertical
            shape = np.ones((3,2), dtype = np.uint8)
            shape[1:, 1] = 0
        elif shapeType == 13:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[1:, 0] = 0
        elif shapeType == 14:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[:2, 1] = 0
        elif shapeType == 15:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[:2, 0] = 0
        elif shapeType == 16: # square one sided long horizontal
            shape = np.ones((2,3), dtype = np.uint8)
            shape[0, 1:] = 0
        elif shapeType == 17:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[0, :2] = 0
        elif shapeType == 18:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[1, 1:] = 0
        elif shapeType == 19:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[1, :2] = 0

        elif shapeType == 20: # straight big corner
            shape = np.ones((3,3), dtype = np.uint8)
            shape[:2,1:] = 0
        elif shapeType == 21:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[:2,:2] = 0
        elif shapeType == 22:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[1:,1:] = 0
        elif shapeType == 23:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[1:,:2] = 0
        elif shapeType == 24: # T shape
            shape = np.ones((3,3), dtype = np.uint8)
            shape[0,1:] = 0
            shape[2,1:] = 0
        elif shapeType == 25:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[:2,0] = 0
            shape[:2,2] = 0
        elif shapeType == 26:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[0,:2] = 0
            shape[2,:2] = 0
        elif shapeType == 27:
            shape = np.ones((3,3), dtype = np.uint8)
            shape[1:,0] = 0
            shape[1:,2] = 0
        elif shapeType == 28: #short T shape
            shape = np.ones((3,2), dtype = np.uint8)
            shape[0,1] = 0
            shape[2,1] = 0
        elif shapeType == 29:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[0, 0] = 0
            shape[2, 0] = 0
        elif shapeType == 30:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[1, 0] = 0
            shape[1, 2] = 0
        elif shapeType == 31:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[0,0] = 0
            shape[0,2] = 0
        elif shapeType == 32: # S shape
            shape = np.ones((2,3), dtype = np.uint8)
            shape[0,0] = 0
            shape[1,2] = 0
        elif shapeType == 33:
            shape = np.ones((2,3), dtype = np.uint8)
            shape[1,0] = 0
            shape[0,2] = 0
        elif shapeType == 34:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[0,1] = 0
            shape[2,0] = 0
        elif shapeType == 35:
            shape = np.ones((3,2), dtype = np.uint8)
            shape[0,0] = 0
            shape[2,1] = 0
        elif shapeType == 36: # square
            shape = np.ones((2,2), dtype = np.uint8)
        elif shapeType == 37: # cross 
            shape = np.zeros((3,3), dtype = np.uint8)
            shape[:,1] = 1
            shape[1,:] = 1
        return shape

    def insertionPossible(self, shape, x, y):
        """Return True if ``shape`` fits on the board with its top-left at (x, y).

        ``x`` indexes the board's first axis and ``y`` its second axis. The
        shape must lie completely inside the board and none of its filled
        cells may land on an occupied board cell.
        """
        board = self.state['board']
        shape_x, shape_y = shape.shape
        # Negative offsets would silently wrap around via negative indexing.
        if x < 0 or y < 0:
            return False
        if x + shape_x > board.shape[0] or y + shape_y > board.shape[1]:
            return False
        target = board[x:x + shape_x, y:y + shape_y]
        return not np.any((target == 1) & (shape == 1))

    def thereAreOptions(self):
        """Return True if at least one unused item can be placed somewhere."""
        rows, cols = self.state['board'].shape
        for shapeType in self.state['items']:
            if shapeType == 0:
                continue
            shape = self.getShape(shapeType)
            # "+ 1" so that placements touching the last row/column are
            # considered as well.
            for x in range(rows - shape.shape[0] + 1):
                for y in range(cols - shape.shape[1] + 1):
                    if self.insertionPossible(shape, x, y):
                        return True
        return False

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        The chosen item is consumed even when its placement fails. After the
        placement attempt every 3x3 block is checked and cleared if full, the
        items are re-dealt once all three slots are used, and ``done`` is set
        when none of the remaining items fits anywhere on the board.
        """
        self.counter += 1
        reward = 0

        shapeIndex = self._scaleAction(action[0], 2)
        x = self._scaleAction(action[1], 8)
        y = self._scaleAction(action[2], 8)

        # if chosen shape is not yet used
        if self.state['items'][shapeIndex] != 0:
            shapeType = self.state['items'][shapeIndex]
            self.state['items'][shapeIndex] = 0
            shape = self.getShape(shapeType)
            shape_x, shape_y = shape.shape

            if self.insertionPossible(shape, x, y):
                reward += 3
                board = self.state['board']
                board[x:x + shape_x, y:y + shape_y] += shape.astype(board.dtype)
            else:
                reward = -3
        else:
            reward = -3

        # add reward for bingo: visit the nine 3x3 blocks of the board
        for minX in range(0, 9, 3):
            for minY in range(0, 9, 3):
                reward += self.checkFull(minX, minX + 3, minY, minY + 3)

        # update shapes available: deal new ones only when ALL slots are used
        if not self.state['items'].any():
            self._dealItems()

        # check there are options
        done = not self.thereAreOptions()
        if done:
            print('shit')
        info = {}

        return self._observation(), reward, done, info

    def reset(self):
        """Start a new episode: empty board, fresh items; return the observation."""
        self.counter = 0
        self.state['board'] = np.zeros((9, 9), dtype=np.uint8)
        self.state['items'] = np.zeros(3, dtype=np.uint8)
        self._dealItems()
        return self._observation()

    def render(self, mode = 'shit'):
        """Rendering is not implemented; this is a no-op."""
        pass

In [None]:
# Instantiate the custom environment defined above and bind it to `env`.
env = SquaresEnv()
# Disabled smoke test: plays a few episodes with randomly sampled actions and
# prints the accumulated score of each episode.
# episodes = 10
# for episode in range(1, episodes + 1):
#     state = env.reset()
#     done = False
#     score = 0
    
#     while not done:
#         env.render()
#         action = env.action_space.sample()
#         #print(action)
#         n_state, reward, done, info = env.step(action)
#         if n_state is None:
#             print('fuck')
#         score+=reward
#     print('Episode:{} Score:{}'.format(episode, score))

# Create a deep learning model with Keras

In [None]:
import numpy as np
import gym

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Flatten, Input, Concatenate
from tensorflow.keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess


# Number of action components the actor has to output.
nb_actions = env.action_space.shape[0]
print(env.observation_space.shape)


# Actor: maps a (window of one) observation to one value per action component.
actor = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(128),
    Activation('relu'),
    Dense(64),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])
#print(actor.summary())

# Critic: takes the action and the flattened observation and outputs one value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
# Stack of Dense + Activation pairs, narrowing down to the single output unit.
for units, activation_name in ((64, 'relu'), (32, 'relu'), (1, 'linear')):
    x = Dense(units)(x)
    x = Activation(activation_name)(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
#print(critic.summary())

In [None]:
# Assemble the DDPG agent from the actor/critic built above, a replay memory
# and an Ornstein-Uhlenbeck noise process, then compile it with Adam.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=.15,
    mu=0.,
    sigma=.3,
)
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=200,
    nb_steps_warmup_actor=200,
    random_process=random_process,
    gamma=.99,
    target_model_update=3e-3,
)
agent.compile(
    Adam(learning_rate=.003, clipnorm=1.),
    metrics=['mae'],
)

# Train the agent. Visualization is switched on, which slows training down;
# the run can be aborted early with Ctrl + C.
agent.fit(
    env,
    nb_steps=1000000,
    visualize=True,
    verbose=1,
    nb_max_episode_steps=1000,
)

# Persist the weights once training has finished.
agent.save_weights(
    'ddpg_{}_weights.h5f'.format("damn"),
    overwrite=True,
)



In [None]:
# Finally, evaluate the trained agent for 10 episodes (at most 1000 steps each).
agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=1000)