In [1]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

    
## Action space

The blobble (agent) may choose to go zero or one step in the x direction (left or right) and zero or one step in the y direction (up or down). If the blobble hits the edge of Blobble world then it will 'bounce' off the wall.

The blobble may also choose to eat or not before moving. This action only has an effect if there is food at the blobble's current location.

## Observation space

The observation space is:

* the (x,y) coordinate of the agent. The blobble starts at (0,0) which is in the centre of blobble world

* the type of food (if any) at that location


## Reward

Reward is `1` for every step the blobble survives, and `0` for the step on which the episode ends.


## Done (end condition)

This is a ```boolean```. 

Blobble world is reset when the blobble's health drops to zero or when there is no remaining (healthy) food in blobble world. The blobble's health increases as it eats good food and decreases as it eats bad food. The blobble also becomes less healthy if it doesn't eat.

## Info 

This is a ```dict``` with useful information for debugging.



In [27]:
import io
from PIL import Image
import gym
from gym import spaces

class BlobbleEnv(gym.Env):
    """Grid world in which a single 'blobble' agent moves around and eats food.

    The world is a square grid running from ``_MIN_LOC`` to ``_MAX_LOC`` on both
    axes. Food items with a nutrition value are scattered over the interior of
    the grid on ``reset``. Eating changes the blobble's health by
    ``(nutrition - 1) * 2``.

    Actions are dicts with two keys:

    * ``'movement'``: a length-2 sequence ``[dx, dy]`` added to the position.
    * ``'eat'``: truthy if the blobble should eat before moving.

    Observations are integer arrays ``[x, y, food]`` where ``food`` is the
    nutrition value of the food at the blobble's location, or ``-1`` if there
    is none.

    ``reset`` must be called before ``step``, ``render`` or ``render_print``.
    """

    # 'rgb' is the mode name this notebook has always used; 'rgb_array' is
    # accepted as an alias for it.
    metadata = {'render.modes': ['human', 'rgb', 'rgb_array']}

    # Observation value used when there is no food under the blobble.
    _NO_FOOD = -1

    def __init__(self):
        """Set up world constants, colour palettes and the gym spaces."""
        super().__init__()

        self._MAX_HEALTH = 10
        self._START_HEALTH = 5
        max_bubble_size = 50
        # For scaling blobbles and food markers in the plot.
        self._SIZE_REF = 2. * self._MAX_HEALTH / (max_bubble_size ** 2)
        self._MAX_LOC = 10
        self._MIN_LOC = -10
        self._MAX_FOOD = 100

        # Blobble marker colours, indexed from lowest to highest health.
        self._HEALTH_COLOURS = ['rgb(243, 224, 247)',
                                'rgb(228, 199, 241)',
                                'rgb(209, 175, 232)',
                                'rgb(185, 152, 221)',
                                'rgb(159, 130, 206)',
                                'rgb(130, 109, 186)',
                                'rgb(99, 85, 159)']

        # Food marker colours, indexed by nutrition value.
        self._NUTRITION_COLOURS = ['rgb(80, 59, 46)',     # 0 brown
                                   'rgb(168, 143, 66)',   # 1 light brown
                                   'rgb(120, 198, 121)',  # 2 light green
                                   'rgb(35, 132, 67)'     # 3 dark green
                                   ]

        self.action_space = spaces.Dict({
            'movement': spaces.Box(low=-1, high=1, shape=(2,), dtype=np.int64),
            'eat': spaces.Discrete(2),
        })
        self.observation_space = spaces.Box(
            low=np.array([self._MIN_LOC, self._MIN_LOC, self._NO_FOOD]),
            high=np.array([self._MAX_LOC, self._MAX_LOC,
                           len(self._NUTRITION_COLOURS) - 1]),
            dtype=np.int64)

    def reset(self):
        """Start a new episode and return the initial observation.

        The blobble is placed at (0, 0) with the start health, and up to
        ``_MAX_FOOD`` food items are placed at distinct random locations.
        """
        self._blobble_location = np.array([0, 0])    # Start Blobble in centre
        self._blobble_health = self._START_HEALTH    # Give Blobble start health

        # np.random.randint excludes `high`, so passing _MAX_LOC gives
        # coordinates in [_MIN_LOC + 1, _MAX_LOC - 1], symmetric about 0.
        # np.unique drops duplicate (x, y) columns, so fewer than _MAX_FOOD
        # items may remain.
        food_locs = np.unique(
            np.random.randint(self._MIN_LOC + 1, self._MAX_LOC,
                              (2, self._MAX_FOOD)),
            axis=1)

        # One nutrition value per food item, covering every defined colour.
        nutrition = np.random.randint(0, len(self._NUTRITION_COLOURS),
                                      food_locs.shape[1])
        self._food = np.vstack((food_locs, nutrition))

        self._food_colours = np.array(
            [self._NUTRITION_COLOURS[value] for value in self._food[2]],
            dtype='object')

        return self._get_observation()

    def step(self, action):
        """Advance the world by one step.

        Args:
            action: dict with ``'eat'`` (truthy to eat the food at the current
                location, if any) and ``'movement'`` (``[dx, dy]``).

        Returns:
            ``(observation, reward, done, info)`` where reward is ``1.0``
            unless the episode has just ended, ``done`` is True once health
            reaches zero, and ``info`` holds debugging details.
        """
        ate = False

        # Let the blobble eat, if there is food at its current location.
        if action['eat']:
            food_index = self._food_index_at_blobble()
            if food_index is not None:
                nutrition = int(self._food[2][food_index])
                self._blobble_health = self._blobble_health + ((nutrition - 1) * 2)
                # Remove the eaten item and its colour together so the two
                # arrays stay aligned.
                self._food = np.delete(self._food, food_index, 1)
                self._food_colours = np.delete(self._food_colours, food_index)
                ate = True
                print('yum')

        # Move the blobble
        self._blobble_location = self._blobble_location + np.asarray(action['movement'])
        self._bounce()  # bounce off the edges if required

        # Just a random health change to start with.
        health = self._blobble_health + np.random.randint(-3, 3)
        self._blobble_health = int(min(max(health, 0), self._MAX_HEALTH))

        # TODO: the episode could also end when no food is left.
        done = bool(self._blobble_health == 0)
        reward = 0.0 if done else 1.0

        info = {
            'health': self._blobble_health,
            'food_remaining': int(self._food.shape[1]),
            'ate': ate,
        }
        return self._get_observation(), reward, done, info

    def render(self, mode='human', close=False):
        """Draw the food and the blobble with plotly.

        Args:
            mode: ``'human'`` shows the figure and returns None; ``'rgb'`` (or
                ``'rgb_array'``) returns the figure as a numpy image array.
            close: unused.

        Raises:
            ValueError: if ``mode`` is not a supported render mode.
        """
        if mode not in self.metadata['render.modes']:
            raise ValueError('Unsupported render mode: {!r}'.format(mode))

        # Depict the food locations
        fig = go.Figure(data=go.Scatter(
            x=self._food[0],
            y=self._food[1],
            mode='markers',
            marker=dict(
                sizeref=self._SIZE_REF,
                size=50,
                symbol='asterisk-open',
                color=self._food_colours)
            )
        )

        # Map health 0.._MAX_HEALTH onto the health colour palette.
        health_fraction = self._blobble_health / self._MAX_HEALTH
        colour_index = int(health_fraction * (len(self._HEALTH_COLOURS) - 1))

        fig.add_trace(go.Scatter(
            x=[self._blobble_location[0]],
            y=[self._blobble_location[1]],
            text='Blobble',
            mode='markers',
            marker=dict(
                sizeref=self._SIZE_REF,
                size=50,
                color=self._HEALTH_COLOURS[colour_index],
                line_color='rgb(140, 140, 170)')
            )
        )

        fig.update_yaxes(automargin=True, range=[self._MIN_LOC, self._MAX_LOC], nticks=40)
        fig.update_xaxes(automargin=True, range=[self._MIN_LOC, self._MAX_LOC], nticks=40)

        if mode == 'human':
            fig.update_layout(
                width=700,
                height=600,
                margin=dict(r=40, l=80, b=10, t=10))
            fig.show()
            return None

        # 'rgb' / 'rgb_array': render to PNG bytes and decode into an array.
        fig.update_layout(
            width=1408,
            height=1200,
            margin=dict(r=40, l=80, b=10, t=10))
        img_as_bytes = fig.to_image(format='png')
        return np.array(Image.open(io.BytesIO(img_as_bytes)))

    def render_print(self):
        """Print the blobble's health, position and the remaining food count."""
        print('Blobble details are:')
        print('  Health: ', self._blobble_health)
        print('  Position x : ', self._blobble_location[0])
        print('  Position y : ', self._blobble_location[1])
        print('  Remaining food :', len(self._food[0]))

    def _food_index_at_blobble(self):
        """Return the column index of the food under the blobble, or None."""
        here = ((self._food[0] == self._blobble_location[0]) &
                (self._food[1] == self._blobble_location[1]))
        matches = np.flatnonzero(here)
        return int(matches[0]) if matches.size else None

    def _get_observation(self):
        """Return ``[x, y, food]`` for the blobble's current location."""
        food_index = self._food_index_at_blobble()
        food_type = self._NO_FOOD if food_index is None else self._food[2][food_index]
        return np.array([self._blobble_location[0],
                         self._blobble_location[1],
                         food_type])

    def _bounce(self):
        """Reflect the blobble back inside the world if it stepped past an edge."""
        for axis in (0, 1):
            if self._blobble_location[axis] > self._MAX_LOC:
                overshoot = self._blobble_location[axis] - self._MAX_LOC
                self._blobble_location[axis] = self._MAX_LOC - overshoot
            if self._blobble_location[axis] < self._MIN_LOC:
                overshoot = self._MIN_LOC - self._blobble_location[axis]
                self._blobble_location[axis] = self._MIN_LOC + overshoot


In [28]:
# Create the environment, then place the blobble and scatter the food.
blobble_env = BlobbleEnv()
blobble_env.reset()

In [29]:
# Print the blobble's health, position and the remaining food count.
blobble_env.render_print()

Blobble details are:
  Health:  5
  Position x :  0
  Position y :  0
  Remaining food : 86


In [30]:
# Eat whatever is at the current location (if anything), then move one step
# in the negative y direction.
action = {'movement': [0, -1], 'eat': True}
blobble_env.step(action)

yum


In [31]:
# NOTE(review): render() defaults to mode='human', which shows the figure and
# has no return statement, so img_as_np is None here. Pass mode='rgb' to get
# the image as a numpy array.
img_as_np = blobble_env.render()

In [7]:
import imageio as imageio
import base64
import IPython

def embed_mp4(filename):
    """Embeds an mp4 file in the notebook.

    Args:
        filename: path of the mp4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object containing a ``<video>`` tag whose
        source is the file's contents as a base64 data URI.
    """
    # Use a context manager so the file handle is closed as soon as it is read.
    with open(filename, 'rb') as video_file:
        video = video_file.read()

    b64 = base64.b64encode(video)
    tag = '''
    <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
    Your browser does not support the video tag.
    </video>'''.format(b64.decode())
    return IPython.display.HTML(tag)



In [8]:
# NOTE(review): max_step is not referenced by any code visible in this
# notebook section; create_blobble_video uses its own max_iterations
# parameter. Confirm whether it is still needed.
max_step = 10

def create_blobble_video(video_filename, num_episodes=1, fps=30, max_iterations = 10):
    """Record random-action episodes of BlobbleEnv to an mp4 and embed it.

    Args:
        video_filename: output path without the ".mp4" extension.
        num_episodes: number of episodes to record.
        fps: frames per second of the written video.
        max_iterations: number of steps (frames) recorded per episode.

    Returns:
        The result of ``embed_mp4`` for the written file.
    """
    filename = video_filename + ".mp4"
    blobble_env = BlobbleEnv()

    with imageio.get_writer(filename, fps=fps) as video:
        for _episode in range(num_episodes):
            blobble_env.reset()
            for _step in range(max_iterations):
                # np.random.randint excludes its upper bound, so 2 is needed
                # for the blobble to be able to step in the +1 direction.
                action = {
                    'movement' : [int(np.random.randint(-1, 2)),
                                  int(np.random.randint(-1, 2))],
                    'eat' : True
                    }
                print('Next Action: ', action)
                blobble_env.step(action)
                blobble_env.render_print()
                video.append_data(blobble_env.render(mode='rgb'))

    # Embed only after the `with` block has closed the writer, so the file is
    # complete when it is read back.
    return embed_mp4(filename)

In [9]:
# Record one 50-step episode of random actions at 2 frames per second.
create_blobble_video('blobble_video', fps=2, max_iterations = 50)

Next Action:  {'movement': [-1, 0], 'eat': True}
Blobble details are:
  Health:  7
  Position x :  -1
  Position y :  0
  Remaining food : 90
Next Action:  {'movement': [0, 0], 'eat': True}
Blobble details are:
  Health:  9
  Position x :  -1
  Position y :  0
  Remaining food : 90
Next Action:  {'movement': [0, 0], 'eat': True}
Blobble details are:
  Health:  8
  Position x :  -1
  Position y :  0
  Remaining food : 90
Next Action:  {'movement': [-1, -1], 'eat': True}
Blobble details are:
  Health:  10
  Position x :  -2
  Position y :  -1
  Remaining food : 90
Next Action:  {'movement': [0, -1], 'eat': True}
yum
Blobble details are:
  Health:  8
  Position x :  -2
  Position y :  -2
  Remaining food : 89
Next Action:  {'movement': [0, 0], 'eat': True}
Blobble details are:
  Health:  9
  Position x :  -2
  Position y :  -2
  Remaining food : 89
Next Action:  {'movement': [-1, -1], 'eat': True}
Blobble details are:
  Health:  9
  Position x :  -3
  Position y :  -3
  Remaining food : 8