In [8]:
import gym
from gym import spaces
import numpy as np

class CustomEnv(gym.Env):
    
    def __init__(self):
        
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,))
        
        # there are only two action 0 or 1
        self.action_space = spaces.Discrete(2)
        
        
        self.state = np.zeros((3,))
        
        self.done = False
        
    def step(self, action):
        
        # `assert` is a built-in Python keyword that is used to check if a condition is True. If the condition is not True, it will raise an AssertionError with an optional error message.        
        assert self.action_space.contains(action)
        
        if action == 0:
            self.state[0] += 0.1
        else:
            self.state[0] -= 0.1
        
        reward = self.state[0]
        
        self.state[1:] = np.random.rand(2)
        self.done = self.state[0] >= 1
        
        return self.state, reward, self.done, {}
    
    def reset(self):
        self.state = np.zeros((3,))
        self.done = False
        return self.state


In [9]:
env = CustomEnv()

obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, _ = env.step(action)
    print(obs, reward)


[-0.1         0.5855011   0.32640878] -0.1
[-0.2         0.93123767  0.25688591] -0.2
[-0.3         0.93505425  0.50077928] -0.30000000000000004
[-0.4         0.17398917  0.92132001] -0.4
[-0.3         0.78648622  0.19098465] -0.30000000000000004
[-0.4         0.20639327  0.05916273] -0.4
[-0.3         0.46237805  0.84932037] -0.30000000000000004
[-0.4         0.75841097  0.82157526] -0.4
[-0.3         0.01648754  0.53609975] -0.30000000000000004
[-0.2         0.78434576  0.9860543 ] -0.20000000000000004
[-0.1         0.66819153  0.95058612] -0.10000000000000003
[-0.2         0.7208291   0.02055381] -0.20000000000000004
[-0.3         0.70026411  0.02833948] -0.30000000000000004
[-0.4         0.33823958  0.10952978] -0.4
[-0.5         0.10411072  0.43204768] -0.5
[-0.6         0.4293643   0.39548192] -0.6
[-0.5         0.7711215   0.11017191] -0.5
[-0.6         0.65293389  0.11460693] -0.6
[-0.7         0.067132    0.01242821] -0.7
[-0.6         0.22623577  0.05534334] -0.6
[-0.7       

In [10]:
for i in range(10):
    print(env.action_space.sample())

0
1
1
0
1
1
0
0
0
1
