In [1]:
import gym
from gym import spaces
import numpy as np

class CustomEnv(gym.Env):
    
    # define initial function
    def __init__(self):
        
        # define state space
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,))
        
        # define action_space
        self.action_space = spaces.Discrete(2)
        
        # set initial state
        self.state = np.zeros((3,))
        
        # set done
        self.done = False
    
    # define the step function
    def step(self, action):
        
        # 
        assert self.action_space.contains(action)
        
        if action == 0:
            self.state[0] += 0.1
        else:
            self.state[0] -= 0.1
        
        self.state[1:] = np.random.rand(2)
        self.done = self.state[0] >= 1
        
        reward = self._calculate_reward()
        
        return self.state, reward, self.done, {}
    
    def reset(self):
        
        self.state = np.zeros((3,))
        
        self.done = False
        
        return self.state
    
    def _calculate_reward(self):
        
        # custom reward function
        return 1 / (1 + np.exp(-self.state[0]))


In [2]:
# create environment
env = CustomEnv()

# reset environment
obs = env.reset()

# initial done
done = False

# test environment
while not done:
    
    # random generate action base on the sample
    action = env.action_space.sample()
    
    # set action to the environment and observation state, reward, done, and infor
    obs, reward, done, _ = env.step(action)
    
    # print result
    print(obs, reward)


[-0.1         0.47615251  0.11986689] 0.47502081252106
[0.         0.72603116 0.50717835] 0.5
[0.1        0.05963165 0.28688233] 0.52497918747894
[0.2       0.1831982 0.7860376] 0.549833997312478
[0.3        0.00602025 0.31962038] 0.574442516811659
[0.2        0.69392817 0.32456595] 0.549833997312478
[0.3        0.82869795 0.57632991] 0.574442516811659
[0.4        0.98623059 0.936588  ] 0.598687660112452
[0.5        0.74405936 0.68487161] 0.6224593312018546
[0.6        0.48222171 0.67331209] 0.6456563062257954
[0.5        0.40722267 0.95537086] 0.6224593312018546
[0.4        0.58390053 0.09206551] 0.598687660112452
[0.3        0.7088766  0.57148182] 0.574442516811659
[0.4        0.82308632 0.46484289] 0.598687660112452
[0.5        0.07563124 0.06494506] 0.6224593312018546
[0.4        0.16803627 0.72110906] 0.598687660112452
[0.5        0.53364317 0.39104184] 0.6224593312018546
[0.6        0.49735768 0.67608783] 0.6456563062257954
[0.5        0.94288023 0.04943354] 0.6224593312018546
[0