In [2]:
import gym
from gym import spaces
import numpy as np

In [3]:
class Orbit(gym.Env):
    def __init__(self, max_step):
        #self.directions = (-1, 1, 0) #speed down, up, keep
        self.max_step = max_step
        # action space: velocity change
        self.action_space = spaces.Box(low=-5,high=5,shape=(1,))
        # observation space: (velocity, radius)
        self.observation_space = spaces.Tuple((spaces.Box(low=-50,high=50,shape=(1,)), spaces.Box(low=-23,high=23,shape=(1,))))
        self.step_count = 0
        self.G = 6.67259*10**(-17) # N km^2 / kg^2
        self.M = 1.989*10**30 # kg
        self.m = 100 # i dont know
        self.r_mars = 2.2794*10**8 # km
        self.reset()
        
    def reset(self):
        v_escape = 11.2 # km/s
        r_earth = 1.4959787*10**8 # km
        self.state = (v_escape, -r_earth)
        self.step_count = 0
        return self.state
        
    def step(self, action):
        assert self.action_space.contains(action)
        done = False
        self.step_count += 1
        if self.step_count==self.max_step:
            done = True
        
        v0, r0 = self.state
        v1 = v0 + action
        a = 1/(2/r0-v1**2*1000/self.G/self.M) # km
        dE = 1/2/self.m/np.abs(v0**2-v1**2)
        if r0>0:
            r1 = -2*a+r0
        else:
            r1 = 2*a+r0
        r2 = v1*r0/r1
        self.state = (v1[0], r2[0])
        if np.abs(r2)==self.r_mars:
            done = True
            reward = 10000
        else:
            reward = -dE
        return self.state, reward, done, {}

In [4]:
env = Orbit(10)
print(env.state)
#action = env.action_space.sample()
action = np.array([3.34])
print(action)
obs, R, done, _ = env.step(action)
print(obs)
action = np.array([2.65])
print(action)
obs, R, done, _ = env.step(action)
print(obs)

(11.2, -149597870.0)
[3.34]
(14.54, 7.678759314848292)
[2.65]
(17.189999999999998, -2010908373.646385)
