diff --git a/gym/envs/classic_control/cartpole.py b/gym/envs/classic_control/cartpole.py index dbc9b31cb5..16324ead14 100644 --- a/gym/envs/classic_control/cartpole.py +++ b/gym/envs/classic_control/cartpole.py @@ -62,6 +62,7 @@ def __init__(self): self.polemass_length = (self.masspole * self.length) self.force_mag = 10.0 self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' # Angle at which to fail the episode self.theta_threshold_radians = 12 * 2 * math.pi / 360 @@ -97,10 +98,16 @@ def step(self, action): temp = (force + self.polemass_length * theta_dot * theta_dot * sintheta) / self.total_mass thetaacc = (self.gravity * sintheta - costheta* temp) / (self.length * (4.0/3.0 - self.masspole * costheta * costheta / self.total_mass)) xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass - x = x + self.tau * x_dot - x_dot = x_dot + self.tau * xacc - theta = theta + self.tau * theta_dot - theta_dot = theta_dot + self.tau * thetaacc + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot self.state = (x,x_dot,theta,theta_dot) done = x < -self.x_threshold \ or x > self.x_threshold \