-
Notifications
You must be signed in to change notification settings - Fork 0
/
PVTOL_ref_env.py
91 lines (65 loc) · 2.14 KB
/
PVTOL_ref_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""
@author: Mohsin Sarwari
Last Update: 09/18/21
"""
import numpy as np
import gym
import time
from gym import spaces
import matplotlib
import matplotlib.pyplot as plt
class PVTOL_env():
    """
    Planar vertical take-off and landing (PVTOL) environment.

    System:
        x_ddot     = -sin(theta) * u1 + eps * cos(theta) * u2
        y_ddot     =  cos(theta) * u1 + eps * sin(theta) * u2 - 1
        theta_ddot =  u2

    State:
        [x, y, theta, x_dot, y_dot, theta_dot]

    Keys read from ``param_dict``:
        eps: coupling between rolling and lateral movement
        dt: time between steps
        init_low: lower bound (inclusive) used when randomizing the state
        init_high: upper bound (exclusive) used when randomizing the state
        test: truthy if this environment is used to evaluate the model
        initial_state_dynamic: fixed initial state used when ``test`` is
            truthy (state variables, then their derivatives, same order)
    """

    def __init__(self, param_dict):
        """Store the parameter dict and set the initial state."""
        self.param_dict = param_dict
        self.state = self._initial_state()

    def _initial_state(self):
        """Return a fresh initial state according to ``param_dict``."""
        if self.param_dict["test"]:
            # Copy so that in-place edits of the returned state can never
            # alter the configured initial state held by param_dict.
            return np.array(self.param_dict["initial_state_dynamic"])
        # NOTE(review): randint draws *integer* states in
        # [init_low, init_high); confirm that integer-valued starts are
        # intended rather than a continuous uniform draw.
        return np.random.randint(
            low=self.param_dict["init_low"],
            high=self.param_dict["init_high"],
            size=6,
        )

    def size(self):
        """Return the size of the state [x, y, theta, x_dot, y_dot, theta_dot]."""
        return 6

    def step(self, action):
        """
        Advance the state by one step of length ``dt`` and return it.

        ``action`` is indexed as ``[u1, u2]``. The update is
        ``state + dt * derivatives``, with the derivatives evaluated at the
        current state.
        """
        u1 = action[0]
        u2 = action[1]

        # x and y do not enter the dynamics; only theta and the rates do.
        theta = self.state[2]
        x_dot = self.state[3]
        y_dot = self.state[4]
        theta_dot = self.state[5]

        eps = self.param_dict["eps"]
        sin_theta = np.sin(theta)
        cos_theta = np.cos(theta)

        derivatives = np.array([
            x_dot,
            y_dot,
            theta_dot,
            (-sin_theta * u1) + (eps * cos_theta * u2),
            (cos_theta * u1) + (eps * sin_theta * u2) - 1,
            u2,
        ])

        # Rebinds self.state to a new array; previously returned states
        # are not modified.
        self.state = self.state + (self.param_dict["dt"] * derivatives)
        return self.state

    def get_learned_pos(self):
        """Return the current position as the tuple ``(x, y)``."""
        return (self.state[0], self.state[1])

    def get_zero(self):
        """Return the current ``theta`` (state index 2)."""
        return self.state[2]

    def get_input_size(self):
        """Return the number of control inputs (u1, u2)."""
        return 2

    def reset(self):
        """Re-initialize the state (fixed in test mode, random otherwise) and return it."""
        self.state = self._initial_state()
        return self.state