-
Notifications
You must be signed in to change notification settings - Fork 9
/
cost_functions.py
86 lines (61 loc) · 2.17 KB
/
cost_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
#========================================================
#
# Environment-specific cost functions:
#
def reacher_cost_fn(state, action, next_state):
if len(state.shape) > 1:
scores=np.zeros((state.shape[0],))
scores += np.linalg.norm(next_state[:,-1])
return scores
score = np.linalg.norm(next_state[-1])
return score
def pendulum_cost_fn(state, action, next_state):
if len(state.shape) > 1:
scores=np.zeros((state.shape[0],))
scores += np.abs(next_state[:,1]) #+ 0.001 * action**2
return scores
score = np.abs(next_state[1]) #+ 0.001 * action**2
return score
def cheetah_cost_fn(state, action, next_state):
if len(state.shape) > 1:
heading_penalty_factor=10
scores=np.zeros((state.shape[0],))
#dont move front shin back so far that you tilt forward
front_leg = state[:,5]
my_range = 0.2
scores[front_leg>=my_range] += heading_penalty_factor
front_shin = state[:,6]
my_range = 0
scores[front_shin>=my_range] += heading_penalty_factor
front_foot = state[:,7]
my_range = 0
scores[front_foot>=my_range] += heading_penalty_factor
scores-= (next_state[:,17] - state[:,17]) / 0.01 #+ 0.1 * (np.sum(action**2, axis=1))
return scores
heading_penalty_factor=10
score = 0
#dont move front shin back so far that you tilt forward
front_leg = state[5]
my_range = 0.2
if front_leg>=my_range:
score += heading_penalty_factor
front_shin = state[6]
my_range = 0
if front_shin>=my_range:
score += heading_penalty_factor
front_foot = state[7]
my_range = 0
if front_foot>=my_range:
score += heading_penalty_factor
score -= (next_state[17] - state[17]) / 0.01 #+ 0.1 * (np.sum(action**2))
return score
#========================================================
#
# Cost function for a whole trajectory:
#
def trajectory_cost_fn(cost_fn, states, actions, next_states):
trajectory_cost = 0
for i in range(len(actions)):
trajectory_cost += cost_fn(states[i], actions[i], next_states[i])
return trajectory_cost