/
high_level_env.py
150 lines (121 loc) · 4.32 KB
/
high_level_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import battle_ai
import numpy as np
import gym
import math
import time
from gym import spaces
class HighLevelEnv(gym.Env):
metadata = {'render.modes': ['human']}
def __init__(self, level_name):
super(HighLevelEnv, self).__init__()
self.world = battle_ai.World()
if not self.world.load_from_file(level_name):
raise 'unable to load file'
self.agent = battle_ai.HighLevelAgent()
self.other_agent = battle_ai.SimpleAgent()
battle_ai.replace_agent(self.world, 0, self.agent)
battle_ai.replace_agent_simple(self.world, 1, self.other_agent)
pup_times = battle_ai.powerup_times(self.world)
obs = self._get_state()
# For simplicity, assume we can move in one of 4-directions or towards a powerup
n_actions = 4 + len(pup_times)
self.action_space = spaces.Discrete(n_actions)
self.observation_space = spaces.Box(low=-1, high=1, shape=(len(obs),), dtype=np.float32)
self.window = None
self.current_step = 0
self.render_all = False
self.level_name = level_name
def _get_state(self):
return self.agent.observe(self.world)
def reset(self):
# Initialize agent to be the 0-th spawn point
self.world.reset()
self.agent.reset()
self.current_step = 0
self.num_kills = 0
self.num_deaths = 0
return self._get_state()
def step(self, action):
# Update action to MOVE in direction d
health_before = self.agent.health()
armor_before = self.agent.armor()
other_health_before = self.other_agent.health()
other_armor_before = self.other_agent.armor()
last_action = self.agent.last_action()
self.agent.set_action(action)
# Move half a second in world time.
for i in range(0, 15):
self.world.step(1.0 / 30.0)
if self.render_all:
self.render('human', delay=1.0/90.0)
moved = self.agent.moved_distance()
reward_weights = {
"no_motion": 1e-3,
"health": 1.0/200.0,
"armor": 1.0/200.0,
"other_health": 1.0/10000*0,
"other_armor": 1.0/10000*0,
"kill": 1,
"die": -0.01,
"change_plan": 0.001,
"not_powerup": 0.05,
}
# Reward proportional to maximum velocity moved
reward = 0
# Small negative reward for not moving
if moved < 1e-6:
reward -= reward_weights['no_motion']
# Small reward for getting health/armor?
health_delta = self.agent.health() - health_before
armor_delta = self.agent.armor() - armor_before
if health_delta > 0:
print('health: %f' % health_delta)
reward += health_delta * reward_weights['health']
if armor_delta > 0:
print('armor: %f' % armor_delta)
reward += armor_delta * reward_weights['armor']
other_health_delta = self.other_agent.health() - other_health_before
other_armor_delta = self.other_agent.armor() - other_armor_before
if other_health_delta < 0:
reward -= other_health_delta * reward_weights['other_health']
if other_armor_delta < 0:
reward -= other_armor_delta * reward_weights['other_armor']
# Small negative reward for changing plan.
if (last_action >= 4) and (last_action != action):
reward -= reward_weights['change_plan']
if action < 4:
reward -= reward_weights['not_powerup']
# Big reward for killing opponent
if self.agent.health() < 0:
reward += reward_weights['die']
print('died')
self.num_deaths += 1
self.world.reset_agent(0)
if self.other_agent.health() < 0:
reward += reward_weights['kill']
print('killed other')
self.num_kills += 1
self.world.reset_agent(1)
self.current_step += 1
time_limit = 800
done = (self.current_step >= time_limit)
info = {'moved': moved}
return self._get_state(), reward, done, info
def render(self, mode='console', delay=0):
if mode != 'human':
raise NotImplementedError()
if not self.window:
if False:
self.window = battle_ai.SimpleWindow()
else:
self.window = battle_ai.OgreWin(self.level_name + ".mesh")
self.window.set_world(self.world)
self.window.set_info_string('Kills: %d Deaths: %d [%d]' % (self.num_kills, self.num_deaths, self.agent.last_action()))
self.window.refresh()
for i in range(0, 10):
self.window.process_events()
self.window.draw_scene()
if delay > 0:
time.sleep(delay)
def close(self):
pass