-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathminimax.py
More file actions
69 lines (48 loc) · 1.86 KB
/
minimax.py
File metadata and controls
69 lines (48 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
def is_terminal(battle_data) -> bool:
    """Report whether the battle is over.

    Args:
        battle_data: Mapping that carries an ``"ended"`` entry.

    Returns:
        The value stored under ``battle_data["ended"]``, returned unchanged.
    """
    ended = battle_data["ended"]
    return ended
def get_side_value(side) -> float:
    """Score one side as the total of its pokemon's remaining-health ratios.

    Args:
        side: Mapping with a ``"pokemon"`` iterable; each entry provides
            ``"hp"`` and ``"maxhp"``.

    Returns:
        The sum of ``hp / maxhp`` over every entry in ``side["pokemon"]``
        (``0`` when there are no entries).
    """
    health_ratios = [member["hp"] / member["maxhp"] for member in side["pokemon"]]
    return sum(health_ratios)
def get_heuristic_value(battle_data):
    """Evaluate a battle position from the point of view of side index 1.

    Args:
        battle_data: Mapping whose ``"sides"`` entry is indexable at 0 and 1.

    Returns:
        ``get_side_value(sides[1]) - get_side_value(sides[0])``; positive when
        side 1 holds the larger total health ratio.
    """
    sides = battle_data["sides"]
    # Side 1 is scored first, matching the order of the subtraction operands.
    side_one_score = get_side_value(sides[1])
    side_zero_score = get_side_value(sides[0])
    return side_one_score - side_zero_score
def alpha_beta(env, battle, depth, alpha, beta, player_idx, last_move):
    """Run a depth-limited minimax search with alpha-beta pruning.

    A ply with ``player_idx == 0`` maximises over ``battle["actions"][1]``
    without advancing the battle: each candidate is converted with
    ``env.get_move`` and handed to the next ply as ``last_move``.  Any other
    ``player_idx`` minimises over ``battle["actions"][0]``; for each reply it
    asks ``env.client.do_move`` for the successor battle (passing the reply
    together with ``last_move``) and recurses on that successor with
    ``depth - 1``.

    Args:
        env: Object providing ``client`` (with ``do_move``) and ``get_move``.
        battle: Mapping with ``"id"``, ``"data"`` and ``"actions"`` entries.
        depth: Remaining look-ahead; reduced by one after each minimising ply.
        alpha: Lower bound already secured for the maximising ply.
        beta: Upper bound already secured for the minimising ply.
        player_idx: ``0`` for the maximising ply, anything else minimises.
        last_move: Move chosen by the preceding maximising ply, or ``None``.

    Returns:
        A ``(value, move_idx)`` tuple.  ``move_idx`` is ``None`` at a leaf
        (``depth == 0`` or the battle has ended) and when no candidate beat
        the initial infinite bound.
    """
    simulator = env.client
    current_id = battle["id"]
    state = battle["data"]
    following_idx = (player_idx + 1) % 2

    # Leaf: out of depth or the battle is over, so score the position as-is.
    if depth == 0 or is_terminal(state):
        return get_heuristic_value(state), None

    chosen_idx = None

    if player_idx != 0:
        # Minimising ply: each reply is resolved into a successor battle.
        lowest = float("inf")
        for reply_idx in battle["actions"][0]:
            child = simulator.do_move(
                current_id, env.get_move(reply_idx), last_move
            )
            child_score = alpha_beta(
                env, child, depth - 1, alpha, beta, following_idx, None
            )[0]
            if child_score < lowest:
                lowest, chosen_idx = child_score, reply_idx
            beta = min(beta, lowest)
            if beta <= alpha:
                # The maximiser already has an option at least this good.
                break
        return lowest, chosen_idx

    # Maximising ply: the move is only recorded here; the battle itself is
    # advanced by the minimising ply once both moves are known.
    highest = float("-inf")
    for candidate_idx in battle["actions"][1]:
        pending_move = env.get_move(candidate_idx)
        child_score = alpha_beta(
            env, battle, depth, alpha, beta, following_idx, pending_move
        )[0]
        if child_score > highest:
            highest, chosen_idx = child_score, candidate_idx
        alpha = max(alpha, highest)
        if beta <= alpha:
            # The minimiser already has an option at least this good.
            break
    return highest, chosen_idx
def agent(env, depth=1):
    """Pick a move index for the current battle via alpha-beta search.

    Args:
        env: Environment exposing ``current_battle`` plus whatever
            ``alpha_beta`` needs from it.
        depth: Look-ahead depth passed to ``alpha_beta``; defaults to ``1``.

    Returns:
        The move index reported by ``alpha_beta`` for the root position
        (``None`` when the search reports no move).
    """
    # Root call: open window (-inf, +inf), maximising ply, no pending move.
    search_result = alpha_beta(
        env, env.current_battle, depth, float("-inf"), float("inf"), 0, None
    )
    return search_result[1]