/
select_best_move.py
136 lines (105 loc) · 5.63 KB
/
select_best_move.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import math
from collections import defaultdict
import constants
from .evaluate import evaluate
from .find_state_instructions import get_all_state_instructions
WON_BATTLE = 100
def remove_guaranteed_opponent_moves(score_lookup):
"""This method removes enemy moves from the score-lookup that do not give the bot a choice.
For example - if the bot has 1 pokemon left, the opponent is faster, and can kill your active pokemon with move X
then move X for the opponent will be removed from the score_lookup
The bot behaves much better when it cannot see these types of decisions"""
move_combinations = list(score_lookup.keys())
if len(set(k[0] for k in move_combinations)) == 1:
return score_lookup
elif len(set(k[1] for k in move_combinations)) == 1:
return score_lookup
# find the opponent's moves where the bot has a choice
opponent_move_scores = dict()
opponent_decisions = set()
for k, score in score_lookup.items():
opponent_move = k[1]
if opponent_move not in opponent_move_scores:
opponent_move_scores[opponent_move] = score
elif opponent_move in opponent_move_scores and score != opponent_move_scores[opponent_move] and not math.isnan(score):
opponent_decisions.add(opponent_move)
# re-create score_lookup with only the opponent's move acquired above
new_opponent_decisions = dict()
for k, v in score_lookup.items():
if k[1] in opponent_decisions:
new_opponent_decisions[k] = v
return new_opponent_decisions
def pick_safest(score_lookup, remove_guaranteed=False):
modified_score_lookup = score_lookup
if remove_guaranteed:
modified_score_lookup = remove_guaranteed_opponent_moves(score_lookup)
if not modified_score_lookup:
modified_score_lookup = score_lookup
worst_case = defaultdict(lambda: (tuple(), float('inf')))
for move_pair, result in modified_score_lookup.items():
if worst_case[move_pair[0]][1] > result:
worst_case[move_pair[0]] = move_pair, result
safest = max(worst_case, key=lambda x: worst_case[x][1])
return worst_case[safest]
def move_item_to_front_of_list(l, item):
all_indicies = list(range(len(l)))
this_index = l.index(item)
all_indicies.remove(this_index)
all_indicies.insert(0, this_index)
return [l[i] for i in all_indicies]
def get_payoff_matrix(mutator, user_options, opponent_options, depth=2, prune=True):
"""
:param mutator: a StateMutator object representing the state of the battle
:param user_options: options for the bot
:param opponent_options: options for the opponent
:param depth: the remaining depth before the state is evaluated
:param prune: specify whether or not to prune the tree
:return: a dictionary representing the potential move combinations and their associated scores
"""
winner = mutator.state.battle_is_finished()
if winner:
return {(constants.DO_NOTHING_MOVE, constants.DO_NOTHING_MOVE): evaluate(mutator.state) + WON_BATTLE*depth*winner}
depth -= 1
# if the battle is not over, but the opponent has no moves - we want to return the user options as moves
# this is a special case in a random battle where the opponent's pokemon has fainted, but the opponent still
# has reserves left that are unseen
if opponent_options == [constants.DO_NOTHING_MOVE] and mutator.state.opponent.active.hp == 0:
return {(user_option, constants.DO_NOTHING_MOVE): evaluate(mutator.state) for user_option in user_options}
state_scores = dict()
best_score = float('-inf')
for i, user_move in enumerate(user_options):
worst_score_for_this_row = float('inf')
skip = False
# opponent_options can change during the loop
# using opponent_options[:] makes a copy when iterating to ensure no funny-business
for j, opponent_move in enumerate(opponent_options[:]):
if skip:
state_scores[(user_move, opponent_move)] = float('nan')
continue
score = 0
state_instructions = get_all_state_instructions(mutator, user_move, opponent_move)
if depth == 0:
for instructions in state_instructions:
mutator.apply(instructions.instructions)
t_score = evaluate(mutator.state)
score += (t_score * instructions.percentage)
mutator.reverse(instructions.instructions)
else:
for instructions in state_instructions:
this_percentage = instructions.percentage
mutator.apply(instructions.instructions)
next_turn_user_options, next_turn_opponent_options = mutator.state.get_all_options()
safest = pick_safest(get_payoff_matrix(mutator, next_turn_user_options, next_turn_opponent_options, depth=depth, prune=prune))
score += safest[1] * this_percentage
mutator.reverse(instructions.instructions)
state_scores[(user_move, opponent_move)] = score
if score < worst_score_for_this_row:
worst_score_for_this_row = score
if prune and score < best_score:
skip = True
# MOST of the time in pokemon, an opponent's move that causes a prune will cause a prune elsewhere
# move this item to the front of the list to prune faster
opponent_options = move_item_to_front_of_list(opponent_options, opponent_move)
if worst_score_for_this_row > best_score:
best_score = worst_score_for_this_row
return state_scores