-
Notifications
You must be signed in to change notification settings - Fork 9
/
player.py
1088 lines (951 loc) · 35.1 KB
/
player.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
from __future__ import print_function
from constants import *
from itertools import combinations
from itertools import zip_longest
from itertools import product
from collections import Counter
from copy import copy, deepcopy
import numpy as np
from ai import NETWORK_HYPERPARAMETERS
from ai import SplendorAI
Q_LOADINGS = NETWORK_HYPERPARAMETERS['output_layers']
"""
uncommon strategies not supported:
* using gold instead of a regular color in order to prevent a player from grabbing one or two gems of a particular color
on their turn (fairly uncommon)
* telling a player what your hidden card is so that it is more obvious to them that you can
purchase something/are more likely to purchase something that may further their interests in some way (very rare)
* take 1 gem each from 2 separate piles to allow another player to take a certain color or to allow yourself to take 3 next turn (never seen it)
"""
# TODO : test run
def wrap_if_not_list(x):
    """
    returns x untouched when it is already a list or tuple;
    anything else (including None) comes back as a one-element list
    """
    already_sequence = isinstance(x, (list, tuple))
    return x if already_sequence else [x]
# used to convert score weighting to positive weights
def elu(x):
    """
    maps a real-valued score to a strictly positive weight

    despite the name this is the softplus function log(1 + exp(x)), not an
    exponential linear unit; np.logaddexp(0, x) evaluates it without
    overflowing for large x and, unlike the previous scalar-only
    `if x < 500` guard, also accepts arrays

    x - a scalar or array-like of scores
    returns - log(1 + exp(x)), elementwise
    """
    return np.logaddexp(0, x)
def normalize(x):
    """
    divides every entry of x by the sum of all entries and returns a numpy array
    """
    values = np.asarray(x)
    total = np.sum(x)
    return values / total
def get_phase_parameters(phase):
    """
    training will be divided into 5 phases

    phase - phase number, 1 through 5
    returns - a new dict of decision weights keyed by 'Q1', 'Q3', 'Q5' and 'win';
        None for any other phase value
    """
    # (phase, Q1, Q3, Q5, win): the Q1 weight shrinks and the win weight grows phase by phase
    schedule = (
        (1, 0.5, 0.3, 0.15, 0.05),
        (2, 0.4, 0.25, 0.2, 0.15),
        (3, 0.25, 0.25, 0.25, 0.25),
        (4, 0.15, 0.2, 0.35, 0.3),
        (5, 0.05, 0.1, 0.35, 0.50),
    )
    for number, q1, q3, q5, win in schedule:
        if phase == number:
            return {'Q1': q1, 'Q3': q3, 'Q5': q5, 'win': win}
    return None
class Player(object):
def __init__(
        self,
        game,
        id,
        order,
        ai=None,
        decision_weighting=None,
        temperature=1,
        record_plain_history=False,
        hyperparameters=None,
        compares_self_to_others=('no', 'yes', 'both'),
):
    """
    game - a Game object that this player is attached to
    id - a unique ID for the player
    order - the player's seat index; used to index game.players and the order one-hot
    ai - a premade SplendorAI object (optional; one is made if not provided)
    decision_weighting - the weights used for each of the player's outputs for decisions
    temperature - the randomness of the player's decisions
    record_plain_history - if true, records JSON of the game state at each turn (uses a lot of extra memory)
    hyperparameters - network hyperparameters for neural network; see ai.py for default parameters
    compares_self_to_others - 'no', 'yes', or 'both'; will weight score by lead if 'yes'; will take average of score and lead if 'both'
    """
    # the default is the full sequence of allowed choices and the first entry is the
    # effective default; a tuple is used so the default cannot be mutated between calls,
    # and lists are still accepted for existing callers
    if isinstance(compares_self_to_others, (list, tuple)):
        compares_self_to_others = compares_self_to_others[0]
    self.game = game
    # this is important for keeping track of which player has which id
    self.id = id
    self.order = order
    if ai is not None:
        self.ai = ai
    elif hyperparameters is not None:
        self.ai = SplendorAI(id=id, game=game, **hyperparameters)
    else:
        self.ai = SplendorAI(id=id, game=game)
    self.points = 0
    # this should be retrieved via get_phase_parameters()
    if decision_weighting is None:
        self.decision_weighting = get_phase_parameters(1)
    else:
        self.decision_weighting = decision_weighting
    self.temperature = temperature
    self.record_plain_history = record_plain_history
    # cards that contribute to cost reduction and points
    self.owned_cards = []
    # cards that can be purchased only by player
    self.reserved_cards = []
    # faster way of keeping track of cards
    self.n_cards = 0
    self.n_reserved_cards = 0
    # one counter per tier (index = tier - 1)
    self.n_reserved_cards_tiers = [0 for _ in range(3)]
    self.gems = ColorCombination(True, **{color: 0 for color in COLOR_ORDER})
    self.n_gems = 0
    self.discount = ColorCombination(**{color: 0 for color in COST_COLOR_ORDER})
    self.objectives = []
    self.total_times_reserved = Counter()
    self.total_gem_takes = Counter()
    self.win = False
    self.compares_self_to_others = compares_self_to_others
    ## describes the history for the current game
    # describes q state at the beginning of a turn
    self.q_state_history = []
    self.lagged_q_state_history = []
    # describes the actions in plain terms
    self.plain_action_history = []
    # these are the raw serializations used for each player in a turn
    self.serialized_action_history = []
    ## describes serialized history for all games; this will be used to train the neural network
    # this should be in [[data, win, game_id], ...] format
    self.extended_serialized_action_history = []
    self.extended_plain_action_history = []
    self.extended_lagged_q_state_history = []
    # this describes the response variable at each time step
    # this should be of the format [{'win': value, 'Q1': value, 'Q3': value, 'Q5': value}, ...]
    # the reason they aren't all put into one is that extending the array requires copying and
    # that's very expensive to do over thousands of simulations
    # this is only calculated at the end of the game when all values of Q1, Q3, Q5, and win state can be certain
    self.extended_output = []
def set_game(self, game):
    """
    attaches this player, and then its ai, to the given game object
    """
    # chained assignment binds left to right: self.game first, then self.ai.game
    self.game = self.ai.game = game
def set_decision_weighting(self, weights):
    """
    replaces the weights used by calculate_score();
    weights is stored as given (no copy is made)
    """
    setattr(self, 'decision_weighting', weights)
def set_temperature(self, temperature):
    """
    replaces the temperature read by make_choice()
    """
    setattr(self, 'temperature', temperature)
def get_other_players(self):
    """
    run when initializing the game to make it easy to access other players
    for serializing

    stores in self.other_players every other entry of game.players, starting with
    the one right after this player's seat and wrapping around to the one before it
    """
    seats = self.game.players
    seated_after = seats[self.order + 1:]
    seated_before = seats[:self.order]
    self.other_players = seated_after + seated_before
def take_turn(self):
    """
    TODO: record player state at end of turn after action is made
    the player takes a turn: records q state, acts, checks objectives and,
    unless the game is already on its last turn, checks for victory
    """
    for step in (self.record_q_state, self.make_turn_action, self.objective_check):
        step()
    if self.game.last_turn:
        return
    self.victory_check()
def make_choice(self, score):
    """
    uses temperature to randomly decide on decision index
    based on Boltzmann Distribution

    score - 1-D array-like of option scores; an entry of exactly -1e100 marks an
        option that is not available
    returns - integer index of the chosen option
    """
    score = np.asarray(score, dtype=float)
    unavailable = score == -1e100
    # shift so the best option sits at 0 and clip at -10:
    # at T=1, minimum probability for an available action is about 1/20,000
    logits = np.maximum(score - np.max(score), -10)
    # clipping would lift the sentinel to -10, so put it back
    logits = np.where(unavailable, -1e100, logits)
    # greedy pick for near-zero temperature (prevents underflow errors) and when
    # nothing is available (sampling would otherwise divide 0 by 0)
    if self.temperature < 0.005 or unavailable.all():
        return int(np.argmax(logits))
    weights = np.exp(logits / self.temperature)
    choice = np.random.choice(
        np.arange(len(weights)),
        p=weights / np.sum(weights)
    )
    # worst-case scenario with probability fudging: never return an unavailable
    # option; the check uses the availability mask rather than the exponentiated
    # weights (which are never negative) and works for any number of options
    if unavailable[choice]:
        return int(np.flatnonzero(~unavailable)[0])
    return int(choice)
def decide_on_action(self, purchasing_options, reserving_options, gem_taking_options):
    """
    picks one of the three action categories, then one option inside it

    each argument is None when the category has no legal move, otherwise a dict with
    'score', 'serializations' and 'actions' entries as built by the simulate_* methods

    a category is weighted by its best option score (-1e100 when it is None);
    make_choice() selects the category and then the option within it

    returns - (action, params, serialization) where action is 'purchase', 'reserve'
        or 'take_gems'; (None, None, serialization of the unchanged state) when
        every category is None
    """
    categories = (purchasing_options, reserving_options, gem_taking_options)
    category_weights = [
        -1e100 if options is None else np.max(options['score'])
        for options in categories
    ]
    if all(options is None for options in categories):
        print("WARNING: NO ACTIONS CAN BE TAKEN!")
        return None, None, self.full_serializations(None, None, None)[0]
    category_index = self.make_choice(np.asarray(category_weights))
    action = ('purchase', 'reserve', 'take_gems')[category_index]
    if action == 'purchase':
        picked = self.make_choice(purchasing_options['score'])
        serialization = purchasing_options['serializations'][picked]
        moves = purchasing_options['actions']
        params = {
            'card_changes': moves['card_changes'][picked],
            'gem_changes': moves['gem_changes'][picked],
        }
        return action, params, serialization
    if action == 'reserve':
        picked = self.make_choice(reserving_options['score'])
        serialization = reserving_options['serializations'][picked]
        moves = reserving_options['actions']
        reservation_change = moves['reservation_changes'][picked]
        # log reservation
        self.total_times_reserved[(reservation_change['tier'], reservation_change['type'])] += 1
        params = {
            'reservation_changes': reservation_change,
            'gem_changes': moves['gem_changes'][picked],
        }
        return action, params, serialization
    if action == 'take_gems':
        picked = self.make_choice(gem_taking_options['score'])
        serialization = gem_taking_options['serializations'][picked]
        gem_changes = gem_taking_options['actions']['gem_changes'][picked]
        # log how many gems (net) this take involves
        self.total_gem_takes[gem_changes.count()] += 1
        return action, {'gem_changes': gem_changes}, serialization
def make_turn_action(self):
    """
    simulates every purchase, reservation and gem-taking option, lets
    decide_on_action() pick one, carries it out and records it in the histories
    """
    # now determine best course of action based on q-weighting
    action_type, action_params, serialization = self.decide_on_action(
        purchasing_options=self.simulate_purchasing_options(),
        reserving_options=self.simulate_reserving_options(),
        gem_taking_options=self.simulate_gem_taking_options(),
    )
    if action_type is None:
        print(':-(')
    elif action_type == 'take_gems':
        self.take_gems(action_params['gem_changes'])
    elif action_type == 'reserve':
        self.reserve_card(action_params)
    elif action_type == 'purchase':
        self.purchase_card(action_params)
    # HISTORY UPDATED
    self.serialized_action_history.append(serialization)
    if self.record_plain_history:
        plain_record = {'action_type': action_type, 'action_params': action_params}
        self.plain_action_history.append(plain_record)
"""
BELOW ARE SIMULATION FUNCTIONS
for each of these functions, the following should be done:
1. all possible moves should be determined
2. for each possible move, a semi-simulation should be made of the resulting board state.
I recommend implementing & using save_state()/load_state() for both the Player and Game classes
so that you can directly modify these; I have put a "move_cards" argument to some functions so that they won't
shift around cards if you want to avoid that; you should use copy/deepcopy to copy lists/nested objects
when creating these states
3. for each state, there should be a serialization of the entire game from that player's point of view. For example,
a game of texas hold 'em would have a binary (1s and 0s) vector of length 52 describing your hand, a binary vector
describing the state of the cards in the center, and perhaps some other vectors describing betting/folding of
the other players (since you can't see their cards). In this game, you may be able to use integer values for gem costs.
4. the SplendorAI class needs to be further developed, but it will take in these serializations and output the probabilities;
With these probabilities, a decision should be made by processing them through determine_best_option(). Initially I want the
actual probability to have less impact on the chosen decision in order to increase randomness, but as the network becomes
trained, I want the better probabilities to have a much higher chance of being chosen
5. The chosen action will be taken with the keyword arguments provided in the option, and the turn will continue.
"""
def calculate_score(self, predictions):
    """
    blends the network outputs into one score using self.decision_weighting

    predictions - dict with 'win_prediction' and 'q_predictions'; entries 0, 1 and 2
        of 'q_predictions' are used as the Q1, Q3 and Q5 predictions
    returns - 15 * win * w['win'] + Q1 * w['Q1'] + Q3 * w['Q3'] + Q5 * w['Q5']
    """
    weights = self.decision_weighting
    win_prediction = predictions['win_prediction']
    q_predictions = predictions['q_predictions']
    # the win prediction is scaled by 15 before weighting
    score = (win_prediction * 15) * weights['win']
    for slot, key in enumerate(('Q1', 'Q3', 'Q5')):
        score = score + q_predictions[slot] * weights[key]
    return score
def simulate_purchasing_options(self):
    """
    creates all possible purchasing options and scores them with the ai

    every board card and every reserved card whose discounted cost can be paid
    with the player's gems becomes one option

    returns - None when nothing can be bought, otherwise a dict with
        'predictions' - output of self.ai.make_predictions
        'score' - calculate_score() of those predictions
        'serializations' - one serialization per option
        'actions' - {'gem_changes': payments, 'card_changes': option dicts},
            index-aligned with the serializations
    """
    purchasing_options = []
    payment_options = []

    def add_if_affordable(card, position, tier, can_be_replaced, source):
        # registers the card as an option when the gems cover its discounted cost
        net_cost = (card['cost'] - self.discount).truncate_negatives()
        if not self.gems.can_pay_for(net_cost):
            return
        payment_options.append(self.gems.calculate_actual_cost(net_cost))
        purchasing_options.append({
            'position': position,
            'tier': tier,
            'can_be_replaced': can_be_replaced,
            'type': source,
            'card': card
        })

    # cards on board
    for tier in [1, 2, 3]:
        available_cards = self.game.get_available_cards(tier=tier)
        can_be_replaced = len(self.game.get_deck(tier=tier)) > 0
        for i, card in enumerate(available_cards):
            # skip empty slots, as simulate_reserving_options does
            if card is None:
                continue
            add_if_affordable(card, i, tier, can_be_replaced, 'board')
    # reserved cards
    for i, card in enumerate(self.reserved_cards):
        if card is None:
            continue
        # use the card's own tier; the board loop variable is always 3 by now
        add_if_affordable(card, i, card['tier'], False, 'reserved')
    if len(purchasing_options) == 0:
        return None
    purchasing_serializations = self.full_serializations(
        gem_changes=payment_options,
        card_changes=purchasing_options,
    )
    predictions = self.ai.make_predictions(purchasing_serializations)
    return {
        'predictions': predictions,
        'score': self.calculate_score(predictions),
        'serializations': purchasing_serializations,
        'actions': {
            'gem_changes': payment_options,
            'card_changes': purchasing_options,
        }
    }
def simulate_reserving_options(self):
    """
    creates all possible reserving options and scores them with the ai

    returns - None when three cards are already reserved or nothing can be reserved,
        otherwise a dict with 'predictions', 'score', 'serializations' and
        'actions' ({'gem_changes': ..., 'reservation_changes': ...})
    """
    # skip if 3 cards already reserved
    if len(self.reserved_cards) == 3:
        return None
    # cannot get gold if none exists or you have 10 gems
    no_gold_gain = self.game.gems.gold == 0 or self.gems.count() == 10
    if no_gold_gain:
        gem_change = ColorCombination(uses_gold=True)
    else:
        gem_change = ColorCombination(gold=1, uses_gold=True)
    tiers = [1, 2, 3]
    # cards on board
    board_options = [
        {'tier': tier, 'position': position, 'type': 'board', 'card': card}
        for tier in tiers
        for position, card in enumerate(self.game.get_available_cards(tier=tier))
        if card is not None
    ]
    # cards on top of deck
    topdeck_options = [
        {'tier': tier, 'position': 0, 'type': 'topdeck', 'card': make_blank_card(tier)}
        for tier in tiers
        if len(self.game.get_deck(tier=tier)) > 0
    ]
    reservation_options = board_options + topdeck_options
    if len(reservation_options) == 0:
        return None
    # every option carries the same gem change (the same object, repeated)
    gem_changes = [gem_change] * len(reservation_options)
    reservation_serializations = self.full_serializations(
        gem_changes=gem_changes,
        reservation_changes=reservation_options
    )
    predictions = self.ai.make_predictions(reservation_serializations)
    return {
        'predictions': predictions,
        'score': self.calculate_score(predictions),
        'serializations': reservation_serializations,
        'actions': {
            'gem_changes': gem_changes,
            'reservation_changes': reservation_options
        }
    }
def simulate_gem_taking_options(self):
    """
    creates all possible gem-taking options and scores them with the ai

    returns - None when there is no option, otherwise a dict with 'predictions',
        'score', 'serializations' and 'actions' ({'gem_changes': ...})
    """
    bank_is_empty = self.game.gems.count_nongold() == 0
    if bank_is_empty:
        # if no gems, provide an out in case no moves can be made
        gem_combinations = [ColorCombination(True)]
    else:
        # determine all combinations
        gem_combinations = self.take_gems_options()
    if len(gem_combinations) == 0:
        return None
    gem_serializations = self.full_serializations(gem_changes=gem_combinations)
    predictions = self.ai.make_predictions(gem_serializations)
    actions = {'gem_changes': gem_combinations}
    return {
        'predictions': predictions,
        'score': self.calculate_score(predictions),
        'serializations': gem_serializations,
        'actions': actions,
    }
def save_state(self):
    """
    placeholder (not implemented; does nothing and returns None)

    intended for stashing a few player attributes before a simulation modifies
    them directly; game.save_state() could also be called from here
    """
    return None
def load_state(self):
    """
    placeholder (not implemented; does nothing and returns None)

    intended to restore the state stashed by self.save_state();
    game.load_state() could also be called from here
    """
    return None
#GENERAL CHECKING AND ACTION FUNCTIONS
def get_available_cards(self, tier):
    """
    does not include reserved cards

    tier - 1, 2 or 3 for a single row; 'all' for every row (only for recordkeeping)
    returns - the game's own list for that tier (not a copy), or for 'all' a dict
        keyed 'tier 1', 'tier 2', 'tier 3'
    raises - ValueError for any other tier (previously an UnboundLocalError from
        falling through to the return)
    """
    game = self.game
    if tier == 1:
        return game.available_tier_1_cards
    if tier == 2:
        return game.available_tier_2_cards
    if tier == 3:
        return game.available_tier_3_cards
    if tier == 'all':
        # only for recordkeeping
        return {
            'tier 1': game.available_tier_1_cards,
            'tier 2': game.available_tier_2_cards,
            'tier 3': game.available_tier_3_cards
        }
    raise ValueError("tier must be 1, 2, 3 or 'all'; got %r" % (tier,))
def card_in_position_purchasing_cost(self, tier, position):
    """
    looks up the board card at the given tier and position and returns
    card_purchasing_cost() for it
    """
    row = self.get_available_cards(tier)
    return self.card_purchasing_cost(row[position])
def card_purchasing_cost(self, card):
    """
    this doubles as a boolean check

    card - a card dict; card['cost'] is reduced by self.discount
    returns - the discounted cost with negative entries truncated when the
        player's gems can pay for it, otherwise None
    """
    # truncate before the affordability check, as simulate_purchasing_options does,
    # so a discount larger than the cost in one color is not passed on as a negative
    # cost; the previously computed (and unused) gem surplus is no longer calculated
    net_cost = (card['cost'] - self.discount).truncate_negatives()
    if not self.gems.can_pay_for(net_cost):
        return None
    return net_cost
def purchase_card(self, action_data):
    """
    routes a chosen purchase to purchase_available_card() or purchase_reserved_card()

    action_data - dict whose 'card_changes' entry holds 'card', 'position' and
        'type' ('board' or 'reserved'); any other type does nothing
    """
    change = action_data['card_changes']
    tier = change['card']['tier']
    position = change['position']
    purchase_type = change['type']
    if purchase_type == 'board':
        self.purchase_available_card(tier=tier, position=position)
    elif purchase_type == 'reserved':
        self.purchase_reserved_card(position=position)
def reserve_card(self, action_data):
    """
    routes a chosen reservation to reserve_card_on_top() or reserve_card_on_board()

    action_data - dict whose 'reservation_changes' entry holds 'card', 'position'
        and 'type' ('topdeck' or 'board'); any other type does nothing

    n_reserved_cards is counted here; the per-tier counter is updated inside
    reserve_card_on_top()/reserve_card_on_board(), so it is not touched again
    here (it used to be incremented twice per reservation, while
    purchase_reserved_card() only decrements it once)
    """
    change = action_data['reservation_changes']
    tier = change['card']['tier']
    position = change['position']
    reserve_type = change['type']
    if reserve_type == 'topdeck':
        self.reserve_card_on_top(tier=tier)
    elif reserve_type == 'board':
        self.reserve_card_on_board(tier=tier, position=position)
    else:
        # nothing was reserved, so leave the counters alone
        return
    self.n_reserved_cards += 1
def purchase_available_card(self, tier, position, move_cards=True):
    """
    buys the board card at (tier, position): removes it from the row, pays for it,
    adds its points and discount, and asks the game to refill the row

    move_cards - when False the card is still removed from the board but is not
        added to self.owned_cards
    raises - ValueError('unexpected negative') if the payment leaves the player
        or the game with a negative gem count (details are printed first)
    """
    bought = self.get_available_cards(tier).pop(position)
    # nominal discounted cost; only reported if the sanity check below fails
    nominal_cost = self.card_purchasing_cost(bought)
    # what is actually handed over, computed before the new discount is applied
    gems_paid = (
        (self.gems + self.discount).calculate_actual_cost(bought['cost']) - self.discount
    ).truncate_negatives()
    power_before = self.discount + self.gems
    # add points
    self.points += bought['points']
    # add color
    self.discount[bought['color']] += 1
    # subtract gems
    gems_before = self.gems.__copy__()
    self.gems = self.gems - gems_paid
    self.game.gems += gems_paid
    if self.gems.has_any_negatives() or self.game.gems.has_any_negatives():
        print(bought)
        report = (
            ('current', self.gems),
            ('original', gems_before),
            ('original gems and discount', power_before),
            ('cost', nominal_cost),
            ('actual cost', gems_paid),
        )
        for label, value in report:
            print(label)
            print(value)
        raise ValueError('unexpected negative')
    self.n_gems = self.gems.count()
    # add card to inventory
    self.n_cards += 1
    if move_cards:
        self.owned_cards.append(bought)
    # add new card; the empty-deck warning is switched off
    new_card_added = self.game.add_top_card_to_available(tier)
    if not new_card_added and False:
        print("WARNING: tier %s deck ran empty!" % tier)
def purchase_reserved_card(self, position, move_cards=True):
    """
    buys the reserved card at the given index: pays for it, adds its points and
    discount, and updates the card and reservation counters

    move_cards - when False the card stays in self.reserved_cards and is not added
        to self.owned_cards (the counters are still updated)
    """
    if move_cards:
        bought = self.reserved_cards.pop(position)
    else:
        bought = self.reserved_cards[position]
    # result is unused; the call is kept so behavior is unchanged
    self.card_purchasing_cost(bought)
    # what is actually handed over, computed before the new discount is applied
    gems_paid = (
        (self.gems + self.discount).calculate_actual_cost(bought['cost']) - self.discount
    ).truncate_negatives()
    # add points
    self.points += bought['points']
    # add color
    self.discount[bought['color']] += 1
    # subtract gems
    self.gems = self.gems - gems_paid
    self.game.gems += gems_paid
    self.n_gems = self.gems.count()
    # add card to inventory
    self.n_cards += 1
    if move_cards:
        self.owned_cards.append(bought)
    self.n_reserved_cards -= 1
    self.n_reserved_cards_tiers[bought['tier'] - 1] -= 1
def reserve_card_on_top(self, tier, move_cards=True):
    """
    reserves the card popped from the end of the tier's deck and takes one gold
    when the player holds fewer than 10 gems and the game has gold left

    move_cards - when False no card is moved (the tier counter and gold still apply)
    returns - 0
    """
    if move_cards:
        self.reserved_cards.append(self.game.get_deck(tier).pop())
    self.n_reserved_cards_tiers[tier - 1] += 1
    gets_gold = self.n_gems < 10 and self.game.gems['gold'] > 0
    if gets_gold:
        self.take_gems(ColorCombination(True, gold=1))
    return 0
def reserve_card_on_board(self, tier, position):
    """
    reserves the board card at (tier, position), asks the game to refill the row,
    and takes one gold when the player holds fewer than 10 gems and the game has
    gold left

    returns - 0
    """
    row = self.game.get_available_cards(tier)
    self.reserved_cards.append(row.pop(position))
    self.game.add_top_card_to_available(tier)
    self.n_reserved_cards_tiers[tier - 1] += 1
    gets_gold = self.n_gems < 10 and self.game.gems['gold'] > 0
    if gets_gold:
        self.take_gems(ColorCombination(True, gold=1))
    return 0
def objective_check(self):
    """
    at end of turn, checks to see if any objectives are met
    if no, then returns 0
    if only one, then it adds that and returns a 1
    if more than one, decides which one, then adds it, then returns 1

    a claimed objective is removed from game.objectives and is worth 3 points
    """
    reachable = [
        index
        for index, objective in enumerate(self.game.objectives)
        if self.discount.can_pay_for(objective)
    ]
    if len(reachable) == 0:
        return 0
    if len(reachable) == 1:
        claimed_index = reachable[0]
    else:
        claimed_index = self.decide_on_objective(reachable)
    self.points += 3
    self.objectives.append(self.game.objectives.pop(claimed_index))
    return 1
def decide_on_objective(self, possible_objectives):
    """
    picks which objective to claim when several are available

    current (MEH) method: a uniformly random pick via np.random.choice
    AI METHOD (idea): forecast all possibilities (3 at most, but most likely 2) and choose the one that most likely results in winning
    DERIVED METHOD (idea): check to see if any other players are within a turn of getting the other card; not 100% exact bc of reserved cards, and might be tedious
    """
    picked = np.random.choice(possible_objectives)
    return picked
def victory_check(self):
    """
    flags the game's last turn once this player has 15 or more points
    """
    reached_target = self.points >= 15
    if reached_target:
        self.game.last_turn = True
def take_gems(self, gems):
    """
    moves gems from the game to the player's inventory

    gems - the change to apply; it is added to self.gems, subtracted from
        game.gems, and its count() is added to self.n_gems
    """
    player_total = self.gems + gems
    self.gems = player_total
    bank_total = self.game.gems - gems
    self.game.gems = bank_total
    self.n_gems += gems.count()
def take_gems_options(self):
    """
    returns a list of ColorCombination objects, one per way of taking gems this turn

    options: two of one color (only from piles holding more than 3) or one gem each
    from up to three different non-empty piles; when the player holds more than
    7 gems the options are expanded by calculate_gem_returns() to include the
    gems handed back
    """
    bank = self.game.gems
    single_colors = set()
    # kept as a list so the double-gem options follow COST_COLOR_ORDER
    double_colors = []
    for color in COST_COLOR_ORDER:
        pile = bank[color]
        if pile > 0:
            single_colors.add(color)
            if pile > 3:
                double_colors.append(color)
    positive_possibilities = [
        ColorCombination(True, **{color: 2}) for color in double_colors
    ]
    if len(single_colors) == 1:
        (color,) = single_colors
        # built with the gold flag, like the double-gem options and the player's own gems
        positive_possibilities.append(ColorCombination(True, **{color: 1}))
    elif len(single_colors) == 2:
        positive_possibilities += color_combinations(single_colors, 2)
    else:
        positive_possibilities += color_combinations(single_colors, 3)
    # determines returning requirements
    if self.n_gems <= 7:
        return positive_possibilities
    return self.calculate_gem_returns(positive_possibilities)
def calculate_gem_returns(self, possibilities):
    """
    expands gem-taking options with the gems that must be handed back to stay at 10

    possibilities - ColorCombination objects describing gems to take
    returns - a list of net-change ColorCombination objects without duplicates
        (deduplicated through a set of tuples, so the order is not meaningful)
    """
    # TODO: find a bug here
    unique_changes = set()
    held = self.n_gems
    for take in possibilities:
        number_to_return = take.count() + held - 10
        if number_to_return <= 0:
            unique_changes.add(take.as_tuple())
            continue
        # every way of handing back that many gems out of the gems held after the take
        return_combos = [
            ColorCombination(True, **Counter(returned))
            for returned in combinations((self.gems + take).expand(), number_to_return)
        ]
        for combo in return_combos:
            unique_changes.add((take - combo).as_tuple())
    # convert hashable tuples back to ColorCombination objects
    return [convert_tuple_to_color_combination(change) for change in unique_changes]
def reset(self, reset_extended_history=False):
    """
    used when you want to keep the same players (saves some AI loading, possibly)
    and start a new game; extended history is kept unless the flag is set to true to
    reset it
    """
    # per-game counters and flags
    self.points = 0
    self.n_cards = 0
    self.n_reserved_cards = 0
    self.n_gems = 0
    self.win = False
    # owned cards contribute to cost reduction and points;
    # reserved cards can be purchased only by this player
    self.owned_cards = []
    self.reserved_cards = []
    self.objectives = []
    self.n_reserved_cards_tiers = [0] * 3
    self.total_times_reserved = Counter()
    self.total_gem_takes = Counter()
    self.gems = ColorCombination(uses_gold=True, **{color: 0 for color in COLOR_ORDER})
    self.discount = ColorCombination(**{color: 0 for color in COST_COLOR_ORDER})
    # histories for the current game: state at the beginning of each turn, actions
    # in plain terms, and the raw serializations used in each turn
    self.q_state_history = []
    self.lagged_q_state_history = []
    self.plain_action_history = []
    self.serialized_action_history = []
    if reset_extended_history:
        # NOTE(review): self.extended_output is not cleared here - confirm that is intended
        self.extended_serialized_action_history = []
        self.extended_plain_action_history = []
        self.extended_lagged_q_state_history = []
# length 59-61 (57 + number of players)
def serialize(self, gem_change=None, card_change=None, reservation_change=None, from_own_perspective=True):
    """
    serializes this player's state, optionally after a hypothetical action

    gem_change - change added to the player's gems before serializing
    card_change - a hypothetical purchase (not a reservation): dict with 'card',
        'type' and 'position'; for type 'reserved', position is the index of the
        card among the player's reserved cards, otherwise it does nothing here
    reservation_change - a hypothetical reservation: dict with
        'type' ('board'/'topdeck'), 'tier' and 'position' (position on the board)
    from_own_perspective - its negation is passed as the second argument of
        serialize_card for the cards already reserved

    returns - dict with 'gems', 'discount', 'points', 'reserved_cards' and 'order'
    """
    # gem calculations
    gems_after = self.gems if gem_change is None else self.gems + gem_change
    gem_serialization = gems_after.serialize()
    # cards already reserved
    reserved_card_serializations = [
        serialize_card(card, not from_own_perspective)
        for card in self.reserved_cards
    ]
    # hypothetical reservation
    if reservation_change is not None:
        tier = reservation_change['tier']
        if reservation_change['type'] == 'topdeck':
            newly_reserved = make_blank_card(tier=tier)
        else:
            position = reservation_change['position']
            newly_reserved = self.game.get_available_cards(tier=tier)[position]
        reserved_card_serializations.append(serialize_card(newly_reserved))
    # fill in with blank reservation slots (they still need 0-filled serializations)
    n_reserved = len(reserved_card_serializations)
    if n_reserved < 3:
        reserved_card_serializations.extend((3 - n_reserved) * [PURE_BLANK_CARD_SERIALIZATION])
    if card_change is None:
        theoretical_points = self.points
        theoretical_discount = self.discount
    else:
        bought = card_change['card']
        theoretical_points = bought['points'] + self.points
        reserved_index = card_change['position'] if card_change['type'] == 'reserved' else None
        color = bought['color']
        if reserved_index is not None:
            # buying a reserved card empties its slot; a board purchase only
            # affects the board state, which is handled elsewhere
            reserved_card_serializations[reserved_index] = PURE_BLANK_CARD_SERIALIZATION
        theoretical_discount = self.discount + ColorCombination(**{color: 1})
    discount_serialization = theoretical_discount.serialize()
    points_serialization = np.asarray([theoretical_points])
    # order serialization to enforce symmetry requirements (one-hot over 4 slots)
    order_serialization = np.zeros(4)
    order_serialization[self.order] = 1.
    return {
        'gems': gem_serialization,
        'discount': discount_serialization,
        'points': points_serialization,
        'reserved_cards': reserved_card_serializations,
        'order': order_serialization,
    }
def full_serializations(self, gem_changes=None, card_changes=None, reservation_changes=None):
    """
    returns a list of dictionaries of the serializations that are used to describe the game state

    each argument is a single change or a list of changes; the three are walked in
    parallel (shorter ones are padded with None) and every step yields one dict with
    'other_players', 'self' and 'game' entries
    """
    # first get serializations of other players, since those are static;
    # the same list object is shared by every returned dict
    other_player_serializations = [
        opponent.serialize(from_own_perspective=False)
        for opponent in self.other_players
    ]
    scenarios = zip_longest(
        wrap_if_not_list(gem_changes),
        wrap_if_not_list(card_changes),
        wrap_if_not_list(reservation_changes),
    )
    # for each change combination, get own serialization and board serialization
    serializations = []
    for gem_change, card_change, reservation_change in scenarios:
        own_view = self.serialize(
            gem_change=gem_change,
            card_change=card_change,
            reservation_change=reservation_change
        )
        board_view = self.game.serialize(
            gem_change=gem_change,
            available_card_change=card_change,
            reservation_change=reservation_change
        )
        serializations.append({
            'other_players': other_player_serializations,
            'self': own_view,
            'game': board_view
        })
    return serializations
def copy_plain_data(self):
"""
the game will use this when recording plain history
"""
return {
'gems': deepcopy(self.gems),
'discounts': deepcopy(self.discount),
'cards': deepcopy(self.owned_cards),
'n_cards': len(self.owned_cards),
'objectives': deepcopy(self.objectives),
'reserved_cards': deepcopy(self.reserved_cards),
'points': copy(self.points),
'order': copy(self.order),
'id': self.id,
'n_reserved_cards': self.n_reserved_cards,
'n_reserved_cards_tiers': deepcopy(self.n_reserved_cards_tiers),
'win': self.win,
'decision_weights': {
'temperature': self.temperature,
'decision_weighting': deepcopy(self.decision_weighting),
},
'n_gems': self.n_gems,