In [1]:
from game import Game
from ai import SplendorAI
from player import get_phase_parameters
# NOTE: the star import is deliberately kept ahead of the explicit imports
# below so that it cannot overwrite any of the names they bind.
from constants import *
from datetime import datetime
# from player import get_phase_parameters
import sys
from collections import defaultdict
import pickle

import numpy as np



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Bootstrap game: created with n_players=4 only so that its player objects
# can be pulled out (base_game.players) and reused by the games simulated
# later in the notebook.
base_game = Game(id=0, n_players=4)

In [3]:
# Sanity check: print every device TensorFlow can see (the captured output
# below lists one CPU and one GPU).
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2997567613366299711
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9832533197
locality {
  bus_id: 1
  links {
  }
}
incarnation: 5857441652881952360
physical_device_desc: "device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [4]:
# Open a TF1-style session; log_device_placement=True asks TensorFlow to log
# which device each op is assigned to.
# NOTE(review): `sess` is not referenced again in the visible cells --
# presumably it only needs to exist for the backend to use it; confirm.
import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [5]:
# Temperature schedule: decays as rounds go by, with a bump every 4th round.
def calculate_temperature(round):
    """
    Return the temperature to use after training round `round`.

    The value is the product of three factors:
      * a long-term decay, 4 / (2.5 + round);
      * a sawtooth, 1 / (1.5 + round % 4), which resets (and so raises the
        temperature again) each time `round` reaches a multiple of 4;
      * an extra division by 5 applied only when round == 20.
    """
    long_term_decay = 4 / (2.5 + round)
    position_in_cycle = 1.5 + round % 4
    # (round == 20) is a bool; 1 + 4*True == 5, 1 + 4*False == 1
    round_20_damping = 1 + 4 * (round == 20)
    return long_term_decay / position_in_cycle / round_20_damping

In [6]:
# Default network hyperparameters. Layer lists give one entry per layer.
NETWORK_HYPERPARAMETERS = dict(
    # ---- player input ----
    player_funnel_layers=[15, 12, 10],
    reserved_funnel_layers=[12, 10],
    # 0 same as input, 1 = as first layer, etc.
    inject_reserved_at_player_funnel_index=1,
    card_funnel_layers=[12, 12, 8],
    # ---- game input ----
    game_funnel_layers=[15, 12, 10],
    game_objective_funnel_layers=[10, 8],
    game_card_funnel_layers=[15, 12, 10],
    # ---- overall (slightly increased from default) ----
    # layers applied once everything has been combined
    main_dense_layers=[84, 36, 15],
    # ---- output layers; the win layer is not listed here ----
    output_layers=[
        dict(name='Q1', lag=1, score=1, discount=0.1, gems=0.01),
        dict(name='Q3', lag=3, score=1, discount=0.1, gems=0),
        dict(name='Q5', lag=5, score=1, discount=0.05, gems=0),
    ],
)

def get_phase_parameters(phase):
    """
    Return the decision weights used during training phase `phase`.

    Six phases (1 through 6) are defined. Each yields a fresh dict with the
    keys 'Q1', 'Q3', 'Q5' and 'win'; as the phase number grows, weight moves
    away from 'Q1' and toward 'win'. Any other `phase` value returns None.

    NOTE: this definition replaces the `get_phase_parameters` imported from
    `player` at the top of the notebook.
    NOTE(review): the weights of phases 1-5 each add up to 1.0, while those
    of phase 6 add up to 1.10 -- confirm whether that is intended.
    """
    # One row per phase: (phase, Q1, Q3, Q5, win)
    schedule = (
        (1, 0.5, 0.3, 0.15, 0.05),
        (2, 0.4, 0.25, 0.2, 0.15),
        (3, 0.25, 0.25, 0.25, 0.25),
        (4, 0.15, 0.2, 0.35, 0.3),
        (5, 0.05, 0.1, 0.35, 0.50),
        (6, 0.03, 0.1, 0.22, 0.75),
    )
    for phase_number, q1, q3, q5, win in schedule:
        if phase == phase_number:
            return {'Q1': q1, 'Q3': q3, 'Q5': q5, 'win': win}
    return None

In [7]:
# Player objects taken from the bootstrap game; the same objects are handed
# to every Game created in the training loop.
players = base_game.players
# (round, set) -> list of plain-data snapshots, one appended per finished game.
game_data = defaultdict(list)



In [8]:
# Per-set timing recorded by the training loop, keyed by its (i, j, k) loop
# indices at the moment the set finishes.
set_durations = {}

# Test multiple runs of games

In [10]:
# ---------------------------------------------------------------------------
# Self-play training schedule.
#
# The run consists of n_rounds rounds; every round plays n_sets_per_round
# sets of n_simulations_per_set games with the shared `players`. Models are
# trained after each set, and after each round the players are reset, given
# a new temperature / decision weighting, and checkpointed to disk.
# ---------------------------------------------------------------------------
n_rounds = 22
n_sets_per_round = 2
n_simulations_per_set = 100

start_time = datetime.now()
for i in range(n_rounds):
    print('ON ROUND', i)
    for j in range(n_sets_per_round):
        set_start_time = datetime.now()
        for k in range(n_simulations_per_set):
            # if (i==0) and (j==0):
            #    # soft restart
            #    break

            # Unique id per game, derived from the loop sizes so ids stay
            # unique if the constants above change (for the 2 x 100 setup
            # this equals the previous i*200 + j*100 + k).
            game_id = (i * n_sets_per_round + j) * n_simulations_per_set + k
            new_game = Game(id=game_id, players=players)
            stalemate = new_game.run()
            # stalemates should be extraordinarily rare and pretty much nonexistent
            if stalemate:
                # not used again in this cell; presumably kept for inspection
                very_long_game = new_game
                print('teaching ai not to stalemate')
                for player in players:
                    player.transfer_history_to_ai()
                    player.ai.train_models(verbose=0, n_epochs=4)
            sys.stdout.write('.')
            sys.stdout.flush()
            # record historic game data for each set
            game_data[(i, j)].append(new_game.copy_plain_data_for_self_and_players())
        set_stop_time = datetime.now()
        # Elapsed simulation time for the set (training below is not included).
        # Must be stop - start: the reverse is a negative timedelta, whose
        # `.seconds` attribute is 86400 minus the elapsed seconds.
        duration = (set_stop_time - set_start_time).total_seconds()
        for player in players:
            player.transfer_history_to_ai()
            player.ai.train_models(verbose=0, n_epochs=12)
        # k is the index of the last game of the set; the 3-tuple key is kept
        # so the layout of the pickled duration data does not change.
        set_durations[(i, j, k)] = duration
        print('/')
        sys.stdout.flush()

        set_games = game_data[(i, j)]
        avg_game_length = np.mean([x['game']['turn'] for x in set_games])
        print('R/S %d/%d AVERAGE GAME LENGTH: ' % (i, j), avg_game_length)
        avg_cards_purchased = np.mean([
            [pdata['n_cards'] for pdata in x['players'].values()]
            for x in set_games
        ])
        print('R/S %d/%d AVERAGE CARDS PURCHASED: ' % (i, j), avg_cards_purchased)
        # calculate win rates over every game actually recorded for this set
        # and every player, rather than assuming 100 games and 4 players
        player_data = [x['players'] for x in set_games]
        win_values = [
            [game_players[pid]['win'] for game_players in player_data]
            for pid in range(len(players))
        ]
        win_rates = [np.mean(v) for v in win_values]
        print('WIN RATES: ', str(win_rates))

    # End of round: clear player state, then move to the next point of the
    # temperature / phase schedule and checkpoint every player's models.
    for player in players:
        player.reset(reset_extended_history=True)
    phase = min(6, i // 4 + 1)  # a new phase every 4 rounds, capped at 6
    temperature = calculate_temperature(i)

    for p_i, player in enumerate(players):
        player.temperature = temperature
        player.decision_weighting = get_phase_parameters(phase)
        model_name = 'run6_player_%s_round_' % str(p_i)
        player.ai.save_models(model_name, index=i)


stop_time = datetime.now()
for timestamp in [start_time, stop_time]:
    print(timestamp.strftime('%x %X'))

# save historic data
with open('run_6_game_data.dat', 'wb') as f:
    pickle.dump(game_data, f)

with open('run_6_duration_data.dat', 'wb') as f:
    pickle.dump(set_durations, f)
    


ON ROUND 0
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 0/0 AVERAGE GAME LENGTH:  149.8
R/S 0/0 AVERAGE CARDS PURCHASED:  13.6625
WIN RATES:  [0.27, 0.15, 0.22, 0.36]
....................................................................................................training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 0/1 AVERAGE GAME LENGTH:  128.16
R/S 0/1 AVERAGE CARDS PURCHASED:  13.61
WIN RATES:  [0.2, 0.3, 0.33, 0.17]
saving run6_player_0_round__win_0.h5
saving run6_player_0_round__Q1_0.h5
s

training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 5/0 AVERAGE GAME LENGTH:  119.88
R/S 5/0 AVERAGE CARDS PURCHASED:  13.795
WIN RATES:  [0.18, 0.26, 0.3, 0.26]
....................................................................................................training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 5/1 AVERAGE GAME LENGTH:  120.2
R/S 5/1 AVERAGE CARDS PURCHASED:  13.9625
WIN RATES:  [0.2, 0.26, 0.29, 0.25]
saving run6_player_0_round__win_5.h5
saving run6_player_0_round__Q1_5.h5
saving run6_player_0_round__Q3_5.h5
saving run6_player_0_round__Q5_5.h5
saving run6_player_1_round__win_5.h5
saving run6_player_1_round__Q1_5.h5
saving run6_player_1_round__Q3_

....................................................................................................training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 10/1 AVERAGE GAME LENGTH:  119.12
R/S 10/1 AVERAGE CARDS PURCHASED:  14.1525
WIN RATES:  [0.16, 0.32, 0.25, 0.27]
saving run6_player_0_round__win_10.h5
saving run6_player_0_round__Q1_10.h5
saving run6_player_0_round__Q3_10.h5
saving run6_player_0_round__Q5_10.h5
saving run6_player_1_round__win_10.h5
saving run6_player_1_round__Q1_10.h5
saving run6_player_1_round__Q3_10.h5
saving run6_player_1_round__Q5_10.h5
saving run6_player_2_round__win_10.h5
saving run6_player_2_round__Q1_10.h5
saving run6_player_2_round__Q3_10.h5
saving run6_player_2_round__Q5_10.h5
saving run6_player_3_round__win_10.h5
sav

....................................................................................................training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 15/1 AVERAGE GAME LENGTH:  116.68
R/S 15/1 AVERAGE CARDS PURCHASED:  14.0075
WIN RATES:  [0.33, 0.18, 0.28, 0.21]
saving run6_player_0_round__win_15.h5
saving run6_player_0_round__Q1_15.h5
saving run6_player_0_round__Q3_15.h5
saving run6_player_0_round__Q5_15.h5
saving run6_player_1_round__win_15.h5
saving run6_player_1_round__Q1_15.h5
saving run6_player_1_round__Q3_15.h5
saving run6_player_1_round__Q5_15.h5
saving run6_player_2_round__win_15.h5
saving run6_player_2_round__Q1_15.h5
saving run6_player_2_round__Q3_15.h5
saving run6_player_2_round__Q5_15.h5
saving run6_player_3_round__win_15.h5
sav

....................................................................................................training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
training win model
training Q1 model
training Q3 model
training Q5 model
/
R/S 20/1 AVERAGE GAME LENGTH:  117.6
R/S 20/1 AVERAGE CARDS PURCHASED:  14.165
WIN RATES:  [0.2, 0.33, 0.22, 0.25]
saving run6_player_0_round__win_20.h5
saving run6_player_0_round__Q1_20.h5
saving run6_player_0_round__Q3_20.h5
saving run6_player_0_round__Q5_20.h5
saving run6_player_1_round__win_20.h5
saving run6_player_1_round__Q1_20.h5
saving run6_player_1_round__Q3_20.h5
saving run6_player_1_round__Q5_20.h5
saving run6_player_2_round__win_20.h5
saving run6_player_2_round__Q1_20.h5
saving run6_player_2_round__Q3_20.h5
saving run6_player_2_round__Q5_20.h5
saving run6_player_3_round__win_20.h5
saving

In [15]:
# Reload player 0's saved models for index 1 (the captured output below shows
# the win/Q1/Q3/Q5 .h5 files being loaded).
players[0].ai.load_models('run6_player_0_round_', 1)

loading run6_player_0_round__win_1.h5
loading run6_player_0_round__Q1_1.h5
loading run6_player_0_round__Q3_1.h5
loading run6_player_0_round__Q5_1.h5


In [None]:
# The ABCs live in collections.abc; the aliases in `collections` were removed
# in Python 3.10.
from collections.abc import Container, Iterable, Mapping
from sys import getsizeof

def deep_getsizeof(o, ids=None):
    """
    Estimate the memory footprint of `o`, following references into
    containers.

    Parameters
    ----------
    o : object
        The object to measure.
    ids : set, optional
        ids of objects that have already been counted. It is updated in
        place, so passing the same set to several calls avoids counting
        shared objects twice. Defaults to a fresh empty set.

    Returns
    -------
    int
        `sys.getsizeof(o)` plus the deep size of everything reachable through
        mappings (keys and values) and iterable containers; 0 if `o` was
        already counted.
    """
    if ids is None:
        ids = set()

    if id(o) in ids:
        return 0

    size = getsizeof(o)
    ids.add(id(o))

    # Strings and byte strings are leaves: iterating them would only add the
    # sizes of their individual characters / ints on top of the real size.
    if isinstance(o, (str, bytes, bytearray)):
        return size

    # Arrays are not recursed into; their getsizeof value is used as-is.
    if isinstance(o, np.ndarray):
        return size

    if isinstance(o, Mapping):
        return size + sum(
            deep_getsizeof(k, ids) + deep_getsizeof(v, ids) for k, v in o.items()
        )

    # Container only guarantees `in`; require Iterable too before looping.
    if isinstance(o, Container) and isinstance(o, Iterable):
        return size + sum(deep_getsizeof(x, ids) for x in o)

    return size

# deep_getsizeof(players[0].extended_serialized_action_history[0], set())

# for k in list(locals().keys()):
#    v = locals()[k]
#    size = deep_getsizeof(v, set())
#    if size > 100000:
#        print(k, ':', size)

In [None]:
# players[1].reset(reset_extended_history=True)
# for k in dir(players[1]):
#    v = getattr(players[1], k)
#    if isinstance(v, (list, dict)):
#        print(k, len(v))

In [None]:
#new_game = Game(id=i*200 + j*100+1.1, players=players)

In [None]:
#for k in dir(players[2]):
#    v = getattr(players[2], k)
#    if isinstance(v, (list, dict)):
#        print(k, len(v))

In [None]:
#len(players[0].ai.extended_serialized_history)

# NOTES

Runs are complete. Data can be analyzed in a different environment. Tweaks may be made for future runs based on simulation results from loaded models.