# Improvement Trial With Better Discount/Skip Connections

I added skip connections to the second-to-last layers and wrote better decision code to improve decision-making.

In [1]:
from constants import *
from copy import deepcopy
from random import shuffle
from game import Game
import numpy as np
import sys

Using TensorFlow backend.


In [2]:
# The game now computes the q-score with lag=1 rather than lag=2.
# The hope is that this guides play toward higher point values in the short
# term, with fewer opportunities for the probabilities to stray.
test_config = deepcopy(DEFAULT_GAME_CONFIG)
test_config['memory'] = 1
test_config['discount'] = 0.01

# Goal: eventually train players to not want to grab more tickets.
init_game = Game(n_players=5, q_lag=1, config=test_config)

# `players` is shuffled in place before every game; `player_list` is a
# separate list holding the same player objects in their original order, so
# per-player statistics can be indexed consistently.
players = init_game.players
player_list = list(players)

# One entry is appended to each of these per temperature setting.
(overall_player_wins,
 overall_player_scores,
 overall_winning_scores,
 overall_game_turns) = [], [], [], []



In [3]:
# Temperature schedule: one training round per entry, decreasing to 0.
temperatures = [0.5, 0.2, 0.1, 0.1, 0.05, 0.01, 0.01, 0., 0., 0., 0.]

# Loop sizes are named so that the win-rate denominator in the summary below
# always matches the number of games actually played per temperature.
n_cycles = 3
games_per_cycle = 60
games_per_temperature = n_cycles * games_per_cycle

for temperature in temperatures:
    print('TEMPERATURE: %.03f' % temperature)
    test_config['temperature'] = temperature

    # Per-temperature statistics; per-player entries follow the fixed order of
    # player_list, not the shuffled seating order in `players`.
    player_wins = [0 for _ in players]
    winning_scores = []
    player_scores = [[] for _ in players]
    game_turns = []

    for cycle in range(n_cycles):
        print('in cycle %d' % cycle)
        for _ in range(games_per_cycle):
            sys.stdout.write('.')
            # Randomize seating for each game (shuffles `players` in place).
            shuffle(players)
            # `game` is left bound to the most recent game so that later cells
            # can inspect it.
            game = Game(pre_existing_players=players, config=test_config)
            game.run()
            for idx, player in enumerate(player_list):
                player_wins[idx] += player.win
                player_scores[idx].append(player.total_points)
            winning_scores.append(game.winning_score)
            game_turns.append(game.turn)

        # Training after each cycle: player 0 is never trained, player 1 is
        # trained with the default arguments, and every player with index > 1
        # is trained with argument 20.
        # NOTE(review): the argument is presumably an epoch count (the notes
        # after this cell mention 10 vs 20 epochs) -- confirm against the AI class.
        for idx, player in enumerate(player_list):
            if idx == 1:
                player.ai.train_win()
                player.ai.train_q()
            elif idx > 1:
                player.ai.train_win(20)
                player.ai.train_q(20)

    # History is reset once per temperature, after all cycles have finished,
    # not after every cycle.
    for player in player_list:
        player.ai.reset_history()

    overall_player_wins.append(player_wins)
    overall_player_scores.append(player_scores)
    overall_winning_scores.append(winning_scores)
    overall_game_turns.append(game_turns)

    # Print out some summaries.
    print('WINNING SCORE AVERAGE: %.1f' % np.mean(winning_scores))
    print('NUMBER OF TURNS AVERAGE: %.1f' % np.mean(game_turns))
    for idx, player in enumerate(player_list):
        print('---PLAYER %d STATS---' % idx)
        # Fraction of this temperature's games that the player won.
        print('WIN AVERAGE: %.3f' % (player_wins[idx] / float(games_per_temperature)))
        print('SCORE AVERAGE: %.1f' % np.mean(player_scores[idx]))
        

TEMPERATURE: 0.500
in cycle 0
............................................................in cycle 1
............................................................in cycle 2
............................................................WINNING SCORE AVERAGE: -6.8
NUMBER OF TURNS AVERAGE: 313.5
---PLAYER 0 STATS---
WIN AVERAGE: 0.050
SCORE AVERAGE: -52.5
---PLAYER 1 STATS---
WIN AVERAGE: 0.439
SCORE AVERAGE: -20.9
---PLAYER 2 STATS---
WIN AVERAGE: 0.289
SCORE AVERAGE: -28.6
---PLAYER 3 STATS---
WIN AVERAGE: 0.150
SCORE AVERAGE: -36.1
---PLAYER 4 STATS---
WIN AVERAGE: 0.089
SCORE AVERAGE: -41.4
TEMPERATURE: 0.200
in cycle 0
............................................................in cycle 1
............................................................in cycle 2
............................................................WINNING SCORE AVERAGE: 4.6
NUMBER OF TURNS AVERAGE: 282.5
---PLAYER 0 STATS---
WIN AVERAGE: 0.028
SCORE AVERAGE: -53.6
---PLAYER 1 STATS---
WIN AVERAGE: 0.733
SCORE AVERAGE

KeyboardInterrupt: 

It appears that player 1 is performing reasonably well. The difference between 10 and 20 epochs may be large because most of the network is shared by the win and q-score calculations. Reducing the gap in epochs between them may stabilize results, as would splitting the network into two separate ones (GPU resources are fairly inexpensive for me, anyway).

In [None]:
# Display the `tickets` attribute of `game`, the last Game created by the training loop.
# NOTE(review): this cell has no execution count, so it was never run -- likely a leftover debugging probe.
game.tickets

In [4]:
# Mean winning score per temperature setting (averaged over the games in each row).
np.mean(overall_winning_scores, axis=1)

array([ -6.79444444,   4.62777778,  24.63888889,  27.49444444,
        46.32777778,  59.32222222,  60.43333333])

In [5]:
# Mean score per (temperature setting, player), averaged over the games axis.
np.mean(overall_player_scores, axis=2)

array([[-52.5       , -20.94444444, -28.56111111, -36.15      , -41.4       ],
       [-53.61666667,  -0.77777778, -42.50555556, -42.15      ,
        -40.42222222],
       [-58.43888889,  24.16111111, -48.52777778, -48.05      ,
        -49.96666667],
       [-61.2       ,  25.97222222, -45.48888889, -47.06666667,
        -49.37222222],
       [-55.98888889,  45.53333333, -50.45      , -50.22777778,
        -52.33888889],
       [-52.12777778,  59.28333333, -49.96666667, -50.32222222,
        -52.61111111],
       [-53.72777778,  60.28333333, -49.93333333, -51.55      ,
        -53.77777778]])

In [6]:
# Save each player's models. The player index is filled in here; the escaped
# '%%s' leaves a literal '%s' placeholder in the path passed to save_models.
for player_index, trained_player in enumerate(player_list):
    path_template = 'ai_h5/p%d_test005_%%s.h5' % player_index
    trained_player.ai.save_models(path_template)

Not sure why all player scores improved for the last row... Perhaps training should lower the temperature more quickly.