In [1]:
%load_ext autoreload
%autoreload 2

import random
import numpy as np
import pandas as pd
import IPython.display as dp

from jassbot.Config import *
from jassbot.GameLogic import *
from jassbot.GameStrategy import *
from jassbot.Chrono import *

create_logger()

model = create_model()
stats = pd.DataFrame({'rounds won':[], 'games won':[]})
epsilon = 1

%matplotlib inline

Using Theano backend.


In [None]:
state = init()

simulations = 100

epochs = 1000
test_runs = 1000

logger = get_logger()

chrono = Chrono()
for s in range(simulations):

    temp_memory = []
    round_wins = []
    print('*** learning phase %i / %i' % (s+1, simulations))
    chrono.start()
    learning_time = 0
    for epoch in range(epochs):
        new_game(state)
        logger.info("*** start of a new game")
        print_state(state)

        # we first define the suit order used later for the algorithm, for each player
        # (depending on his hand and the current trump suit)
        suit_orders = []
        for p in range(4):
            player_hand = state['p%i_hand' % p]
            trump = state['trump']
            suit_order = order_suits(player_hand, trump)
            suit_orders.append(suit_order)

        for j in range(9):
            for i in range(4):
                suit_order = suit_orders[i]
                
                #chrono.start()
                result = play_once(state, lambda (possible_cards): \
                          choose(suit_order,state,possible_cards,model,temp_memory,epsilon))
                #chrono.stop("choose")

                assert i < 3 and result is None or \
                       i == 3 and result is not None

                if i == 3: # at the end of one round

                    round_wins.append(result)

                    if len(state['played']) == 36: 

                        # the game is finished, we can update the model
                        chrono.start()
                        update_model_game_end(model,temp_memory,round_wins,epsilon)
                        learning_time += chrono.stop()

    chrono.stop('learning')
    print('learning time: %i ms' % learning_time)

    game_wins = pd.DataFrame({'game won':[]})
    round_wins = pd.DataFrame({'round won':[]})

    print('*** testing phase %i / %i' % (s+1, simulations))
    game_wins_idx = 0
    round_wins_idx = 0
    chrono.start()
    for epoch in range(test_runs):
        new_game(state)
        logger.info("*** start of a new game")
        print_state(state)

        for j in range(4*9):
            result = play_once(state, lambda (possible_cards): \
                      choose_for_test(suit_order,state,possible_cards,model,temp_memory,epsilon))

            if result is not None:
                team = result['team']
                is_final = result['final']

                if is_final:
                    ratio = result['ratio']
                    df = pd.DataFrame({'game won':[1 if team == 0 else 0], 'ratio': [ratio]}, index=[game_wins_idx])
                    game_wins = game_wins.append(df)
                    game_wins_idx += 1
                else:
                    df = pd.DataFrame({'round won':[1 if team == 0 else 0]}, index=[round_wins_idx])
                    round_wins = round_wins.append(df)
                    round_wins_idx += 1
    chrono.stop('test')

    games_won = 1.0 * len(game_wins[game_wins['game won'] == 1]) / test_runs
    ratio = game_wins['ratio'].mean()

    # we have 8 rounds, excepted the final one that we don't count here
    rounds_won = 1.0 * len(round_wins[round_wins['round won'] == 1]) / (test_runs * 8)
    
    df = pd.DataFrame({'rounds won':[rounds_won], 'games won':[games_won], 'ratio':[ratio]},index=[s])
    dp.display(df)
    stats = stats.append(df)
    
    if epsilon > 0.1:
        epsilon -= (1/simulations)


stats.to_csv('data/%s-stats.csv')

*** learning phase 1 / 100
learning: 247 ms (1)
learning time: 149967 ms
*** testing phase 1 / 100
test: 324 ms (1)


Unnamed: 0,games won,ratio,rounds won
0,0.511,0.503522,0.487375


*** learning phase 2 / 100
learning: 775 ms (1)
learning time: 237891 ms
*** testing phase 2 / 100
test: 403 ms (1)


Unnamed: 0,games won,ratio,rounds won
1,0.495,0.492713,0.473875


*** learning phase 3 / 100
learning: 881 ms (1)
learning time: 263620 ms
*** testing phase 3 / 100
test: 406 ms (1)


Unnamed: 0,games won,ratio,rounds won
2,0.518,0.507917,0.5005


*** learning phase 4 / 100
learning: 918 ms (1)
learning time: 269969 ms
*** testing phase 4 / 100
test: 433 ms (1)


Unnamed: 0,games won,ratio,rounds won
3,0.519,0.503898,0.48


*** learning phase 5 / 100
learning: 521 ms (1)
learning time: 272617 ms
*** testing phase 5 / 100
test: 358 ms (1)


Unnamed: 0,games won,ratio,rounds won
4,0.464,0.487631,0.481125


*** learning phase 6 / 100
learning: 47 ms (1)
learning time: 294239 ms
*** testing phase 6 / 100
test: 969 ms (1)


Unnamed: 0,games won,ratio,rounds won
5,0.481,0.495338,0.4805


In [None]:
list(set([1,2,3]) - set([2,3]))

def rank_order(i1,i2):
    return i2['nb'] - i1['nb'] if i1['nb'] != i2['nb'] else i1['best_rank'] - i2['best_rank']

"abc".startswith("ad")
sorted([{'nb': 1, 'best_rank':4}, {'nb': 2, 'best_rank': 2}], cmp=rank_order)

#abc = []

#stats.index = range(len(stats))
#stats['ratio'].plot()
# a = [{'nb': 3, 'best_rank':4}, {'nb': 3, 'best_rank': 2}]
# print(a[0]['nb']*10+a[0]['best_rank'])
# print(a[1]['nb']*10+a[1]['best_rank'])

now_as_string()