In [13]:
import h5py
import capnp
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
# Location of the Cap'n Proto schema file, relative to the notebook.
path_to_schema = "../../src/capnp/CapnpGame.capnp"
# Folder holding the recorded games. The trailing slash matters for code that
# builds file names by string concatenation.
path_to_game_folder = "/Users/florian/Desktop/games2/"

In [15]:
# Load the Cap'n Proto schema file and keep its `Game` type for decoding records.
schema = capnp.load(path_to_schema).Game

In [16]:
def get_games_from_hdf5(path_to_game_folder):
    """Load every game stored in the ``.h5`` files of a folder.

    Each file is expected to contain a ``game_record`` dataset whose
    ``count_id`` attribute gives the number of stored records. Every record is
    decoded with the module-level ``schema``.

    NOTE: a later cell defines ``get_games_from_hdf5(list_of_files, schema)``
    under the same name; when the notebook is run top to bottom that
    definition replaces this one.

    path_to_game_folder: directory that is scanned (non-recursively) for files
        ending in ".h5"; a trailing slash is optional.

    Returns a list with one decoded game per record.
    """
    games = []

    for file in os.listdir(path_to_game_folder):
        if file.endswith(".h5"):
            # The context manager closes the HDF5 file even if decoding fails.
            with h5py.File(os.path.join(path_to_game_folder, file), "r") as f:
                dataset = f["game_record"]

                for i in range(dataset.attrs["count_id"]):
                    # tobytes() replaces the deprecated tostring() alias.
                    game = schema.from_bytes(dataset[i].tobytes())
                    games.append(game)

    return games

In [19]:
def get_games_from_hdf5(list_of_files, schema):
    """Load every game stored in the given HDF5 files.

    Each file is expected to contain a ``game_record`` dataset whose
    ``count_id`` attribute gives the number of stored records.

    list_of_files: absolute paths of hdf5 files; entries that do not end in
        ".h5" are skipped.
    schema: object whose ``from_bytes`` decodes one serialized record.

    Returns a list with one decoded game per record, in file order.
    """
    games = []

    for file in list_of_files:
        if file.endswith(".h5"):
            # The context manager closes the HDF5 file even if decoding fails.
            with h5py.File(file, "r") as f:
                dataset = f["game_record"]

                for i in range(dataset.attrs["count_id"]):
                    # tobytes() replaces the deprecated tostring() alias.
                    game = schema.from_bytes(dataset[i].tobytes())
                    games.append(game)

    return games

In [11]:
# Number of games currently held in `games`.
len(games)

3100

In [7]:
def get_games_without_res(games):
    """Return the games whose ``noresignmode`` flag is truthy.

    games: iterable of game objects exposing a ``noresignmode`` attribute.

    The input is not modified; a new list is returned in the original order.
    """
    kept = []
    for candidate in games:
        if candidate.noresignmode:
            kept.append(candidate)
    return kept

In [8]:
# Keep only the games for which `noresignmode` is set.
# NOTE(review): this rebinds `games`, so the unfiltered list is no longer
# reachable under this name and the cell is not idempotent across re-runs.
games = get_games_without_res(games)

In [9]:
# Number of games left after the filter above.
len(games)

0

In [30]:
def get_winrates_for_player(games, player):
    """Collect, per game, the winrates found at the positions of one player.

    games: iterable of game objects; each exposes ``stateprobs``, a sequence
        of entries with a ``winrate`` attribute.
    player: parity selector; an entry at index ``i`` is kept when
        ``i % 2 == player``.

    Returns a list with one list of winrates per game, in input order.
    """
    per_game = []

    for game in games:
        all_winrates = [state.winrate for state in game.stateprobs]
        per_game.append(
            [value for index, value in enumerate(all_winrates) if index % 2 == player]
        )

    return per_game

In [31]:
def get_games_won_by_player(games, player):
    """Return the games whose ``result`` field equals ``player``.

    games: iterable of game objects exposing a ``result`` attribute.
    player: value compared against ``game.result``.
    """
    return list(filter(lambda game: game.result == player, games))

In [32]:
def get_winrates_for_games_won_by_player(games, player):
    """Winrates of ``player``, restricted to the games that ``player`` won.

    The games are first narrowed down with ``get_games_won_by_player`` and the
    remaining ones are passed to ``get_winrates_for_player``.

    Returns a list with one list of winrates per game won by ``player``.
    """
    return get_winrates_for_player(get_games_won_by_player(games, player), player)

In [33]:
# Winrate sequences of the winning side, split by the `result` value of the game
# (1 is treated as black, 0 as white).
black = get_winrates_for_games_won_by_player(games, 1)
white = get_winrates_for_games_won_by_player(games, 0)

# One winrate sequence per game whose result is 1 or 0.
dataset = black + white

In [34]:
# Number of winrate sequences in the combined dataset.
len(dataset)

3100

In [35]:
def find_fn_count(threshold, winrate_lists):
    """Count the games in which the resignation condition is met at least once.

    threshold: float, below what win rate should the player resign?
    winrate_lists: list of lists of floats, win rates of the player in
        several games (one inner list per game).

    A game is counted when ``1 - winrate <= threshold`` holds for any of its
    win rates. Returns the number of such games as an int.
    """
    def would_resign(winrates):
        return any(1 - winrate <= threshold for winrate in winrates)

    return sum(would_resign(winrates) for winrates in winrate_lists)

In [60]:
# Number of games in `dataset` that meet the resignation condition at threshold 0.05.
find_fn_count(0.05, dataset)

5

In [55]:
# Total number of games in `dataset`, for comparison with the count above.
len(dataset)

3100

In [64]:
def compute_threshold(games, max_fn_rate, max_threshold=0.1, num_thresholds=1000):
    """Find the largest resignation threshold that keeps false negatives low.

    For every candidate threshold on an evenly spaced grid, the share of games
    counted by ``find_fn_count`` is computed over the games won by player 1
    (black) and player 0 (white). The largest candidate whose share does not
    exceed ``max_fn_rate`` is returned.

    games: iterable of game objects accepted by
        ``get_winrates_for_games_won_by_player``.
    max_fn_rate: float, highest acceptable share of counted games.
    max_threshold: upper end of the candidate grid (the grid starts at 0).
    num_thresholds: number of candidates on the grid.

    Returns the selected threshold (an element of the numpy grid).

    Raises ValueError when there are no games with result 1 or 0, or when no
    candidate threshold satisfies ``max_fn_rate``.
    """
    black = get_winrates_for_games_won_by_player(games, 1)
    white = get_winrates_for_games_won_by_player(games, 0)
    winrate_lists = black + white

    if not winrate_lists:
        # Without this guard the rate below would be 0/0 and end in an IndexError.
        raise ValueError("no games with result 1 or 0 to compute a threshold from")

    thresholds = np.linspace(0, max_threshold, num=num_thresholds)

    fn_counts = np.array([find_fn_count(threshold, winrate_lists) for threshold in thresholds])
    fn_rates = fn_counts / len(winrate_lists)

    admissible = np.where(fn_rates <= max_fn_rate)[0]
    if admissible.size == 0:
        raise ValueError(
            "no threshold in [0, {}] keeps the false negative rate at or below {}".format(
                max_threshold, max_fn_rate
            )
        )

    # The grid is ascending, so the last admissible index is the largest threshold.
    return thresholds[admissible[-1]]

In [66]:
# Largest candidate threshold whose false-negative share stays at or below 5%.
compute_threshold(games, 0.05)

0


0.10000000000000001

In [None]:
# End-to-end run: load the schema, read all games, compute the threshold.
path_to_schema = "../../src/capnp/CapnpGame.capnp"
path_to_game_folder = "/Users/florian/Desktop/games2/"

schema = capnp.load(path_to_schema).Game

# The most recent definition of get_games_from_hdf5 takes full file paths plus
# the schema, so build the paths from the folder listing instead of passing
# the folder itself.
hdf5_files = [
    os.path.join(path_to_game_folder, file_name)
    for file_name in sorted(os.listdir(path_to_game_folder))
]
games = get_games_from_hdf5(hdf5_files, schema)
compute_threshold(games, 0.05)

Wie finde ich heraus, wer den letzten Zug gemacht hat?
- Anhand der Länge der Spiele. Wenn immer Schwarz beginnt, sind Spiele mit ungerader Länge die, bei denen Schwarz den letzten Zug hatte.

False Negatives minimieren: Situationen, in denen wir aufgegeben haben, aber gewonnen hätten. Dabei soll der Threshold maximiert werden -> so früh wie möglich aufhören.
Threshold so hoch wie möglich wählen, ohne dass der Anteil der False Negatives über x % steigt.




In [None]:
def calculate_winrate(winrate_lists, player):
    """Calculates winrates according to the player: player = 1 black, player = 0 white

    Every win rate at an index ``i`` with ``i % 2 == player`` is replaced by
    ``1 - winrate``. The inner lists are modified in place and nothing is
    returned.

    winrate_lists: list of mutable lists of floats, one inner list per game.
    player: parity selector (1 or 0) for the positions to flip.
    """
    # The loop must iterate the parameter itself; the previous spelling
    # referred to an undefined name.
    for winrates in winrate_lists:
        for i, winrate in enumerate(winrates):
            if i % 2 == player:
                winrates[i] = 1 - winrate
                

In [None]:
def naive_find_fn_count(threshold, winrate_lists):
    """Loop-based count of games that meet the resignation condition.

    threshold: float compared against ``1 - winrate``.
    winrate_lists: list of lists of floats, one inner list per game.

    A game adds one to the count as soon as any of its win rates satisfies
    ``1 - winrate <= threshold``. Returns the count as an int.
    """
    count = 0
    for game_winrates in winrate_lists:
        for winrate in game_winrates:
            if 1 - winrate <= threshold:
                count += 1
                # One hit is enough for this game.
                break
    return count

In [115]:
# Names (not full paths) of all entries in the game folder.
# NOTE(review): `path_to_game_folder` is reassigned in the following cell, so
# this listing uses whatever value the variable held before that.
file_list = list(os.listdir(path_to_game_folder))

In [116]:
# Switch to the folder that is passed to read_games further down.
path_to_game_folder = "/Users/florian/Desktop/games/"

In [117]:
def read_games(path_to_game_folder, file_list):
    """Read one game per file using the module-level ``schema``.

    path_to_game_folder: directory containing the files; a trailing slash is
        optional because the paths are built with ``os.path.join``.
    file_list: file names relative to ``path_to_game_folder``.

    Returns a list with the object returned by ``schema.read`` for each file,
    in the order of ``file_list``.
    """
    games = []

    for file in file_list:
        # NOTE(review): the handle is intentionally not closed here. The reader
        # returned by schema.read may still need the open file; confirm against
        # the pycapnp documentation before adding a `with` block.
        f = open(os.path.join(path_to_game_folder, file), 'rb')
        game = schema.read(f)
        games.append(game)

    return games

In [None]:
# Read every file named in `file_list` from the current game folder.
games = read_games(path_to_game_folder, file_list)

In [None]:
# Plot the share of counted games against the candidate threshold.
# `thresholds` and `results` only exist as locals inside compute_threshold, so
# they are rebuilt here from the notebook-level `dataset` with the same grid.
thresholds = np.linspace(0, 0.1, num=1000)
results = np.array([find_fn_count(threshold, dataset) for threshold in thresholds])
results = results / len(dataset)

fig, ax = plt.subplots()
ax.plot(thresholds, results)
ax.set_xlabel("resignation threshold")
ax.set_ylabel("false negative rate")
ax.set_title("False negative rate per threshold");