# Analysis of *honest* case

### Problem description

In this notebook, we analyze the policy in the case where the environment (i.e., the second player) is honest.

In [1]:
from env import Hangman
from network import NNAgent, Network

In [2]:
# Build the policy network and load its weights from 'policy.h5'.
policy_network = Network()
policy_network.load_weights('policy.h5') # path to the saved weights file
# Wrap the network in the agent that selects the guesses in the cells below.
player = NNAgent(policy_network)
player.eval() # switch to evaluation state so the agent will not memorize play history

In [3]:
# Reset the agent's guessed state before the first game is played.
# NOTE(review): presumably this clears the record of already-guessed letters - confirm in network.NNAgent.
player.reset_guessed()

In [4]:
# Life budget per game; equal to the max_lives value given to Hangman in the cells below.
max_lives = 8

In [5]:
# Test setup: play one complete game and print every guess together with the
# resulting state and the remaining lives.
# The life budget comes from `max_lives` (defined in the cell above) instead of
# repeating the literal, so changing that constant actually affects this game.
env = Hangman('top_10000_frequent_words.txt', max_lives=max_lives, verbose=False)

# reset() is called without a word, so the environment chooses the word itself.
state = env.reset()
done = False

while not done:
    guess = player.select_action(state)
    print('Guessing', guess)
    # step() returns a 4-tuple; only the new state and the done flag are used here.
    state, _, done, _ = env.step(guess)
    print(f">>  {state}  |   {env.get_current_live()}")
print()

# Reset the agent's guessed state so the next game does not start from this one's.
player.reset_guessed()

Guessing e
>>  -e----e-  |   8
Guessing r
>>  -e-r--e-  |   8
Guessing d
>>  -e-r--e-  |   7
Guessing a
>>  -ear--e-  |   7
Guessing s
>>  sear--es  |   7
Guessing i
>>  sear--es  |   6
Guessing h
>>  sear-hes  |   6
Guessing c
>>  searches  |   6



## Guessing score

Count how many words the agent guesses correctly on words taken from the top 10,000 frequent-words list. The cell below evaluates the last 100 entries of that list.

In [11]:
from tqdm import tqdm

# (Re-)initialize the environment and the agent's guessed state.
# The life budget is taken from `max_lives` (defined in an earlier cell)
# rather than a second hard-coded 8.
env = Hangman('top_10000_frequent_words.txt', max_lives=max_lives, verbose=False)
player.reset_guessed()

# Words to evaluate: the last 100 entries of the word list, in reverse order.
# Materialising them as a list gives tqdm a length, so the progress bar shows
# a total and an ETA instead of a bare iteration counter.
# To score the whole list instead, use: eval_words = list(env.words)
eval_words = list(reversed(env.words[-100:]))

# Number of games the agent wins.
correct = 0

for word in tqdm(eval_words):
    # Start a new game on this specific word.
    state = env.reset(word)
    done = False
    # Let the agent guess until the environment reports the game as finished.
    while not done:
        guess = player.select_action(state)
        state, _, done, _ = env.step(guess)
    # Count the game if the environment reports it as won.
    if env.is_game_won():
        correct += 1
    # Reset the agent's guessed state before the next word.
    player.reset_guessed()

# Report the result; guard against an empty evaluation set.
accuracy = correct / len(eval_words) if eval_words else 0.0
print(f"Correct guesses: {correct}/{len(eval_words)} ({accuracy:.1%})")

100it [00:54,  1.83it/s]


Percentage of correct guesses:

## Archive

Hyperparameter analysis. Not useful if the model is already trained.

In [None]:
from tqdm.auto import tqdm
from itertools import product
import numpy as np

# Number of games played for every hyperparameter combination.
n_games = 50

# Grid of environment settings to evaluate. The key 'lives' is passed to
# Hangman as `max_lives`; every other key matches a Hangman keyword argument.
hyperparameters = {
    'lives': [6, 8, 10],
    'win_reward': [10, 30, 100],
    'correct_reward': [0, 1, 10],
    'lose_reward': [0, -1],
    'false_reward': [0, -1],
    'repeated_guessing_penalty': [0, -100]
}

# Maps each hyperparameter tuple (values in the key order of `hyperparameters`)
# to (mean, std) of the cumulative reward over `n_games` games.
results = {}

# Materialise the grid so tqdm knows how many combinations there are.
settings = list(product(*hyperparameters.values()))

# Evaluate the (fixed) agent over `n_games` games for each combination.
for hyp_setting in tqdm(settings):
    # Look values up by name instead of by position, so reordering the
    # dictionary above cannot silently swap arguments.
    params = dict(zip(hyperparameters, hyp_setting))
    env = Hangman(
        'top_10000_frequent_words.txt',
        max_lives=params['lives'],
        win_reward=params['win_reward'],
        correct_reward=params['correct_reward'],
        lose_reward=params['lose_reward'],
        false_reward=params['false_reward'],
        repeated_guessing_penalty=params['repeated_guessing_penalty'],
        verbose=False
    )
    reward_list = []
    for _ in range(n_games):
        done = False
        state = env.reset()
        # Make guesses until the game is done, accumulating the reward.
        cum_reward = 0
        while not done:
            guess = player.select_action(state)
            state, reward, done, _ = env.step(guess)
            cum_reward += reward
        # Reset the agent's guessed state before the next game.
        player.reset_guessed()
        reward_list.append(cum_reward)
    # For each hyperparameter combination, save the mean and std of the reward.
    results[hyp_setting] = (np.mean(reward_list), np.std(reward_list))