# Analysis

### Setup

In [None]:
from env import Hangman, HangmanCheat
from network import NNAgent, Network, CheatAgent

In [None]:
# Build the policy network and restore its pre-trained weights from disk.
policy_network = Network()
policy_network.load_weights('models/policy.h5')

# Wrap the network in an agent and switch it to evaluation state so the agent
# will not memorize play history. reset_guessed() is called once up front so
# the first game starts from a clean agent (presumably it clears the record of
# already-guessed letters -- confirm in network.py).
player = NNAgent(policy_network)
player.eval()
player.reset_guessed()

In [None]:
# Smoke test: the agent plays a single verbose game against the honest
# environment (8 lives).
env = Hangman('words/top_1000.txt', max_lives=8, verbose=True)

# reset() is called without a word here; the evaluation cells further down
# pass the word explicitly.
state, done = env.reset(), False

while not done:
    guess = player.select_action(state)
    print('Guessing', guess)
    # step() returns a 4-tuple; only the next state and the done flag are used.
    state, _, done, _ = env.step(guess)

# Reset the agent's guess record so the next cell starts from a clean agent.
player.reset_guessed()

In [None]:
# Smoke test: the same single verbose game, but against the cheating
# environment (8 lives, reject_rate of 0.2).
env = HangmanCheat(
    'words/top_1000.txt',
    max_lives=8,
    verbose=True,
    reject_rate=0.2,
)

# reset() is called without a word here; the evaluation cells further down
# pass the word explicitly.
state, done = env.reset(), False

while not done:
    guess = player.select_action(state)
    print('Guessing', guess)
    # step() returns a 4-tuple; only the next state and the done flag are used.
    state, _, done, _ = env.step(guess)

# Reset the agent's guess record so the next cell starts from a clean agent.
player.reset_guessed()

## *Honest* case

Analyze the given policy in the case where the environment (i.e., the second player) is honest.

Determine how many of the top 1000 most frequent words are guessed correctly for different maximum numbers of guesses (`max_lives`).

In [None]:
from tqdm import tqdm

# Win rate of the policy against the honest environment, keyed by max_lives
# (1 through 10).
results = {}

for max_lives in range(1, 11):
    print(f"Testing with max_lives = {max_lives}")
    print("=====================================")

    # Fresh environment for this lives budget; the agent starts clean as well.
    env = Hangman('words/top_1000.txt', max_lives=max_lives, verbose=False)
    player.reset_guessed()

    # Number of words of the list that the agent wins.
    solved = 0

    for word in tqdm(env.words):
        # Start a game on this specific word and play it until it ends.
        state, done = env.reset(word), False
        while not done:
            guess = player.select_action(state)
            state, _, done, _ = env.step(guess)

        if env.is_game_won():
            solved += 1

        # Reset the agent's guess record before the next word.
        player.reset_guessed()

    # Fraction of the word list that was guessed correctly.
    results[max_lives] = solved / len(env.words)

print(results)

## *Cheat* case


### Original Policy
Analyze the given policy in the case where the environment (i.e., the second player) is cheating.

Keep the maximum number of guesses fixed at first. Then determine how many of the top 1000 most frequent words are guessed correctly as the cheating factor (`reject_rate`) varies.

In [None]:
from tqdm import tqdm

# Win rate of the original (not fine-tuned) policy against the cheating
# environment, keyed by (max_lives, reject_rate) with max_lives in 1..10 and
# reject_rate in 0.0, 0.1, ..., 1.0.
results = {}

for max_lives in range(1, 11):
    # The upper bound is 11 (not 10) so that reject_rate = 1.0 is included,
    # matching the 0.0-1.0 columns of the tables in the "Results" section and
    # the sweep used for the fine-tuned policy. The integer loop variable is
    # kept separate from the derived float instead of being rebound in place.
    for tenths in range(0, 11):
        reject_rate = tenths / 10
        print(f"Testing with max_lives = {max_lives} reject_rate = {reject_rate}")
        print("=====================================")

        # (Re-)initialize the environment for this configuration.
        # NOTE(review): this cell reads 'words/sample_1000.txt'; the "Results"
        # section also reports a "Top 1000 words" table, presumably produced by
        # swapping in 'words/top_1000.txt' here -- confirm.
        env = HangmanCheat('words/sample_1000.txt', max_lives=max_lives, verbose=False, reject_rate=reject_rate)
        player.reset_guessed()

        # Number of words of the list that the agent wins.
        correct = 0

        for word in tqdm(env.words):
            # Start a game on this specific word and play it until it ends.
            state = env.reset(word)
            done = False
            while not done:
                guess = player.select_action(state)
                state, _, done, _ = env.step(guess)

            if env.is_game_won():
                correct += 1

            # Reset the agent's guess record before the next word.
            player.reset_guessed()

        # Fraction of the word list that was guessed correctly.
        results[(max_lives, reject_rate)] = correct / len(env.words)

    # Print everything collected so far once per max_lives value, so that
    # partial results are visible while the sweep is still running.
    print(results)

### Fine-Tuned Policy

Fine-tune the policy on the cheating environment. Then determine how many of the top 1000 most frequent words are guessed correctly as the cheating factor (`reject_rate`) varies.

In [None]:
# print a loss curve for the policy at different epochs
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

# Agent whose weights are replaced by each fine-tuning checkpoint in turn.
cheat_player = CheatAgent(Network())

# Parallel lists: checkpoint epoch and the mean loss measured at that checkpoint.
results = {'epoch': [], 'loss': []}

# Checkpoints exist for epochs 0, 1000, ..., 20000.
for ep in tqdm(range(0, 20001, 1000)):
    cheat_player.load_weights(f'models/finetuned/policy_finetuned_{ep}.h5')
    cheat_player.reset_guessed()
    # reject_rate is not passed, so the environment's own default applies.
    cheat_env = HangmanCheat('words/top_1000.txt', max_lives=6, verbose=False)

    # Mean loss over 20 games; reset() is called without a word, so the
    # environment chooses it.
    episode_losses = []
    for _ in range(20):
        state, done = cheat_env.reset(), False
        while not done:
            guess = cheat_player.select_action(state)
            state, reward, done, ans = cheat_env.step(guess)

        # The info returned by the last step closes the episode; the value
        # returned by train_model() is recorded as this episode's loss.
        # NOTE(review): train_model() presumably also updates the weights, so
        # later games at the same checkpoint may run on a slightly changed
        # network -- confirm this is intended for a pure measurement.
        cheat_player.finalize_episode(ans)
        episode_losses.append(cheat_player.train_model())

    results['epoch'].append(ep)
    results['loss'].append(np.mean(episode_losses))

# Persist the curve so it can be inspected without re-running the sweep.
import pandas as pd
loss_df = pd.DataFrame(results)
loss_df.to_csv('results/finetuning_loss.csv', index=False)

In [None]:
import matplotlib.pyplot as plt

# Plot the mean loss per checkpoint and save the figure next to the CSV.
fig, ax = plt.subplots()
ax.plot(results['epoch'], results['loss'])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Finetuning Loss')
fig.tight_layout()
fig.savefig('results/finetuning_loss.png')

In [22]:
# Load the fine-tuned weights used for the evaluation below into a fresh agent.
# NOTE(review): this is the epoch-3000 checkpoint, not the last one (20000)
# produced by the fine-tuning run -- presumably picked from the loss curve;
# confirm.
finetuned_weights = 'models/finetuned/policy_finetuned_3000.h5'

cheat_player = CheatAgent(Network())
cheat_player.load_weights(finetuned_weights)
# Evaluation state, as for the original policy's agent.
cheat_player.eval()

In [26]:
from tqdm import tqdm

import pandas as pd

# Column-oriented results table: each list gains one entry per
# (max_lives, reject_rate) configuration.
results = {
    'max_lives': [],
    'reject_rate': [],
    'win_rate': [],
    'num_words_cheated': [],
    'num_words_doubted': [],
    'num_words_doubted_correct': []
}

# This run covers max_lives 9 and 10 with reject_rate 0.0, 0.1, ..., 1.0.
for max_lives in range(9, 11):
    for tenths in range(0, 11):
        reject_rate = tenths / 10
        print(f"Testing with max_lives = {max_lives} reject_rate = {reject_rate}")
        print("=====================================")

        # (Re-)initialize the environment for this configuration.
        env = HangmanCheat(
            'words/top_1000.txt',
            max_lives=max_lives,
            verbose=False,
            reject_rate=reject_rate,
        )
        cheat_player.reset_guessed()

        # Per-configuration tallies over the whole word list.
        wins = 0
        num_words_cheated = 0
        num_words_doubted = 0
        num_words_doubted_correct = 0

        for word in tqdm(env.words):
            # Start a game on this specific word and play it until it ends.
            state, done = env.reset(word), False
            while not done:
                guess = cheat_player.select_action(state)
                state, _, done, ans = env.step(guess)

            if env.is_game_won():
                wins += 1

            # `ans` is the info dict returned by the final step of the game.
            was_cheated = ans['cheated'] == True
            was_doubted = ans['doubted'] == True

            if was_cheated:
                num_words_cheated += 1
            if was_doubted:
                num_words_doubted += 1
            # A doubt counts as correct only if the game was cheated and the
            # doubt came at a later step than the cheat.
            if was_doubted and was_cheated and ans['doubted_step'] > ans['cheated_step']:
                num_words_doubted_correct += 1

            # Reset the agent's guess record before the next word.
            cheat_player.reset_guessed()

        # Append this configuration's row, one value per column.
        row = {
            'max_lives': max_lives,
            'reject_rate': reject_rate,
            'win_rate': wins / len(env.words),
            'num_words_cheated': num_words_cheated,
            'num_words_doubted': num_words_doubted,
            'num_words_doubted_correct': num_words_doubted_correct,
        }
        for column, value in row.items():
            results[column].append(value)

        print(f"({max_lives}, {reject_rate}): Win rate {wins / len(env.words)}, Num words cheated {num_words_cheated}, Num words doubted {num_words_doubted}, Num words doubted correct {num_words_doubted_correct}")

        # The CSV is rewritten after every configuration, so partial results
        # are already on disk while the sweep is still running.
        df = pd.DataFrame(results)
        df.to_csv('results/cheat_agent_top_1000_ml9-11.csv')

Testing with max_lives = 9 reject_rate = 0.0


100%|██████████| 1000/1000 [09:07<00:00,  1.83it/s]


(9, 0.0): Win rate 0.556, Num words cheated 0, Num words doubted 221, Num words doubted correct 0
Testing with max_lives = 9 reject_rate = 0.1


100%|██████████| 1000/1000 [09:40<00:00,  1.72it/s]


(9, 0.1): Win rate 0.574, Num words cheated 321, Num words doubted 374, Num words doubted correct 216
Testing with max_lives = 9 reject_rate = 0.2


100%|██████████| 1000/1000 [09:52<00:00,  1.69it/s]


(9, 0.2): Win rate 0.585, Num words cheated 565, Num words doubted 454, Num words doubted correct 353
Testing with max_lives = 9 reject_rate = 0.3


100%|██████████| 1000/1000 [10:06<00:00,  1.65it/s]


(9, 0.3): Win rate 0.584, Num words cheated 736, Num words doubted 526, Num words doubted correct 459
Testing with max_lives = 9 reject_rate = 0.4


100%|██████████| 1000/1000 [10:09<00:00,  1.64it/s]


(9, 0.4): Win rate 0.573, Num words cheated 803, Num words doubted 548, Num words doubted correct 492
Testing with max_lives = 9 reject_rate = 0.5


100%|██████████| 1000/1000 [10:01<00:00,  1.66it/s]


(9, 0.5): Win rate 0.605, Num words cheated 885, Num words doubted 572, Num words doubted correct 543
Testing with max_lives = 9 reject_rate = 0.6


100%|██████████| 1000/1000 [10:05<00:00,  1.65it/s]


(9, 0.6): Win rate 0.579, Num words cheated 922, Num words doubted 566, Num words doubted correct 547
Testing with max_lives = 9 reject_rate = 0.7


100%|██████████| 1000/1000 [09:57<00:00,  1.67it/s]


(9, 0.7): Win rate 0.594, Num words cheated 948, Num words doubted 572, Num words doubted correct 559
Testing with max_lives = 9 reject_rate = 0.8


100%|██████████| 1000/1000 [10:02<00:00,  1.66it/s]


(9, 0.8): Win rate 0.579, Num words cheated 956, Num words doubted 583, Num words doubted correct 565
Testing with max_lives = 9 reject_rate = 0.9


100%|██████████| 1000/1000 [10:04<00:00,  1.65it/s]


(9, 0.9): Win rate 0.586, Num words cheated 967, Num words doubted 581, Num words doubted correct 572
Testing with max_lives = 9 reject_rate = 1.0


100%|██████████| 1000/1000 [09:53<00:00,  1.68it/s]


(9, 1.0): Win rate 0.624, Num words cheated 972, Num words doubted 605, Num words doubted correct 600
Testing with max_lives = 10 reject_rate = 0.0


100%|██████████| 1000/1000 [09:35<00:00,  1.74it/s]


(10, 0.0): Win rate 0.565, Num words cheated 0, Num words doubted 287, Num words doubted correct 0
Testing with max_lives = 10 reject_rate = 0.1


100%|██████████| 1000/1000 [10:09<00:00,  1.64it/s]


(10, 0.1): Win rate 0.641, Num words cheated 369, Num words doubted 476, Num words doubted correct 284
Testing with max_lives = 10 reject_rate = 0.2


100%|██████████| 1000/1000 [10:19<00:00,  1.61it/s]


(10, 0.2): Win rate 0.65, Num words cheated 593, Num words doubted 552, Num words doubted correct 435
Testing with max_lives = 10 reject_rate = 0.3


100%|██████████| 1000/1000 [10:17<00:00,  1.62it/s]


(10, 0.3): Win rate 0.703, Num words cheated 732, Num words doubted 644, Num words doubted correct 561
Testing with max_lives = 10 reject_rate = 0.4


100%|██████████| 1000/1000 [10:16<00:00,  1.62it/s]


(10, 0.4): Win rate 0.715, Num words cheated 825, Num words doubted 686, Num words doubted correct 625
Testing with max_lives = 10 reject_rate = 0.5


100%|██████████| 1000/1000 [10:26<00:00,  1.60it/s]


(10, 0.5): Win rate 0.73, Num words cheated 882, Num words doubted 706, Num words doubted correct 671
Testing with max_lives = 10 reject_rate = 0.6


100%|██████████| 1000/1000 [10:28<00:00,  1.59it/s]


(10, 0.6): Win rate 0.742, Num words cheated 918, Num words doubted 717, Num words doubted correct 692
Testing with max_lives = 10 reject_rate = 0.7


100%|██████████| 1000/1000 [10:40<00:00,  1.56it/s]


(10, 0.7): Win rate 0.739, Num words cheated 945, Num words doubted 718, Num words doubted correct 694
Testing with max_lives = 10 reject_rate = 0.8


100%|██████████| 1000/1000 [10:32<00:00,  1.58it/s]


(10, 0.8): Win rate 0.745, Num words cheated 960, Num words doubted 730, Num words doubted correct 711
Testing with max_lives = 10 reject_rate = 0.9


100%|██████████| 1000/1000 [10:24<00:00,  1.60it/s]


(10, 0.9): Win rate 0.757, Num words cheated 973, Num words doubted 748, Num words doubted correct 740
Testing with max_lives = 10 reject_rate = 1.0


100%|██████████| 1000/1000 [10:28<00:00,  1.59it/s]

(10, 1.0): Win rate 0.726, Num words cheated 978, Num words doubted 716, Num words doubted correct 701





## Results

#### Top 1000 words

| max_lives | 0.0   | 0.1   | 0.2   | 0.3   | 0.4   | 0.5   | 0.6   | 0.7   | 0.8   | 0.9   | 1.0   |
|----------:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
| 1         | 0.013 | 0.005 | 0.005 | 0.002 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 2         | 0.051 | 0.030 | 0.020 | 0.012 | 0.011 | 0.004 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
| 3         | 0.106 | 0.065 | 0.033 | 0.018 | 0.005 | 0.001 | 0.000 | 0.003 | 0.002 | 0.001 | 0.000 |
| 4         | 0.210 | 0.127 | 0.070 | 0.040 | 0.019 | 0.013 | 0.006 | 0.003 | 0.000 | 0.000 | 0.000 |
| 5         | 0.317 | 0.178 | 0.107 | 0.061 | 0.033 | 0.014 | 0.010 | 0.004 | 0.000 | 0.000 | 0.000 |
| 6         | 0.434 | 0.251 | 0.155 | 0.069 | 0.046 | 0.024 | 0.009 | 0.006 | 0.003 | 0.001 | 0.000 |
| 7         | 0.552 | 0.307 | 0.176 | 0.108 | 0.040 | 0.025 | 0.018 | 0.004 | 0.006 | 0.002 | 0.000 |
| 8         | 0.646 | 0.375 | 0.193 | 0.128 | 0.066 | 0.038 | 0.014 | 0.006 | 0.002 | 0.002 | 0.000 |
| 9         | 0.721 | 0.432 | 0.268 | 0.148 | 0.074 | 0.033 | 0.017 | 0.006 | 0.005 | 0.001 | 0.000 |
| 10        | 0.781 | 0.458 | 0.274 | 0.150 | 0.082 | 0.046 | 0.017 | 0.014 | 0.002 | 0.003 | 0.000 |

#### Sample 1000 words

| max_lives | 0.0   | 0.1   | 0.2   | 0.3   | 0.4   | 0.5   | 0.6   | 0.7   | 0.8   | 0.9   | 1.0   |
|----------:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
| 1         | 0.020 | 0.008 | 0.002 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 2         | 0.067 | 0.030 | 0.015 | 0.006 | 0.002 | 0.001 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
| 3         | 0.148 | 0.079 | 0.032 | 0.022 | 0.004 | 0.004 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 4         | 0.239 | 0.113 | 0.057 | 0.022 | 0.009 | 0.005 | 0.000 | 0.002 | 0.000 | 0.000 | 0.000 |
| 5         | 0.360 | 0.188 | 0.090 | 0.051 | 0.018 | 0.008 | 0.001 | 0.002 | 0.001 | 0.000 | 0.000 |
| 6         | 0.493 | 0.268 | 0.128 | 0.066 | 0.025 | 0.015 | 0.007 | 0.003 | 0.000 | 0.000 | 0.000 |
| 7         | 0.581 | 0.291 | 0.152 | 0.066 | 0.045 | 0.017 | 0.006 | 0.001 | 0.000 | 0.000 | 0.000 |
| 8         | 0.659 | 0.338 | 0.197 | 0.080 | 0.035 | 0.017 | 0.006 | 0.003 | 0.000 | 0.000 | 0.000 |
| 9         | 0.730 | 0.377 | 0.200 | 0.099 | 0.043 | 0.020 | 0.005 | 0.001 | 0.000 | 0.001 | 0.000 |
| 10        | 0.777 | 0.414 | 0.226 | 0.094 | 0.051 | 0.025 | 0.009 | 0.005 | 0.000 | 0.001 | 0.000 |