# Analysis

### Setup

In [None]:
from env import Hangman, HangmanCheat
from network import NNAgent, Network, CheatAgent

In [None]:
# Build a fresh network and restore the original policy weights from disk.
POLICY_WEIGHTS_PATH = 'models/policy.h5'

policy_network = Network()
policy_network.load_weights(POLICY_WEIGHTS_PATH)

## *Honest* case

Analyze the given policy in the case where the environment (i.e., the second player) is honest.

Determine how many of the top 1000 most frequent words are guessed correctly for different maximum numbers of guesses.

### Original Policy

In [None]:
from tqdm import tqdm

# Fraction of words won by the original policy, keyed by max_lives (1 through 10).
results = {}

# Agent wrapping the original policy network; eval() is called once before testing.
player = NNAgent(policy_network)
player.eval()

for max_lives in range(1, 11):
    print(f"Testing with max_lives = {max_lives}")
    print("=====================================")

    # A new honest environment is built for every max_lives setting.
    env = Hangman('words/top_1000.txt', max_lives=max_lives, verbose=False)
    player.reset_guessed()

    # Number of words the agent wins at this setting.
    n_won = 0

    for target in tqdm(env.words):
        # Start an episode on this specific word and play until the env reports done.
        observation = env.reset(target)
        while True:
            letter = player.select_action(observation)
            observation, _, finished, _ = env.step(letter)
            if finished:
                break

        n_won += 1 if env.is_game_won() else 0

        # Clear the agent's guessed-letter state before the next word.
        player.reset_guessed()

    results[max_lives] = n_won / len(env.words)

print(results)

### Fine-Tuned Policy

In [None]:
from tqdm import tqdm

# Fraction of words won by the fine-tuned policy in the honest environment,
# keyed by max_lives (1 through 10).
results = {}

# Fine-tuned agent. Previously this cell wrapped the original `policy_network`
# without loading any fine-tuned checkpoint and without calling eval(), so it
# did not evaluate the fine-tuned policy at all. A fresh Network() is used so
# that loading the checkpoint does not touch the shared `policy_network`.
# NOTE(review): checkpoint 3000 is the one used by the cheat-case evaluation
# of the fine-tuned agent in this notebook - confirm it is the intended one here.
player = CheatAgent(Network())
player.load_weights('models/finetuned/policy_finetuned_3000.h5')
player.eval()

for max_lives in range(1, 11):
    print(f"Testing with max_lives = {max_lives}")
    print("=====================================")

    # (re-)initialize the honest environment for this max_lives setting
    env = Hangman('words/top_1000.txt', max_lives=max_lives, verbose=False)
    player.reset_guessed()

    # number of words won at this setting
    correct = 0

    # play one episode per word in the environment's word list
    for word in tqdm(env.words):
        state = env.reset(word)
        done = False
        while not done:
            guess = player.select_action(state)
            state, _, done, _ = env.step(guess)
        if env.is_game_won():
            correct += 1
        # clear the agent's guessed-letter state before the next word
        player.reset_guessed()
    results[max_lives] = correct / len(env.words)

print(results)

## *Cheat* case


### Original Policy
Analyze the given policy in the case where the environment (i.e., the second player) is cheating.

Keep the maximum number of guesses fixed at first. Determine how many of the top 1000 most frequent words are guessed correctly while varying the cheating factor.

In [None]:
from tqdm import tqdm

# Fraction of words won by the original policy against the cheating environment,
# keyed by the tuple (max_lives, reject_rate).
results = {}

# Agent wrapping the original policy network; eval() is called once before testing.
player = NNAgent(policy_network)
player.eval()

# Reject rate handed to HangmanCheat; fixed for this run.
reject_rate = 0.1

for max_lives in range(1, 11):
    print(f"Testing with max_lives = {max_lives}")
    print("=====================================")

    # A new cheating environment is built for every max_lives setting.
    env = HangmanCheat(
        'words/sample_1000.txt',
        max_lives=max_lives,
        verbose=False,
        reject_rate=reject_rate,
    )
    player.reset_guessed()

    # Number of words the agent wins at this setting.
    n_won = 0

    for target in tqdm(env.words):
        # Start an episode on this specific word and play until the env reports done.
        observation = env.reset(target)
        while True:
            letter = player.select_action(observation)
            observation, _, finished, _ = env.step(letter)
            if finished:
                break

        n_won += 1 if env.is_game_won() else 0

        # Clear the agent's guessed-letter state before the next word.
        player.reset_guessed()

    results[(max_lives, reject_rate)] = n_won / len(env.words)

    # Printed inside the loop, so the accumulated results are shown after every setting.
    print(results)

### Fine-Tuned Policy

Fine-tune the policy to the cheating environment. Determine how many of the top 1000 most frequent words are guessed correctly while varying the cheating factor.

In [None]:
# Collect a loss curve over the saved fine-tuning checkpoints.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

cheat_player = CheatAgent(Network())

# Column-oriented record: one (epoch, mean loss) entry per checkpoint.
results = {'epoch': [], 'loss': []}

# Checkpoints exist for epochs 0, 1000, ..., 20000.
for ep in tqdm(range(0, 20001, 1000)):
    cheat_player.load_weights(f'models/finetuned/policy_finetuned_{ep}.h5')
    # NOTE(review): guessed-letter state is reset once per checkpoint here, not
    # after every episode as in the evaluation cells - confirm that
    # finalize_episode() takes care of it.
    cheat_player.reset_guessed()
    cheat_env = HangmanCheat('words/top_1000.txt', max_lives=6, verbose=False)

    # Average the loss over 20 episodes; reset() is called without a word,
    # presumably sampling one at random.
    episode_losses = []
    for _ in range(20):
        state = cheat_env.reset()
        done = False
        while not done:
            guess = cheat_player.select_action(state)
            state, reward, done, ans = cheat_env.step(guess)
        cheat_player.finalize_episode(ans)
        # NOTE(review): train_model() presumably also performs a weight update;
        # the weights are reloaded at the start of the next checkpoint.
        episode_losses.append(cheat_player.train_model())

    results['epoch'].append(ep)
    results['loss'].append(np.mean(episode_losses))

# Persist the curve so it can be inspected without re-running the scan.
loss_df = pd.DataFrame(results)
loss_df.to_csv('results/finetuning_loss.csv', index=False)

In [None]:
import matplotlib.pyplot as plt

# Plot from `loss_df` rather than the global `results`: several other cells in
# this notebook rebind `results` with a different layout, so reading it here
# breaks (or plots the wrong data) when cells are re-run out of order.
# `loss_df` holds the same 'epoch' and 'loss' columns.
fig, ax = plt.subplots()
ax.plot(loss_df['epoch'], loss_df['loss'])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Finetuning Loss')
fig.tight_layout()
fig.savefig('results/finetuning_loss.png')

In [None]:
import pandas as pd
from tqdm import tqdm

# Column-oriented results: one entry per evaluated max_lives setting.
results = {
    column: []
    for column in (
        'max_lives',
        'reject_rate',
        'win_rate',
        'num_words_cheated',
        'num_words_doubted',
        'num_words_doubted_correct',
    )
}

# Fine-tuned agent: fresh network, checkpoint 3000, eval() called before testing.
cheat_player = CheatAgent(Network())
cheat_player.load_weights('models/finetuned/policy_finetuned_3000.h5')
cheat_player.eval()

# Reject rate handed to HangmanCheat; fixed for this run.
reject_rate = 0.1

# range(9, 11) evaluates max_lives = 9 and 10.
for max_lives in range(9, 11):
    print(f"Testing with max_lives = {max_lives}")
    print("=====================================")

    # A new cheating environment is built for every max_lives setting.
    env = HangmanCheat(
        'words/top_1000.txt',
        max_lives=max_lives,
        verbose=False,
        reject_rate=reject_rate,
    )
    cheat_player.reset_guessed()

    # Per-setting tallies.
    wins = 0
    num_words_cheated = 0
    num_words_doubted = 0
    num_words_doubted_correct = 0

    for word in tqdm(env.words):
        # Play one episode on this word; `ans` is the info dict of the final step.
        state = env.reset(word)
        done = False
        while not done:
            guess = cheat_player.select_action(state)
            state, _, done, ans = env.step(guess)

        was_cheated = ans['cheated'] == True
        was_doubted = ans['doubted'] == True

        if env.is_game_won():
            wins += 1
        if was_cheated:
            num_words_cheated += 1
        if was_doubted:
            num_words_doubted += 1
        # Counted only when both flags are set and doubted_step is strictly
        # greater than cheated_step.
        if was_doubted and was_cheated and (ans['doubted_step'] > ans['cheated_step']):
            num_words_doubted_correct += 1

        # Clear the agent's guessed-letter state before the next word.
        cheat_player.reset_guessed()

    win_rate = wins / len(env.words)

    # Record this setting.
    results['max_lives'].append(max_lives)
    results['reject_rate'].append(reject_rate)
    results['win_rate'].append(win_rate)
    results['num_words_cheated'].append(num_words_cheated)
    results['num_words_doubted'].append(num_words_doubted)
    results['num_words_doubted_correct'].append(num_words_doubted_correct)

    print(f"({max_lives}, {reject_rate}): Win rate {win_rate}, Num words cheated {num_words_cheated}, Num words doubted {num_words_doubted}, Num words doubted correct {num_words_doubted_correct}")

    # The CSV is rewritten after every setting, so it always holds the rows gathered so far.
    df = pd.DataFrame(results)
    df.to_csv('results/cheat_agent_top_1000_ml9-11.csv')

## Results

#### Top 1000 words, Original Policy

| max_lives | 0.0   | 0.1   | 0.2   | 0.3   | 0.4   | 0.5   | 0.6   | 0.7   | 0.8   | 0.9   | 1.0   |
|----------:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
| 1         | 0.013 | 0.005 | 0.005 | 0.002 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 2         | 0.051 | 0.030 | 0.020 | 0.012 | 0.011 | 0.004 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
| 3         | 0.106 | 0.065 | 0.033 | 0.018 | 0.005 | 0.001 | 0.000 | 0.003 | 0.002 | 0.001 | 0.000 |
| 4         | 0.210 | 0.127 | 0.070 | 0.040 | 0.019 | 0.013 | 0.006 | 0.003 | 0.000 | 0.000 | 0.000 |
| 5         | 0.317 | 0.178 | 0.107 | 0.061 | 0.033 | 0.014 | 0.010 | 0.004 | 0.000 | 0.000 | 0.000 |
| 6         | 0.434 | 0.251 | 0.155 | 0.069 | 0.046 | 0.024 | 0.009 | 0.006 | 0.003 | 0.001 | 0.000 |
| 7         | 0.552 | 0.307 | 0.176 | 0.108 | 0.040 | 0.025 | 0.018 | 0.004 | 0.006 | 0.002 | 0.000 |
| 8         | 0.646 | 0.375 | 0.193 | 0.128 | 0.066 | 0.038 | 0.014 | 0.006 | 0.002 | 0.002 | 0.000 |
| 9         | 0.721 | 0.432 | 0.268 | 0.148 | 0.074 | 0.033 | 0.017 | 0.006 | 0.005 | 0.001 | 0.000 |
| 10        | 0.781 | 0.458 | 0.274 | 0.150 | 0.082 | 0.046 | 0.017 | 0.014 | 0.002 | 0.003 | 0.000 |

#### Sample 1000 words, Original Policy

| max_lives | 0.0   | 0.1   | 0.2   | 0.3   | 0.4   | 0.5   | 0.6   | 0.7   | 0.8   | 0.9   | 1.0   |
|----------:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
| 1         | 0.020 | 0.008 | 0.002 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 2         | 0.067 | 0.030 | 0.015 | 0.006 | 0.002 | 0.001 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
| 3         | 0.148 | 0.079 | 0.032 | 0.022 | 0.004 | 0.004 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
| 4         | 0.239 | 0.113 | 0.057 | 0.022 | 0.009 | 0.005 | 0.000 | 0.002 | 0.000 | 0.000 | 0.000 |
| 5         | 0.360 | 0.188 | 0.090 | 0.051 | 0.018 | 0.008 | 0.001 | 0.002 | 0.001 | 0.000 | 0.000 |
| 6         | 0.493 | 0.268 | 0.128 | 0.066 | 0.025 | 0.015 | 0.007 | 0.003 | 0.000 | 0.000 | 0.000 |
| 7         | 0.581 | 0.291 | 0.152 | 0.066 | 0.045 | 0.017 | 0.006 | 0.001 | 0.000 | 0.000 | 0.000 |
| 8         | 0.659 | 0.338 | 0.197 | 0.080 | 0.035 | 0.017 | 0.006 | 0.003 | 0.000 | 0.000 | 0.000 |
| 9         | 0.730 | 0.377 | 0.200 | 0.099 | 0.043 | 0.020 | 0.005 | 0.001 | 0.000 | 0.001 | 0.000 |
| 10        | 0.777 | 0.414 | 0.226 | 0.094 | 0.051 | 0.025 | 0.009 | 0.005 | 0.000 | 0.001 | 0.000 |

#### Top 1000 words, Fine-Tuned Policy

| max_lives | 0.0   | 0.1   | 0.2   | 0.3   | 0.4   | 0.5   | 0.6   | 0.7   | 0.8   | 0.9   | 1.0   |
|----------:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
| 1         | 0.015 | 0.014 | 0.007 | 0.005 | 0.006 | 0.008 | 0.004 | 0.002 | 0.003 | 0.001 | 0.004 |
| 2         | 0.048 | 0.033 | 0.023 | 0.02  | 0.008 | 0.007 | 0.008 | 0.011 | 0.006 | 0.01  | 0.008 |
| 3         | 0.112 | 0.072 | 0.048 | 0.031 | 0.019 | 0.018 | 0.017 | 0.014 | 0.01  | 0.013 | 0.009 |
| 4         | 0.185 | 0.117 | 0.087 | 0.07  | 0.053 | 0.043 | 0.046 | 0.037 | 0.039 | 0.032 | 0.036 |
| 5         | 0.285 | 0.195 | 0.143 | 0.112 | 0.084 | 0.079 | 0.056 | 0.065 | 0.07  | 0.077 | 0.073 |
| 6         | 0.356 | 0.273 | 0.236 | 0.202 | 0.148 | 0.141 | 0.142 | 0.143 | 0.146 | 0.131 | 0.139 |
| 7         | 0.417 | 0.397 | 0.343 | 0.296 | 0.304 | 0.254 | 0.265 | 0.253 | 0.234 | 0.238 | 0.223 |
| 8         | 0.499 | 0.506 | 0.458 | 0.426 | 0.436 | 0.408 | 0.415 | 0.398 | 0.392 | 0.437 | 0.432 |
| 9         | 0.556 | 0.574 | 0.585 | 0.584 | 0.573 | 0.605 | 0.579 | 0.594 | 0.579 | 0.586 | 0.624 |
| 10        | 0.565 | 0.641 | 0.65  | 0.703 | 0.715 | 0.73  | 0.742 | 0.739 | 0.745 | 0.757 | 0.726 |