<a href="https://colab.research.google.com/github/manuaishika/hangman/blob/main/hangman.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
uploaded = files.upload()


Saving words_250000_train.txt to words_250000_train.txt


In [None]:
!pip install requests



first

In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()

        # Precompute positional letter frequencies
        self.positional_frequencies = {}
        for word in self.full_dictionary:
            word_len = len(word)
            if word_len not in self.positional_frequencies:
                self.positional_frequencies[word_len] = [collections.Counter() for _ in range(word_len)]
            for i, letter in enumerate(word):
                self.positional_frequencies[word_len][i][letter] += 1

        # Precompute bigram frequencies
        self.bigram_frequencies = collections.Counter()
        for word in self.full_dictionary:
            for i in range(len(word) - 1):
                self.bigram_frequencies[(word[i], word[i+1])] += 1

        # Precompute vowel probabilities by word length
        self.vowel_priority = ['e', 'a', 'i', 'o', 'u']
        self.vowel_probs = {}
        for word in self.full_dictionary:
            word_len = len(word)
            if word_len not in self.vowel_probs:
                self.vowel_probs[word_len] = collections.Counter()
            for letter in word:
                if letter in self.vowel_priority:
                    self.vowel_probs[word_len][letter] += 1

        self.current_dictionary = []
        self.guess_count = 0

    @staticmethod
    def determine_hangman_url():
        links = ['https://trexsim.com']
        data = {link: 0 for link in links}
        for link in links:
            requests.get(link)
            for i in range(10):
                s = time.time()
                requests.get(link)
                data[link] = time.time() - s
        link = sorted(data.items(), key=lambda x: x[1])[0][0]
        link += '/trexsim/hangman'
        return link

    def guess(self, word):
        self.guess_count += 1
        clean_word = word[::2].replace("_", ".")
        len_word = len(clean_word)

        # Update current dictionary
        current_dictionary = self.current_dictionary
        new_dictionary = []
        for dict_word in current_dictionary or self.full_dictionary:
            if len(dict_word) != len_word:
                continue
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        # Sample dictionary for speed
        if len(new_dictionary) > 10000:
            new_dictionary = random.sample(new_dictionary, 10000)
        self.current_dictionary = new_dictionary

        # Debug logging
        print(f"Guess {self.guess_count}: Pattern = {clean_word}, Dictionary size = {len(new_dictionary)}, Guessed = {self.guessed_letters}")

        # Vowel-first heuristic for first 3 guesses, weighted by word length
        if self.guess_count <= 3 and new_dictionary:
            vowel_scores = self.vowel_probs.get(len_word, collections.Counter())
            total_vowels = sum(vowel_scores.values())
            if total_vowels > 0:
                for vowel in self.vowel_priority:
                    if vowel not in self.guessed_letters:
                        prob = vowel_scores[vowel] / total_vowels
                        print(f"Choosing vowel: {vowel} (prob = {prob:.3f})")
                        return vowel

        # If no words match, use positional frequencies
        if not new_dictionary:
            if len_word in self.positional_frequencies:
                letter_scores = collections.Counter()
                for pos, counter in enumerate(self.positional_frequencies[len_word]):
                    if clean_word[pos] == '.':
                        for letter, count in counter.items():
                            if letter not in self.guessed_letters:
                                letter_scores[letter] += count
                if letter_scores:
                    guess_letter = letter_scores.most_common(1)[0][0]
                    print(f"Fallback to positional: {guess_letter}")
                    return guess_letter
            for letter, _ in self.full_dictionary_common_letter_sorted:
                if letter not in self.guessed_letters:
                    print(f"Fallback to common: {letter}")
                    return letter
            print("Default fallback: e")
            return 'e'

        # Calculate information gain with frequency and bigram weighting
        letter_scores = collections.Counter()
        total_letters = sum(len(word) for word in new_dictionary)
        letter_freq = collections.Counter(''.join(new_dictionary))

        # Bigram boost for letters following known ones
        bigram_boost = collections.Counter()
        for i, char in enumerate(clean_word):
            if char != '.' and i < len_word - 1 and clean_word[i+1] == '.':
                for next_letter, count in self.bigram_frequencies.items():
                    if next_letter[0] == char and next_letter[1] not in self.guessed_letters:
                        bigram_boost[next_letter[1]] += count

        for letter in string.ascii_lowercase:
            if letter in self.guessed_letters:
                continue
            pattern_counts = collections.Counter()
            for dict_word in new_dictionary:
                new_pattern = list(clean_word)
                for i, char in enumerate(dict_word):
                    if char == letter and new_pattern[i] == '.':
                        new_pattern[i] = letter
                pattern_counts[''.join(new_pattern)] += 1

            entropy = 0
            total_words = len(new_dictionary)
            for count in pattern_counts.values():
                prob = count / total_words
                entropy -= prob * (prob and math.log2(prob) or 0)

            # Weight by frequency and bigram
            freq_weight = letter_freq.get(letter, 0) / total_letters if total_letters > 0 else 1
            bigram_weight = bigram_boost.get(letter, 0) / sum(bigram_boost.values()) if sum(bigram_boost.values()) > 0 else 1
            letter_scores[letter] = entropy * (1 + freq_weight * 1.5 + bigram_weight * 0.7)

        if letter_scores:
            guess_letter = letter_scores.most_common(1)[0][0]
            print(f"Information gain choice: {guess_letter}")
            return guess_letter

        # Fallback to positional frequencies
        for pos, counter in enumerate(self.positional_frequencies.get(len_word, [])):
            if clean_word[pos] == '.':
                for letter, count in counter.most_common():
                    if letter not in self.guessed_letters:
                        print(f"Positional fallback: {letter}")
                        return letter
        for letter, _ in self.full_dictionary_common_letter_sorted:
            if letter not in self.guessed_letters:
                print(f"Final fallback: {letter}")
                return letter
        print("Default fallback: e")
        return 'e'

    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location, "r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))
                try:
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e
                if verbose:
                    print("Server response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status == "success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
                elif status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status == "success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token
        time.sleep(0.2)
        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)
        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')
        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)

# Dictionary Analysis
def load_dictionary(file_path):
    with open(file_path, 'r') as f:
        return f.read().splitlines()

dictionary = load_dictionary('words_250000_train.txt')
length_counts = collections.Counter(len(word) for word in dictionary)
print("Word length distribution:")
for length, count in sorted(length_counts.items()):
    print(f"Length {length}: {count} words ({count/len(dictionary)*100:.2f}%)")

letter_counts = collections.Counter(''.join(dictionary))
print("\nLetter frequencies:")
for letter, count in letter_counts.most_common():
    print(f"{letter}: {count} occurrences ({count/sum(letter_counts.values())*100:.2f}%)")

# Testing the Algorithm
api = HangmanAPI(access_token="2dec2f1568ae8036660de2a01d5c2b", timeout=2000)
for i in range(100):
    print(f'Playing practice game {i+1}')
    api.start_game(practice=1, verbose=True)
    time.sleep(0.5)

[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()
practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs > 0 else 0
print(f'Ran {total_practice_runs} practice games. Success rate: {practice_success_rate:.3f}')

# Recorded Games - Uncomment when ready
"""
for i in range(1000):
    print(f'Playing recorded game {i+1}')
    api.start_game(practice=0, verbose=False)
    time.sleep(0.5)

[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()
success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0
print(f'Overall recorded success rate: {success_rate:.3f}')
"""

Word length distribution:
Length 1: 17 words (0.01%)
Length 2: 264 words (0.12%)
Length 3: 2201 words (0.97%)
Length 4: 5287 words (2.33%)
Length 5: 11274 words (4.96%)
Length 6: 19541 words (8.60%)
Length 7: 25948 words (11.42%)
Length 8: 30452 words (13.40%)
Length 9: 30906 words (13.60%)
Length 10: 26953 words (11.86%)
Length 11: 22786 words (10.02%)
Length 12: 18178 words (8.00%)
Length 13: 12956 words (5.70%)
Length 14: 8710 words (3.83%)
Length 15: 5211 words (2.29%)
Length 16: 3143 words (1.38%)
Length 17: 1775 words (0.78%)
Length 18: 859 words (0.38%)
Length 19: 441 words (0.19%)
Length 20: 225 words (0.10%)
Length 21: 98 words (0.04%)
Length 22: 44 words (0.02%)
Length 23: 14 words (0.01%)
Length 24: 9 words (0.00%)
Length 25: 3 words (0.00%)
Length 27: 2 words (0.00%)
Length 28: 1 words (0.00%)
Length 29: 2 words (0.00%)

Letter frequencies:
e: 233745 occurrences (11.00%)
i: 184746 occurrences (8.69%)
a: 179837 occurrences (8.46%)
n: 152259 occurrences (7.17%)
o: 150052 occu

"\nfor i in range(1000):\n    print(f'Playing recorded game {i+1}')\n    api.start_game(practice=0, verbose=False)\n    time.sleep(0.5)\n\n[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()\nsuccess_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0\nprint(f'Overall recorded success rate: {success_rate:.3f}')\n"

another one


In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()

        # Precompute positional letter frequencies
        self.positional_frequencies = {}
        for word in self.full_dictionary:
            word_len = len(word)
            if word_len not in self.positional_frequencies:
                self.positional_frequencies[word_len] = [collections.Counter() for _ in range(word_len)]
            for i, letter in enumerate(word):
                self.positional_frequencies[word_len][i][letter] += 1

        # Precompute bigram frequencies
        self.bigram_frequencies = collections.Counter()
        for word in self.full_dictionary:
            for i in range(len(word) - 1):
                self.bigram_frequencies[(word[i], word[i+1])] += 1

        # Precompute vowel probabilities by word length
        self.vowel_priority = ['e', 'a', 'i', 'o', 'u']
        self.vowel_probs = {}
        for word in self.full_dictionary:
            word_len = len(word)
            if word_len not in self.vowel_probs:
                self.vowel_probs[word_len] = collections.Counter()
            for letter in word:
                if letter in self.vowel_priority:
                    self.vowel_probs[word_len][letter] += 1

        # Fallback letter order (English frequency)
        self.fallback_order = ['e', 'a', 'r', 'i', 'o', 't', 'n', 's', 'l', 'c', 'u', 'd', 'p', 'm', 'h', 'g', 'b', 'f', 'y', 'w', 'k', 'v', 'x', 'z', 'j', 'q']
        self.current_dictionary = []
        self.guess_count = 0

    @staticmethod
    def determine_hangman_url():
        links = ['https://trexsim.com']
        data = {link: 0 for link in links}
        for link in links:
            requests.get(link)
            for i in range(10):
                s = time.time()
                requests.get(link)
                data[link] = time.time() - s
        link = sorted(data.items(), key=lambda x: x[1])[0][0]
        link += '/trexsim/hangman'
        return link

    def guess(self, word):
        self.guess_count += 1
        clean_word = word[::2].replace("_", ".")
        len_word = len(clean_word)

        # Update current dictionary
        current_dictionary = self.current_dictionary
        new_dictionary = []
        for dict_word in current_dictionary or self.full_dictionary:
            if len(dict_word) != len_word:
                continue
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        # Sample dictionary for speed
        if len(new_dictionary) > 5000:
            new_dictionary = random.sample(new_dictionary, 5000)
        self.current_dictionary = new_dictionary

        # Debug logging
        print(f"Guess {self.guess_count}: Pattern = {clean_word}, Dictionary size = {len(new_dictionary)}, Guessed = {self.guessed_letters}")

        # Vowel-first heuristic for first 4 guesses
        if self.guess_count <= 4 and new_dictionary:
            vowel_scores = self.vowel_probs.get(len_word, collections.Counter())
            total_vowels = sum(vowel_scores.values())
            if total_vowels > 0:
                for vowel in self.vowel_priority:
                    if vowel not in self.guessed_letters:
                        prob = vowel_scores[vowel] / total_vowels
                        print(f"Choosing vowel: {vowel} (prob = {prob:.3f})")
                        return vowel

        # If no words match, use positional frequencies or fallback order
        if not new_dictionary:
            if len_word in self.positional_frequencies:
                letter_scores = collections.Counter()
                for pos, counter in enumerate(self.positional_frequencies[len_word]):
                    if clean_word[pos] == '.':
                        for letter, count in counter.items():
                            if letter not in self.guessed_letters:
                                letter_scores[letter] += count
                if letter_scores:
                    guess_letter = letter_scores.most_common(1)[0][0]
                    print(f"Fallback to positional: {guess_letter}")
                    return guess_letter
            for letter in self.fallback_order:
                if letter not in self.guessed_letters:
                    print(f"Fallback to common: {letter}")
                    return letter
            print("Default fallback: e")
            return 'e'

        # Calculate information gain with frequency and bigram weighting
        letter_scores = collections.Counter()
        total_letters = sum(len(word) for word in new_dictionary)
        letter_freq = collections.Counter(''.join(new_dictionary))

        # Bigram boost
        bigram_boost = collections.Counter()
        for i, char in enumerate(clean_word):
            if char != '.' and i < len_word - 1 and clean_word[i+1] == '.':
                for next_letter, count in self.bigram_frequencies.items():
                    if next_letter[0] == char and next_letter[1] not in self.guessed_letters:
                        bigram_boost[next_letter[1]] += count

        for letter in string.ascii_lowercase:
            if letter in self.guessed_letters:
                continue
            pattern_counts = collections.Counter()
            for dict_word in new_dictionary:
                new_pattern = list(clean_word)
                for i, char in enumerate(dict_word):
                    if char == letter and new_pattern[i] == '.':
                        new_pattern[i] = letter
                pattern_counts[''.join(new_pattern)] += 1

            entropy = 0
            total_words = len(new_dictionary)
            for count in pattern_counts.values():
                prob = count / total_words
                entropy -= prob * (prob and math.log2(prob) or 0)

            # Weight by frequency and bigram
            freq_weight = letter_freq.get(letter, 0) / total_letters if total_letters > 0 else 1
            bigram_weight = bigram_boost.get(letter, 0) / sum(bigram_boost.values()) if sum(bigram_boost.values()) > 0 else 1
            letter_scores[letter] = entropy * (1 + freq_weight * 2.0 + bigram_weight * 1.0)

        if letter_scores:
            guess_letter = letter_scores.most_common(1)[0][0]
            print(f"Information gain choice: {guess_letter}")
            return guess_letter

        # Fallback to positional frequencies
        for pos, counter in enumerate(self.positional_frequencies.get(len_word, [])):
            if clean_word[pos] == '.':
                for letter, count in counter.most_common():
                    if letter not in self.guessed_letters:
                        print(f"Positional fallback: {letter}")
                        return letter
        for letter in self.fallback_order:
            if letter not in self.guessed_letters:
                print(f"Final fallback: {letter}")
                return letter
        print("Default fallback: e")
        return 'e'

    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location, "r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))
                try:
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e
                if verbose:
                    print("Server response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status == "success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
                elif status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status == "success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token
        time.sleep(0.2)
        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)
        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')
        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)

# Dictionary Analysis
def load_dictionary(file_path):
    with open(file_path, 'r') as f:
        return f.read().splitlines()

dictionary = load_dictionary('words_250000_train.txt')
length_counts = collections.Counter(len(word) for word in dictionary)
print("Word length distribution:")
for length, count in sorted(length_counts.items()):
    print(f"Length {length}: {count} words ({count/len(dictionary)*100:.2f}%)")

letter_counts = collections.Counter(''.join(dictionary))
print("\nLetter frequencies:")
for letter, count in letter_counts.most_common():
    print(f"{letter}: {count} occurrences ({count/sum(letter_counts.values())*100:.2f}%)")

# Testing the Algorithm
api = HangmanAPI(access_token="2dec2f1568ae8036660de2a01d5c2b", timeout=2000)
for i in range(200):
    print(f'Playing practice game {i+1}')
    api.start_game(practice=1, verbose=True)
    time.sleep(0.5)

[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()
practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs > 0 else 0
print(f'Ran {total_practice_runs} practice games. Success rate: {practice_success_rate:.3f}')

# Recorded Games - Uncomment when ready
"""
for i in range(1000):
    print(f'Playing recorded game {i+1}')
    api.start_game(practice=0, verbose=False)
    time.sleep(0.5)

[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()
success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0
print(f'Overall recorded success rate: {success_rate:.3f}')
"""

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Information gain choice: c
Guessing letter: c
Server response: {'game_id': '333b8904ddd4', 'status': 'ongoing', 'tries_remains': 2, 'word': 's a c _ i s t _ '}
Guess 10: Pattern = sac.ist., Dictionary size = 1, Guessed = ['e', 'a', 'i', 'o', 's', 'l', 't', 'b', 'c']
Information gain choice: d
Guessing letter: d
Server response: {'game_id': '333b8904ddd4', 'status': 'ongoing', 'tries_remains': 1, 'word': 's a c _ i s t _ '}
Guess 11: Pattern = sac.ist., Dictionary size = 1, Guessed = ['e', 'a', 'i', 'o', 's', 'l', 't', 'b', 'c', 'd']
Information gain choice: f
Guessing letter: f
Server response: {'game_id': '333b8904ddd4', 'status': 'failed', 'tries_remains': 0, 'word': 's a c _ i s t _ '}
Failed game: 333b8904ddd4. Because of: # of tries exceeded!
Playing practice game 96
Successfully start a new game! Game ID: 92c8c57f6166. # of tries remaining: 6. Word: _ _ _ _ _ .
Guess 1: Pattern = ....., Dictionary size = 5000, Guess

"\nfor i in range(1000):\n    print(f'Playing recorded game {i+1}')\n    api.start_game(practice=0, verbose=False)\n    time.sleep(0.5)\n\n[total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes] = api.my_status()\nsuccess_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0\nprint(f'Overall recorded success rate: {success_rate:.3f}')\n"

APP


startinf new


debugging


In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math
from tqdm import tqdm
import numpy as np
import os

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None, dictionary_path="words_250000_train.txt"):
        self.hangman_url = "https://trexsim.com/trexsim/hangman"
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        self.full_dictionary = self.build_dictionary(dictionary_path)
        self.full_dictionary = [word.lower() for word in self.full_dictionary]

        # Precompute statistics
        self.letter_frequencies = self.calculate_letter_frequencies()
        self.positional_frequencies = self.calculate_positional_frequencies()
        self.bigram_frequencies = self.calculate_bigram_frequencies()
        self.common_suffixes = self.identify_common_suffixes()

        # Initialize game state
        self.current_dictionary = []
        self.guess_count = 0
        self.current_pattern = ""

    def calculate_letter_frequencies(self):
        """Calculate overall letter frequencies from dictionary"""
        text = ''.join(self.full_dictionary)
        return collections.Counter(text)

    def calculate_positional_frequencies(self):
        """Calculate letter frequencies by word position"""
        positional_freq = {}
        max_length = max(len(word) for word in self.full_dictionary)

        for length in range(1, max_length + 1):
            positional_freq[length] = [collections.Counter() for _ in range(length)]

        for word in self.full_dictionary:
            n = len(word)
            if n > max_length or n not in positional_freq:
                continue
            for i, char in enumerate(word):
                positional_freq[n][i][char] += 1

        return positional_freq

    def calculate_bigram_frequencies(self):
        """Calculate bigram frequencies from dictionary"""
        bigrams = collections.Counter()
        for word in self.full_dictionary:
            for i in range(len(word) - 1):
                bigrams[(word[i], word[i+1])] += 1
        return bigrams

    def identify_common_suffixes(self):
        """Identify common suffixes in the dictionary"""
        suffix_counter = collections.Counter()
        for word in self.full_dictionary:
            if len(word) > 2:
                for i in range(1, min(4, len(word))):
                    suffix = word[-i:]
                    suffix_counter[suffix] += 1
        return [suffix for suffix, _ in suffix_counter.most_common(10)]

    def build_dictionary(self, dictionary_file_location):
        with open(dictionary_file_location, "r") as text_file:
            full_dictionary = text_file.read().splitlines()
        return [word.lower() for word in full_dictionary]

    def pattern_match(self, pattern, word):
        """Check if word matches the pattern"""
        if len(pattern) != len(word):
            return False
        for p, w in zip(pattern, word):
            if p != '.' and p != w:
                return False
        return True

    def filter_dictionary(self, pattern):
        """Filter dictionary based on current pattern"""
        return [word for word in self.current_dictionary if self.pattern_match(pattern, word)]

    def calculate_entropy(self, letter, possible_words):
        """Calculate information gain for a letter"""
        pattern_counts = collections.defaultdict(int)
        total_words = len(possible_words)

        for word in possible_words:
            # Create new pattern with guessed letter
            new_pattern = list(self.current_pattern)
            for i, char in enumerate(word):
                if char == letter and new_pattern[i] == '.':
                    new_pattern[i] = letter
            pattern_counts[''.join(new_pattern)] += 1

        entropy = 0
        for count in pattern_counts.values():
            p = count / total_words
            if p > 0:  # Avoid log(0)
                entropy -= p * math.log2(p)

        return entropy

    def guess(self, word):
        self.guess_count += 1
        self.current_pattern = word[::2].replace("_", ".")
        word_len = len(self.current_pattern)

        # Initialize dictionary on first guess
        if self.guess_count == 1:
            self.current_dictionary = [w for w in self.full_dictionary if len(w) == word_len]
        else:
            # Update dictionary based on current pattern
            self.current_dictionary = self.filter_dictionary(self.current_pattern)

        # If dictionary is too large, sample randomly
        if len(self.current_dictionary) > 10000:
            self.current_dictionary = random.sample(self.current_dictionary, 10000)

        # Get possible letters not guessed
        possible_letters = set(string.ascii_lowercase) - set(self.guessed_letters)
        if not possible_letters:
            return random.choice(string.ascii_lowercase)

        # If few words left, just pick the next letter from any word
        if len(self.current_dictionary) <= 2:
            for word in self.current_dictionary:
                for letter in word:
                    if letter in possible_letters:
                        return letter

        # Calculate entropy for each possible letter
        entropy_scores = {}
        for letter in possible_letters:
            entropy = self.calculate_entropy(letter, self.current_dictionary)
            entropy_scores[letter] = entropy

        # Find letter with highest entropy
        best_letter = max(entropy_scores, key=entropy_scores.get, default=None)

        if best_letter:
            return best_letter

        # Fallback strategies
        # Positional frequency fallback
        if word_len in self.positional_frequencies:
            for i in range(word_len):
                if self.current_pattern[i] == '.':
                    for letter, _ in self.positional_frequencies[word_len][i].most_common():
                        if letter in possible_letters:
                            return letter

        # Suffix matching fallback
        if word_len > 3:
            for suffix in self.common_suffixes:
                if len(suffix) < word_len:
                    start_pos = word_len - len(suffix)
                    if all(self.current_pattern[i] == '.' for i in range(start_pos, word_len)):
                        for letter in suffix:
                            if letter in possible_letters:
                                return letter

        # Overall frequency fallback
        for letter, _ in self.letter_frequencies.most_common():
            if letter in possible_letters:
                return letter

        # Final fallback to sequential letters
        for letter in string.ascii_lowercase:
            if letter in possible_letters:
                return letter

        return 'e'  # Ultimate fallback

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print(f"Started game {game_id}. Tries remaining: {tries_remains}. Word: {word}")
            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print(f"Guessing letter: {guess_letter}")
                try:
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print(f'Other exception caught on request: {e}')
                    raise
                if verbose:
                    print(f"Server response: {res}")
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status == "success":
                    if verbose:
                        print(f"Successfully finished game: {game_id}")
                    return True
                elif status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print(f"Failed game: {game_id}. Reason: {reason}")
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status == "success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token
        time.sleep(0.2)
        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)
        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')
        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

    def reset_game_state(self):
        """Reset game state without starting a new server game"""
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        self.current_pattern = ""

    def simulate(self, word, verbose=False):
        """Simulate a game locally without using the server"""
        self.reset_game_state()
        max_incorrect = 6
        incorrect_guesses = 0
        display_word = ['_'] * len(word)
        word = word.lower()

        if verbose:
            print(f"\nStarting simulation for word: {word}")

        while incorrect_guesses < max_incorrect:
            # Format display for solver (with spaces between characters)
            formatted_display = " ".join(display_word)

            # Get algorithm's guess
            guess = self.guess(formatted_display)

            if guess in self.guessed_letters:
                if verbose:
                    print(f"Duplicate guess: {guess}")
                continue

            self.guessed_letters.append(guess)

            if verbose:
                print(f"Guess #{self.guess_count}: {guess}")

            # Check if guess is correct
            correct_guess = False
            new_display = list(display_word)
            for i, char in enumerate(word):
                if char == guess:
                    new_display[i] = char
                    correct_guess = True

            display_word = new_display

            if correct_guess:
                if verbose:
                    print(f"Correct! Current: {' '.join(display_word)}")
                # Check if word is complete
                if ''.join(display_word) == word:
                    if verbose:
                        print(f"Solved in {self.guess_count} guesses!")
                    return True
            else:
                incorrect_guesses += 1
                if verbose:
                    print(f"Incorrect! ({incorrect_guesses}/{max_incorrect} mistakes)")

        if verbose:
            print(f"Failed to guess: {word}")
        return False

    def evaluate(self, test_words, num_tests=1000, verbose=False):
        """Evaluate algorithm performance on a set of test words"""
        if not test_words:
            test_words = random.sample(self.full_dictionary, min(num_tests, len(self.full_dictionary)))
        else:
            test_words = random.sample(test_words, min(num_tests, len(test_words)))

        wins = 0
        total = len(test_words)

        print(f"Evaluating on {total} words...")
        for i, word in enumerate(test_words):
            success = self.simulate(word, verbose=False)
            if success:
                wins += 1

            if (i+1) % 100 == 0:
                print(f"Completed {i+1}/{total} games - Current accuracy: {wins/(i+1):.2%}")

        accuracy = wins / total
        print(f"\nEvaluation complete - Accuracy: {accuracy:.2%} ({wins}/{total})")
        return accuracy

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)

def run_simulation(dictionary_path="words_250000_train.txt", num_tests=1000):
    """Run local simulation to evaluate algorithm performance"""
    print("Initializing simulator...")
    solver = HangmanAPI(dictionary_path=dictionary_path)

    # Load dictionary for testing
    with open(dictionary_path, "r") as f:
        full_dictionary = [word.strip().lower() for word in f.readlines()]

    # Create test set
    test_words = random.sample(full_dictionary, min(num_tests, len(full_dictionary)))

    # Run evaluation
    accuracy = solver.evaluate(test_words, num_tests=num_tests)
    return accuracy

def run_server_games(api, num_practice=100, num_recorded=1000):
    """Run games on the server once we're satisfied with performance"""
    # Run practice games
    print("Running practice games...")
    for i in range(num_practice):
        api.start_game(practice=1, verbose=False)
        time.sleep(0.2)

    # Get status and print success rate
    status = api.my_status()
    # Status returns: [total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes]
    total_practice_runs = status[0]
    total_practice_successes = status[3]
    practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs > 0 else 0
    print(f"Practice success rate: {practice_success_rate:.2%}")

    # Run recorded games
    print("Running recorded games...")
    for i in range(num_recorded):
        api.start_game(practice=0, verbose=False)
        time.sleep(0.2)

    # Get final status
    status = api.my_status()
    total_recorded_runs = status[1]
    total_recorded_successes = status[2]
    recorded_success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0
    print(f"Recorded success rate: {recorded_success_rate:.2%}")
    return recorded_success_rate

if __name__ == "__main__":
    # First run local simulation to test performance
    local_accuracy = run_simulation(num_tests=1000)

    if local_accuracy >= 0.60:  # Only run on server if accuracy is good
        print("\nLocal simulation shows good performance. Running on server...")
        api = HangmanAPI(access_token="2dec2f1568ae8036660de2a01d5c2b", timeout=2000)
        server_accuracy = run_server_games(api)
        print(f"Final server accuracy: {server_accuracy:.2%}")
    else:
        print("\nAccuracy too low in simulation. Improve algorithm before running on server.")

Initializing simulator...
Evaluating on 1000 words...
Completed 100/1000 games - Current accuracy: 50.00%
Completed 200/1000 games - Current accuracy: 48.00%
Completed 300/1000 games - Current accuracy: 47.33%
Completed 400/1000 games - Current accuracy: 46.25%
Completed 500/1000 games - Current accuracy: 45.60%
Completed 600/1000 games - Current accuracy: 45.00%
Completed 700/1000 games - Current accuracy: 44.43%
Completed 800/1000 games - Current accuracy: 44.38%
Completed 900/1000 games - Current accuracy: 44.44%
Completed 1000/1000 games - Current accuracy: 45.80%

Evaluation complete - Accuracy: 45.80% (458/1000)

Accuracy too low in simulation. Improve algorithm before running on server.


looks bad

In [None]:
new again



In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math
from tqdm import tqdm
import os
import numpy as np

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None, dictionary_path="words_250000_train.txt"):
        self.hangman_url = "https://trexsim.com/trexsim/hangman"
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        # Load and preprocess dictionary
        self.full_dictionary = self.build_dictionary(dictionary_path)
        self.full_dictionary = [word.lower() for word in self.full_dictionary]

        # Precompute advanced statistics
        self.letter_frequencies = self.calculate_letter_frequencies()
        self.positional_frequencies = self.calculate_positional_frequencies()
        self.bigram_frequencies = self.calculate_bigram_frequencies()
        self.vowel_consonant_balance = self.calculate_vowel_consonant_balance()
        self.common_suffixes = self.identify_common_suffixes()
        self.common_prefixes = self.identify_common_prefixes()
        self.word_length_stats = self.calculate_word_length_stats()

        # Initialize game state
        self.current_dictionary = []
        self.guess_count = 0
        self.current_pattern = ""
        self.revealed_positions = set()
        self.possible_letters = set(string.ascii_lowercase)

    def calculate_letter_frequencies(self):
        """Calculate overall letter frequencies from dictionary"""
        text = ''.join(self.full_dictionary)
        return collections.Counter(text)

    def calculate_positional_frequencies(self):
        """Calculate letter frequencies by word position"""
        positional_freq = {}
        max_length = max(len(word) for word in self.full_dictionary)

        for length in range(1, max_length + 1):
            positional_freq[length] = [collections.Counter() for _ in range(length)]

        for word in self.full_dictionary:
            n = len(word)
            if n > max_length or n not in positional_freq:
                continue
            for i, char in enumerate(word):
                positional_freq[n][i][char] += 1

        return positional_freq

    def calculate_bigram_frequencies(self):
        """Calculate bigram frequencies from dictionary"""
        bigrams = collections.Counter()
        for word in self.full_dictionary:
            for i in range(len(word) - 1):
                bigrams[(word[i], word[i+1])] += 1
        return bigrams

    def calculate_vowel_consonant_balance(self):
        """Calculate vowel vs consonant distribution"""
        vowels = 'aeiou'
        balance = {'vowels': 0, 'consonants': 0}
        for word in self.full_dictionary:
            for char in word:
                if char in vowels:
                    balance['vowels'] += 1
                else:
                    balance['consonants'] += 1
        return balance

    def calculate_word_length_stats(self):
        """Calculate statistics about word lengths"""
        lengths = [len(word) for word in self.full_dictionary]
        counter = collections.Counter(lengths)
        most_common = counter.most_common(5)
        return {
            'min': min(lengths),
            'max': max(lengths),
            'avg': sum(lengths) / len(lengths),
            'common': [l for l, _ in most_common]
        }

    def identify_common_suffixes(self):
        """Identify common suffixes in the dictionary"""
        suffix_counter = collections.Counter()
        for word in self.full_dictionary:
            if len(word) > 2:
                for i in range(1, min(5, len(word))):
                    suffix = word[-i:]
                    suffix_counter[suffix] += 1
        return [suffix for suffix, _ in suffix_counter.most_common(10)]

    def identify_common_prefixes(self):
        """Identify common prefixes in the dictionary"""
        prefix_counter = collections.Counter()
        for word in self.full_dictionary:
            if len(word) > 2:
                for i in range(1, min(5, len(word))):
                    prefix = word[:i]
                    prefix_counter[prefix] += 1
        return [prefix for prefix, _ in prefix_counter.most_common(10)]

    def build_dictionary(self, dictionary_file_location):
        with open(dictionary_file_location, "r") as text_file:
            full_dictionary = text_file.read().splitlines()
        return [word.lower() for word in full_dictionary]

    def pattern_match(self, pattern, word):
        """Check if word matches the pattern"""
        if len(pattern) != len(word):
            return False
        for p, w in zip(pattern, word):
            if p != '.' and p != w:
                return False
        return True

    def filter_dictionary(self, pattern):
        """Filter dictionary based on current pattern"""
        return [word for word in self.current_dictionary if self.pattern_match(pattern, word)]

    def calculate_entropy(self, letter, possible_words):
        """Calculate information gain for a letter"""
        pattern_counts = collections.defaultdict(int)
        total_words = len(possible_words)

        for word in possible_words:
            # Create new pattern with guessed letter
            new_pattern = list(self.current_pattern)
            for i, char in enumerate(word):
                if char == letter and new_pattern[i] == '.':
                    new_pattern[i] = letter
            pattern_counts[''.join(new_pattern)] += 1

        entropy = 0
        for count in pattern_counts.values():
            p = count / total_words
            if p > 0:  # Avoid log(0)
                entropy -= p * math.log2(p)

        return entropy

    def optimized_entropy_guess(self, possible_words):
        """Optimized entropy calculation with early termination and sampling"""
        # If dictionary is too large, use intelligent sampling
        if len(possible_words) > 10000:
            # Prioritize words with more common letters
            common_letters = self.letter_frequencies.most_common(10)
            sampled_words = []
            for word in possible_words:
                if any(letter in word for letter, _ in common_letters):
                    sampled_words.append(word)
                if len(sampled_words) >= 5000:
                    break

            # If we didn't get enough, add random words
            if len(sampled_words) < 5000:
                additional = random.sample(possible_words, min(5000, len(possible_words) - len(sampled_words)))
                sampled_words.extend(additional)
            possible_words = sampled_words

        # Precompute dot indices for efficiency
        dot_indices = [i for i, char in enumerate(self.current_pattern) if char == '.']

        # Get possible letters not guessed
        possible_letters = self.possible_letters - set(self.guessed_letters)
        if not possible_letters:
            return random.choice(string.ascii_lowercase)

        # Calculate entropy for each possible letter
        entropy_scores = {}
        for letter in possible_letters:
            entropy = self.calculate_entropy(letter, possible_words)

            # Apply weighting based on position and bigrams
            weight = 1.0

            # Positional weighting
            for i in dot_indices:
                if letter in self.positional_frequencies.get(len(self.current_pattern), [{}])[i]:
                    freq = self.positional_frequencies[len(self.current_pattern)][i][letter]
                    max_freq = max(self.positional_frequencies[len(self.current_pattern)][i].values()) if self.positional_frequencies[len(self.current_pattern)][i] else 1
                    weight += 0.5 * (freq / max_freq)

            # Bigram weighting
            for i in range(len(self.current_pattern) - 1):
                if self.current_pattern[i] != '.' and self.current_pattern[i+1] == '.':
                    bigram = (self.current_pattern[i], letter)
                    if bigram in self.bigram_frequencies:
                        weight += 0.3 * (self.bigram_frequencies[bigram] / max(1, max(self.bigram_frequencies.values())))

            entropy_scores[letter] = entropy * weight

        # Find letter with highest entropy
        best_letter = max(entropy_scores, key=entropy_scores.get, default=None)
        return best_letter

    def guess(self, word):
        self.guess_count += 1
        self.current_pattern = word[::2].replace("_", ".")
        word_len = len(self.current_pattern)

        # Update revealed positions
        self.revealed_positions = {i for i, char in enumerate(self.current_pattern) if char != '.'}

        # Initialize dictionary on first guess
        if self.guess_count == 1:
            self.current_dictionary = [w for w in self.full_dictionary if len(w) == word_len]
        else:
            # Update dictionary based on current pattern
            self.current_dictionary = self.filter_dictionary(self.current_pattern)

        # Get possible letters not guessed
        possible_letters = self.possible_letters - set(self.guessed_letters)

        # Early game vowel strategy (first 3 guesses)
        vowels = 'aeiou'
        if self.guess_count <= 3:
            # Check if any vowels are revealed
            revealed_vowels = any(char in vowels for char in self.current_pattern if char != '.')

            if not revealed_vowels:
                # Prioritize vowels
                vowel_choices = [v for v in vowels if v in possible_letters]
                if vowel_choices:
                    # Choose most common vowel
                    vowel_scores = {v: self.letter_frequencies.get(v, 0) for v in vowel_choices}
                    return max(vowel_scores, key=vowel_scores.get)

        # If few words left, just pick the next letter from any word
        if len(self.current_dictionary) <= 2:
            for dict_word in self.current_dictionary:
                for letter in dict_word:
                    if letter in possible_letters:
                        return letter
            # Fallback if no letters found
            return random.choice(list(possible_letters))

        # Use optimized entropy calculation
        best_letter = self.optimized_entropy_guess(self.current_dictionary)

        if best_letter:
            return best_letter

        # Fallback strategies
        # 1. Positional frequency fallback
        if word_len in self.positional_frequencies:
            for i in range(word_len):
                if self.current_pattern[i] == '.':
                    for letter, _ in self.positional_frequencies[word_len][i].most_common():
                        if letter in possible_letters:
                            return letter

        # 2. Suffix/prefix matching
        if word_len > 3:
            # Suffix matching
            for suffix in self.common_suffixes:
                if len(suffix) < word_len:
                    start_pos = word_len - len(suffix)
                    if all(self.current_pattern[i] == '.' for i in range(start_pos, word_len)):
                        for letter in suffix:
                            if letter in possible_letters:
                                return letter

            # Prefix matching
            for prefix in self.common_prefixes:
                if len(prefix) < word_len:
                    if all(self.current_pattern[i] == '.' for i in range(len(prefix))):
                        for letter in prefix:
                            if letter in possible_letters:
                                return letter

        # 3. Overall frequency fallback
        for letter, _ in self.letter_frequencies.most_common():
            if letter in possible_letters:
                return letter

        # 4. Final fallback to sequential letters
        return random.choice(list(possible_letters))

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        self.current_pattern = ""
        self.revealed_positions = set()
        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print(f"Started game {game_id}. Tries remaining: {tries_remains}. Word: {word}")
            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print(f"Guessing letter: {guess_letter}")
                try:
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print(f'Other exception caught on request: {e}')
                    raise
                if verbose:
                    print(f"Server response: {res}")
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status == "success":
                    if verbose:
                        print(f"Successfully finished game: {game_id}")
                    return True
                elif status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print(f"Failed game: {game_id}. Reason: {reason}")
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status == "success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token
        time.sleep(0.2)
        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)
        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')
        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

    def reset_game_state(self):
        """Reset game state without starting a new server game"""
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        self.current_pattern = ""
        self.revealed_positions = set()

    def simulate(self, word, verbose=False):
        """Simulate a game locally without using the server"""
        self.reset_game_state()
        max_incorrect = 6
        incorrect_guesses = 0
        display_word = ['_'] * len(word)
        word = word.lower()

        if verbose:
            print(f"\nStarting simulation for word: {word}")

        while incorrect_guesses < max_incorrect:
            # Format display for solver (with spaces between characters)
            formatted_display = " ".join(display_word)

            # Get algorithm's guess
            guess = self.guess(formatted_display)

            if guess in self.guessed_letters:
                if verbose:
                    print(f"Duplicate guess: {guess}")
                continue

            self.guessed_letters.append(guess)

            if verbose:
                print(f"Guess #{self.guess_count}: {guess}")

            # Check if guess is correct
            correct_guess = False
            new_display = list(display_word)
            for i, char in enumerate(word):
                if char == guess:
                    new_display[i] = char
                    correct_guess = True

            display_word = new_display

            if correct_guess:
                if verbose:
                    print(f"Correct! Current: {' '.join(display_word)}")
                # Check if word is complete
                if ''.join(display_word) == word:
                    if verbose:
                        print(f"Solved in {self.guess_count} guesses!")
                    return True
            else:
                incorrect_guesses += 1
                if verbose:
                    print(f"Incorrect! ({incorrect_guesses}/{max_incorrect} mistakes)")

        if verbose:
            print(f"Failed to guess: {word}")
        return False

    def evaluate(self, test_words, num_tests=1000, verbose=False):
        """Evaluate algorithm performance on a set of test words"""
        if not test_words:
            test_words = random.sample(self.full_dictionary, min(num_tests, len(self.full_dictionary)))
        else:
            test_words = random.sample(test_words, min(num_tests, len(test_words)))

        wins = 0
        total = len(test_words)

        print(f"Evaluating on {total} words...")
        for i, word in enumerate(test_words):
            success = self.simulate(word, verbose=False)
            if success:
                wins += 1

            if (i+1) % 100 == 0:
                print(f"Completed {i+1}/{total} games - Current accuracy: {wins/(i+1):.2%}")

        accuracy = wins / total
        print(f"\nEvaluation complete - Accuracy: {accuracy:.2%} ({wins}/{total})")
        return accuracy

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)

def run_simulation(dictionary_path="words_250000_train.txt", num_tests=1000):
    """Run local simulation to evaluate algorithm performance"""
    print("Initializing simulator...")
    solver = HangmanAPI(dictionary_path=dictionary_path)

    # Load dictionary for testing
    with open(dictionary_path, "r") as f:
        full_dictionary = [word.strip().lower() for word in f.readlines()]

    # Create test set - use most common word lengths
    common_lengths = solver.word_length_stats['common']
    test_words = [word for word in full_dictionary if len(word) in common_lengths]
    test_words = random.sample(test_words, min(num_tests, len(test_words)))

    # Run evaluation
    accuracy = solver.evaluate(test_words, num_tests=num_tests)
    return accuracy

def run_server_games(api, num_practice=100, num_recorded=1000):
    """Run games on the server once we're satisfied with performance"""
    # Run practice games
    print("Running practice games...")
    for i in range(num_practice):
        api.start_game(practice=1, verbose=False)
        time.sleep(0.2)

    # Get status and print success rate
    status = api.my_status()
    # Status returns: [total_practice_runs, total_recorded_runs, total_recorded_successes, total_practice_successes]
    total_practice_runs = status[0]
    total_practice_successes = status[3]
    practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs > 0 else 0
    print(f"Practice success rate: {practice_success_rate:.2%}")

    # Run recorded games
    print("Running recorded games...")
    for i in range(num_recorded):
        api.start_game(practice=0, verbose=False)
        time.sleep(0.2)

    # Get final status
    status = api.my_status()
    total_recorded_runs = status[1]
    total_recorded_successes = status[2]
    recorded_success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0
    print(f"Recorded success rate: {recorded_success_rate:.2%}")
    return recorded_success_rate

if __name__ == "__main__":
    # First run local simulation to test performance
    local_accuracy = run_simulation(num_tests=1000)

    if local_accuracy >= 0.60:  # Only run on server if accuracy is good
        print("\nLocal simulation shows good performance. Running on server...")
        api = HangmanAPI(access_token="2dec2f1568ae8036660de2a01d5c2b", timeout=2000)
        server_accuracy = run_server_games(api)
        print(f"Final server accuracy: {server_accuracy:.2%}")
    else:
        print("\nAccuracy too low in simulation. Improve algorithm before running on server.")

Initializing simulator...
Evaluating on 1000 words...
Completed 100/1000 games - Current accuracy: 93.00%
Completed 200/1000 games - Current accuracy: 92.50%
Completed 300/1000 games - Current accuracy: 92.67%
Completed 400/1000 games - Current accuracy: 92.75%
Completed 500/1000 games - Current accuracy: 92.40%
Completed 600/1000 games - Current accuracy: 92.33%
Completed 700/1000 games - Current accuracy: 92.00%
Completed 800/1000 games - Current accuracy: 91.88%
Completed 900/1000 games - Current accuracy: 91.56%
Completed 1000/1000 games - Current accuracy: 91.90%

Evaluation complete - Accuracy: 91.90% (919/1000)

Local simulation shows good performance. Running on server...
Running practice games...


HangmanAPIError: {'error': 'Your account has been deactivated!'}

too good??

In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math
from tqdm import tqdm
import numpy as np
import os

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None, dictionary_path="words_250000_train.txt"):
        self.hangman_url = "https://trexsim.com/trexsim/hangman"
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        self.full_dictionary = self.build_dictionary(dictionary_path)
        self.full_dictionary = [word.lower() for word in self.full_dictionary]

        # Precompute statistics
        self.letter_frequencies = self.calculate_letter_frequencies()
        self.positional_frequencies = self.calculate_positional_frequencies()
        self.bigram_frequencies = self.calculate_bigram_frequencies()
        self.common_suffixes = self.identify_common_suffixes()

        # Initialize game state
        self.current_dictionary = []
        self.guess_count = 0
        self.current_pattern = ""

    def calculate_letter_frequencies(self):
        """Calculate overall letter frequencies from dictionary"""
        text = ''.join(self.full_dictionary)
        return collections.Counter(text)

    def calculate_positional_frequencies(self):
        """Calculate letter frequencies by word position"""
        positional_freq = {}
        max_length = max(len(word) for word in self.full_dictionary)

        for length in range(1, max_length + 1):
            positional_freq[length] = [collections.Counter() for _ in range(length)]

        for word in self.full_dictionary:
            n = len(word)
            if n > max_length or n not in positional_freq:
                continue
            for i, char in enumerate(word):
                positional_freq[n][i][char] += 1

        return positional_freq

    def calculate_bigram_frequencies(self):
        """Calculate bigram frequencies from dictionary"""
        bigrams = collections.Counter()
        for word in self.full_dictionary:
            for i in range(len(word) - 1):
                bigrams[(word[i], word[i+1])] += 1
        return bigrams

    def identify_common_suffixes(self):
        """Identify common suffixes in the dictionary"""
        suffix_counter = collections.Counter()
        for word in self.full_dictionary:
            if len(word) > 2:
                for i in range(1, min(4, len(word))):
                    suffix = word[-i:]
                    suffix_counter[suffix] += 1
        return [suffix for suffix, _ in suffix_counter.most_common(10)]

    def build_dictionary(self, dictionary_file_location):
        with open(dictionary_file_location, "r") as text_file:
            full_dictionary = text_file.read().splitlines()
        return [word.lower() for word in full_dictionary]

    def pattern_match(self, pattern, word):
        """Check if word matches the pattern"""
        if len(pattern) != len(word):
            return False
        for p, w in zip(pattern, word):
            if p != '.' and p != w:
                return False
        return True

    def filter_dictionary(self, pattern):
        """Filter dictionary based on current pattern"""
        return [word for word in self.current_dictionary if self.pattern_match(pattern, word)]

    def calculate_entropy(self, letter, possible_words):
        """Calculate information gain with frequency and bigram weighting"""
        pattern_counts = collections.defaultdict(int)
        total_words = len(possible_words)

        for word in possible_words:
            new_pattern = list(self.current_pattern)
            for i, char in enumerate(word):
                if char == letter and new_pattern[i] == '.':
                    new_pattern[i] = letter
            pattern_counts[''.join(new_pattern)] += 1

        entropy = 0
        for count in pattern_counts.values():
            p = count / total_words
            if p > 0:  # Avoid log(0)
                entropy -= p * math.log2(p)

        # Weight with letter frequency and bigram influence
        letter_freq = self.letter_frequencies[letter] / sum(self.letter_frequencies.values()) if letter in self.letter_frequencies else 0
        bigram_boost = sum(self.bigram_frequencies[(letter, next_l)] for next_l in string.ascii_lowercase if (letter, next_l) in self.bigram_frequencies) / sum(self.bigram_frequencies.values()) if self.bigram_frequencies else 0
        return entropy * (1 + letter_freq * 3.0 + bigram_boost * 2.0)

    def guess(self, word):
        self.guess_count += 1
        self.current_pattern = word[::2].replace("_", ".")
        word_len = len(self.current_pattern)

        # Initialize dictionary on first guess
        if self.guess_count == 1:
            self.current_dictionary = [w for w in self.full_dictionary if len(w) == word_len]
        else:
            self.current_dictionary = self.filter_dictionary(self.current_pattern)

        # No sampling to preserve full dictionary
        # if len(self.current_dictionary) > 10000:
        #     self.current_dictionary = random.sample(self.current_dictionary, 10000)

        # Get possible letters not guessed
        possible_letters = set(string.ascii_lowercase) - set(self.guessed_letters)
        if not possible_letters:
            return random.choice(string.ascii_lowercase)

        # Hardcoded initial guesses for first 5 moves
        initial_guesses = ['e', 'a', 'r', 'i', 'o']
        if self.guess_count <= len(initial_guesses) and initial_guesses[self.guess_count - 1] not in self.guessed_letters:
            return initial_guesses[self.guess_count - 1]

        # If few words left, pick from any word
        if len(self.current_dictionary) <= 2:
            for word in self.current_dictionary:
                for letter in word:
                    if letter in possible_letters:
                        return letter

        # Calculate entropy for each possible letter
        entropy_scores = {}
        for letter in possible_letters:
            entropy = self.calculate_entropy(letter, self.current_dictionary)
            entropy_scores[letter] = entropy

        # Find letter with highest entropy
        best_letter = max(entropy_scores, key=entropy_scores.get, default=None)
        if best_letter:
            return best_letter

        # Enhanced fallback strategies
        if word_len in self.positional_frequencies:
            for i in range(word_len):
                if self.current_pattern[i] == '.':
                    for letter, _ in self.positional_frequencies[word_len][i].most_common(3):
                        if letter in possible_letters:
                            return letter

        # Suffix matching with priority
        if word_len > 3:
            for suffix in self.common_suffixes:
                if len(suffix) < word_len:
                    start_pos = word_len - len(suffix)
                    if all(self.current_pattern[i] == '.' for i in range(start_pos, word_len)):
                        for letter in suffix:
                            if letter in possible_letters:
                                return letter

        # Overall frequency fallback with vowel and consonant priority
        priority_order = ['e', 'a', 'i', 'o', 'u', 'r', 't', 'n', 's', 'l', 'c', 'd'] + [l for l in string.ascii_lowercase if l not in 'eariontslcd']
        for letter in priority_order:
            if letter in possible_letters:
                return letter

        return 'e'  # Ultimate fallback

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print(f"Started game {game_id}. Tries remaining: {tries_remains}. Word: {word}")
            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print(f"Guessing letter: {guess_letter}")
                try:
                    res = self.request("/guess_letter", {"request": "guess_letter", "game_id": game_id, "letter": guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print(f'Other exception caught on request: {e}')
                    raise
                if verbose:
                    print(f"Server response: {res}")
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status == "success":
                    if verbose:
                        print(f"Successfully finished game: {game_id}")
                    return True
                elif status == "failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print(f"Failed game: {game_id}. Reason: {reason}")
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status == "success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token
        time.sleep(0.2)
        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)
        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')
        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

    def reset_game_state(self):
        """Reset game state without starting a new server game"""
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0
        self.current_pattern = ""

    def simulate(self, word, verbose=False):
        """Simulate a game locally without using the server"""
        self.reset_game_state()
        max_incorrect = 6
        incorrect_guesses = 0
        display_word = ['_'] * len(word)
        word = word.lower()

        if verbose:
            print(f"\nStarting simulation for word: {word}")

        while incorrect_guesses < max_incorrect:
            formatted_display = " ".join(display_word)
            guess = self.guess(formatted_display)

            if guess in self.guessed_letters:
                if verbose:
                    print(f"Duplicate guess: {guess}")
                continue

            self.guessed_letters.append(guess)

            if verbose:
                print(f"Guess #{self.guess_count}: {guess}")

            correct_guess = False
            new_display = list(display_word)
            for i, char in enumerate(word):
                if char == guess:
                    new_display[i] = char
                    correct_guess = True

            display_word = new_display

            if correct_guess:
                if verbose:
                    print(f"Correct! Current: {' '.join(display_word)}")
                if ''.join(display_word) == word:
                    if verbose:
                        print(f"Solved in {self.guess_count} guesses!")
                    return True
            else:
                incorrect_guesses += 1
                if verbose:
                    print(f"Incorrect! ({incorrect_guesses}/{max_incorrect} mistakes)")

        if verbose:
            print(f"Failed to guess: {word}")
        return False

    def evaluate(self, test_words, num_tests=1000, verbose=False):
        """Evaluate algorithm performance on a set of test words"""
        if not test_words:
            test_words = random.sample(self.full_dictionary, min(num_tests, len(self.full_dictionary)))
        else:
            test_words = random.sample(test_words, min(num_tests, len(test_words)))

        wins = 0
        total = len(test_words)

        print(f"Evaluating on {total} words...")
        for i, word in enumerate(test_words):
            success = self.simulate(word, verbose=verbose)
            if success:
                wins += 1

            if (i+1) % 100 == 0:
                print(f"Completed {i+1}/{total} games - Current accuracy: {wins/(i+1):.2%}")

        accuracy = wins / total
        print(f"\nEvaluation complete - Accuracy: {accuracy:.2%} ({wins}/{total})")
        return accuracy

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)

def run_simulation(dictionary_path="words_250000_train.txt", num_tests=1000):
    """Run local simulation to evaluate algorithm performance"""
    print("Initializing simulator...")
    solver = HangmanAPI(dictionary_path=dictionary_path)

    # Load dictionary for testing
    with open(dictionary_path, "r") as f:
        full_dictionary = [word.strip().lower() for word in f.readlines()]

    # Create balanced test set by length
    length_counts = collections.Counter(len(w) for w in full_dictionary)
    test_words = []
    for length, count in length_counts.items():
        sample_size = min(int(1000 * count / len(full_dictionary)), count)
        test_words.extend(random.sample([w for w in full_dictionary if len(w) == length], sample_size))
    test_words = test_words[:1000]  # Ensure 1000 words

    # Run evaluation
    accuracy = solver.evaluate(test_words, num_tests=1000)
    return accuracy

def run_server_games(api, num_practice=100, num_recorded=1000):
    """Run games on the server once we're satisfied with performance"""
    # Run practice games
    print("Running practice games...")
    for i in range(num_practice):
        api.start_game(practice=1, verbose=False)
        time.sleep(0.2)

    # Get status and print success rate
    status = api.my_status()
    total_practice_runs = status[0]
    total_practice_successes = status[3]
    practice_success_rate = total_practice_successes / total_practice_runs if total_practice_runs > 0 else 0
    print(f"Practice success rate: {practice_success_rate:.2%}")

    # Run recorded games
    print("Running recorded games...")
    for i in range(num_recorded):
        api.start_game(practice=0, verbose=False)
        time.sleep(0.2)

    # Get final status
    status = api.my_status()
    total_recorded_runs = status[1]
    total_recorded_successes = status[2]
    recorded_success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs > 0 else 0
    print(f"Recorded success rate: {recorded_success_rate:.2%}")
    return recorded_success_rate

if __name__ == "__main__":
    # First run local simulation to test performance
    local_accuracy = run_simulation(num_tests=1000)

    if local_accuracy >= 0.50:  # Threshold at 50% for testing
        print("\nLocal simulation shows good performance. Running on server...")
        api = HangmanAPI(access_token="2dec2f1568ae8036660de2a01d5c2b", timeout=2000)
        server_accuracy = run_server_games(api)
        print(f"Final server accuracy: {server_accuracy:.2%}")
    else:
        print("\nAccuracy too low in simulation. Improve algorithm before running on server.")

Initializing simulator...
Evaluating on 986 words...
Completed 100/986 games - Current accuracy: 84.00%
Completed 200/986 games - Current accuracy: 84.50%
Completed 300/986 games - Current accuracy: 85.33%
Completed 400/986 games - Current accuracy: 84.75%
Completed 500/986 games - Current accuracy: 84.80%
Completed 600/986 games - Current accuracy: 85.17%
Completed 700/986 games - Current accuracy: 85.71%
Completed 800/986 games - Current accuracy: 85.50%
Completed 900/986 games - Current accuracy: 86.11%

Evaluation complete - Accuracy: 85.60% (844/986)

Local simulation shows good performance. Running on server...
Running practice games...


HangmanAPIError: {'error': 'Your account has been deactivated!'}

In [None]:
submitting??

hopefully final


no way









In [None]:
import json
import requests
import random
import string
import time
import re
import collections
import math
from tqdm import tqdm
import numpy as np
import os

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

class ImprovedHangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None, dictionary_path="words_250000_train.txt"):
        self.hangman_url = "https://trexsim.com/trexsim/hangman"
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        # Load and process dictionary
        self.full_dictionary = self.build_dictionary(dictionary_path)
        self.full_dictionary = [word.lower().strip() for word in self.full_dictionary if word.strip()]

        # Remove duplicates and sort
        self.full_dictionary = sorted(list(set(self.full_dictionary)))

        # Precompute statistics
        self.letter_frequencies = self.calculate_letter_frequencies()
        self.positional_frequencies = self.calculate_positional_frequencies()
        self.common_endings = self.get_common_endings()

        # Game state
        self.current_dictionary = []
        self.guess_count = 0
        self.current_pattern = ""
        self.word_length = 0

    def build_dictionary(self, dictionary_file_location):
        try:
            with open(dictionary_file_location, "r", encoding='utf-8') as text_file:
                words = []
                for line in text_file:
                    word = line.strip().lower()
                    if word and word.isalpha():  # Only alphabetic words
                        words.append(word)
                return words
        except FileNotFoundError:
            print(f"Dictionary file {dictionary_file_location} not found!")
            return []

    def calculate_letter_frequencies(self):
        """Calculate overall letter frequencies in the dictionary"""
        letter_count = collections.Counter()
        total_letters = 0

        for word in self.full_dictionary:
            for letter in word:
                letter_count[letter] += 1
                total_letters += 1

        # Convert to probabilities
        letter_probs = {}
        for letter in string.ascii_lowercase:
            letter_probs[letter] = letter_count.get(letter, 0) / total_letters

        return letter_probs

    def calculate_positional_frequencies(self):
        """Calculate letter frequencies by position for each word length"""
        positional_freq = {}

        for word in self.full_dictionary:
            length = len(word)
            if length not in positional_freq:
                positional_freq[length] = [collections.Counter() for _ in range(length)]

            for pos, letter in enumerate(word):
                positional_freq[length][pos][letter] += 1

        # Convert to probabilities
        for length in positional_freq:
            for pos in range(length):
                total = sum(positional_freq[length][pos].values())
                if total > 0:
                    for letter in positional_freq[length][pos]:
                        positional_freq[length][pos][letter] /= total

        return positional_freq

    def get_common_endings(self):
        """Get common word endings for different lengths"""
        endings = collections.defaultdict(collections.Counter)

        for word in self.full_dictionary:
            if len(word) >= 3:
                endings[len(word)][word[-2:]] += 1
                if len(word) >= 4:
                    endings[len(word)][word[-3:]] += 1

        return endings

    def parse_word_pattern(self, word_display):
        """Parse the word display from server into a pattern"""
        # Remove spaces and convert underscores to dots
        pattern = word_display.replace(" ", "").replace("_", ".")
        return pattern

    def matches_pattern(self, word, pattern):
        """Check if a word matches the current pattern"""
        if len(word) != len(pattern):
            return False

        for w_char, p_char in zip(word, pattern):
            if p_char != '.' and p_char != w_char:
                return False
            if p_char == '.' and w_char in self.guessed_letters:
                return False

        return True

    def filter_dictionary_by_pattern(self, pattern):
        """Filter dictionary based on current pattern and guessed letters"""
        candidates = []

        for word in self.current_dictionary:
            if self.matches_pattern(word, pattern):
                # Additional check: word must contain all revealed letters
                revealed_letters = set(c for c in pattern if c != '.')
                if revealed_letters.issubset(set(word)):
                    candidates.append(word)

        return candidates

    def calculate_letter_score(self, letter, candidates):
        """Calculate score for a letter based on multiple factors"""
        if not candidates:
            return 0

        # Factor 1: Frequency in remaining candidates
        appears_in = sum(1 for word in candidates if letter in word)
        frequency_score = appears_in / len(candidates)

        # Factor 2: Expected information gain
        patterns_if_correct = set()
        for word in candidates:
            if letter in word:
                new_pattern = list(self.current_pattern)
                for i, char in enumerate(word):
                    if char == letter:
                        new_pattern[i] = letter
                patterns_if_correct.add(''.join(new_pattern))

        information_gain = len(patterns_if_correct) / len(candidates) if candidates else 0

        # Factor 3: Positional probability
        positional_score = 0
        if self.word_length in self.positional_frequencies:
            for pos in range(self.word_length):
                if self.current_pattern[pos] == '.':
                    positional_score += self.positional_frequencies[self.word_length][pos].get(letter, 0)

        # Factor 4: Overall letter frequency
        global_freq = self.letter_frequencies.get(letter, 0)

        # Combine factors
        total_score = (frequency_score * 0.4 +
                      information_gain * 0.3 +
                      positional_score * 0.2 +
                      global_freq * 0.1)

        return total_score

    def get_strategic_guess(self, pattern):
        """Get the best letter guess using multiple strategies"""
        self.current_pattern = pattern
        self.word_length = len(pattern)

        # Filter candidates based on current pattern and guessed letters
        candidates = []
        for word in self.current_dictionary:
            if self.matches_pattern(word, pattern):
                candidates.append(word)

        # Update current dictionary to only valid candidates
        self.current_dictionary = candidates

        if not candidates:
            # If no candidates, fall back to frequency-based guessing
            freq_order = ['e', 'a', 'i', 'o', 'u', 'r', 't', 'n', 's', 'l', 'c', 'h', 'd', 'y', 'f', 'm', 'w', 'g', 'p', 'b', 'v', 'k', 'j', 'x', 'q', 'z']
            available_letters = set(string.ascii_lowercase) - set(self.guessed_letters)
            for letter in freq_order:
                if letter in available_letters:
                    return letter
            return random.choice(list(available_letters)) if available_letters else 'e'

        # If very few candidates, try to distinguish between them
        if len(candidates) <= 3:
            letter_counts = collections.Counter()
            for word in candidates:
                for letter in word:
                    if letter not in self.guessed_letters and letter not in pattern:
                        letter_counts[letter] += 1

            if letter_counts:
                return letter_counts.most_common(1)[0][0]

        # Calculate scores for all unguessed letters
        available_letters = set(string.ascii_lowercase) - set(self.guessed_letters)

        if not available_letters:
            return random.choice(string.ascii_lowercase)

        letter_scores = {}
        for letter in available_letters:
            score = self.calculate_letter_score(letter, candidates)
            letter_scores[letter] = score

        # Return best scoring letter
        if letter_scores:
            best_letter = max(letter_scores, key=letter_scores.get)
            return best_letter

        # Fallback to frequency-based guessing
        freq_order = ['e', 'a', 'i', 'o', 'u', 'r', 't', 'n', 's', 'l', 'c', 'h', 'd', 'y', 'f', 'm', 'w', 'g', 'p', 'b', 'v', 'k', 'j', 'x', 'q', 'z']
        for letter in freq_order:
            if letter in available_letters:
                return letter

        return random.choice(list(available_letters))

    def guess(self, word):
        """Main guess function called by the game"""
        self.guess_count += 1

        # Parse the word pattern
        pattern = self.parse_word_pattern(word)

        # Initialize or update dictionary on first guess
        if self.guess_count == 1:
            word_len = len(pattern)
            self.current_dictionary = [w for w in self.full_dictionary if len(w) == word_len]
            print(f"Starting with {len(self.current_dictionary)} words of length {word_len}")

        # Get strategic guess
        guess_letter = self.get_strategic_guess(pattern)

        return guess_letter

    def start_game(self, practice=True, verbose=True):
        """Start a new hangman game"""
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.guess_count = 0

        response = self.request("/new_game", {"practice": practice})
        if response.get('status') == "approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')

            if verbose:
                print(f"Started game {game_id}. Tries remaining: {tries_remains}. Word: {word}")

            while tries_remains > 0:
                guess_letter = self.guess(word)
                self.guessed_letters.append(guess_letter)

                if verbose:
                    print(f"Guessing letter: {guess_letter}")

                try:
                    res = self.request("/guess_letter", {
                        "request": "guess_letter",
                        "game_id": game_id,
                        "letter": guess_letter
                    })
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print(f'Other exception caught on request: {e}')
                    raise

                if verbose:
                    print(f"Server response: {res}")

                status = res.get('status')
                tries_remains = res.get('tries_remains')

                if status == "success":
                    if verbose:
                        print(f"Successfully finished game: {game_id}")
                    return True
                elif status == "failed":
                    reason = res.get('reason', 'Number of tries exceeded!')
                    if verbose:
                        print(f"Failed game: {game_id}. Reason: {reason}")
                    return False
                elif status == "ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")

        return False

    def simulate_game(self, target_word, verbose=False):
        """Simulate a game locally for testing - with proper game state reset"""
        target_word = target_word.lower()

        # CRITICAL: Reset ALL game state for each simulation
        self.guessed_letters = []
        self.guess_count = 0
        self.current_dictionary = [w for w in self.full_dictionary if len(w) == len(target_word)]
        self.current_pattern = ""
        self.word_length = len(target_word)

        max_wrong_guesses = 6
        wrong_guesses = 0

        # Initialize display
        display = ['_'] * len(target_word)

        if verbose:
            print(f"Simulating word: {target_word}")
            print(f"Starting with {len(self.current_dictionary)} possible words")

        while wrong_guesses < max_wrong_guesses and '_' in display:
            # Create current word display (this is what the server sends)
            word_display = ' '.join(display)

            # Get guess - this will increment guess_count
            guess = self.guess(word_display)

            # Check for duplicate guess (shouldn't happen but safety check)
            if guess in self.guessed_letters:
                if verbose:
                    print(f"ERROR: Already guessed {guess}!")
                # This would be a bug in our algorithm, but let's handle it
                wrong_guesses += 1
                continue

            # The guess() method should have already added to guessed_letters
            # But let's make sure
            if guess not in self.guessed_letters:
                self.guessed_letters.append(guess)

            # Check if guess is correct
            found = False
            for i, letter in enumerate(target_word):
                if letter == guess:
                    display[i] = letter
                    found = True

            if found:
                if verbose:
                    print(f"Correct guess: {guess} -> {' '.join(display)}")

                # Check if word is complete
                if '_' not in display:
                    if verbose:
                        print(f"Word completed in {self.guess_count} guesses with {wrong_guesses} wrong guesses!")
                    return True
            else:
                wrong_guesses += 1
                if verbose:
                    print(f"Wrong guess: {guess} ({wrong_guesses}/{max_wrong_guesses})")

        if verbose:
            print(f"Failed to guess word: {target_word} - {wrong_guesses} wrong guesses")
        return False

    def evaluate_performance(self, test_words=None, num_tests=100):
        """Evaluate performance on a set of test words"""
        if test_words is None:
            # Create a more realistic test set with various word lengths
            test_words = []
            length_groups = collections.defaultdict(list)

            # Group words by length
            for word in self.full_dictionary:
                length_groups[len(word)].append(word)

            # Sample proportionally from each length group
            for length, words in length_groups.items():
                if len(words) > 0 and 3 <= length <= 15:  # Focus on reasonable lengths
                    sample_size = min(num_tests // 10, len(words))  # Distribute across lengths
                    test_words.extend(random.sample(words, sample_size))

            # If we don't have enough words, fill with random selection
            while len(test_words) < num_tests:
                test_words.append(random.choice(self.full_dictionary))

            test_words = test_words[:num_tests]

        successes = 0
        total_games = len(test_words)
        total_guesses = 0
        total_wrong_guesses = 0

        print(f"Testing on {total_games} words...")

        for i, word in enumerate(test_words):
            success = self.simulate_game(word, verbose=False)
            if success:
                successes += 1

            # Track some statistics
            total_guesses += self.guess_count

            if (i + 1) % 25 == 0:
                current_accuracy = successes / (i + 1)
                avg_guesses = total_guesses / (i + 1)
                print(f"Progress: {i+1}/{total_games} - Accuracy: {current_accuracy:.2%} - Avg guesses: {avg_guesses:.1f}")

        final_accuracy = successes / total_games
        avg_guesses = total_guesses / total_games

        print(f"\nFinal Results:")
        print(f"Accuracy: {final_accuracy:.2%} ({successes}/{total_games})")
        print(f"Average guesses per game: {avg_guesses:.1f}")

        return final_accuracy

    def my_status(self):
        """Get current game status"""
        return self.request("/my_status", {})

    def request(self, path, args=None, post_args=None, method=None):
        """Make API request with retry logic"""
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"
        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        time.sleep(0.2)  # Rate limiting
        num_retry, time_sleep = 50, 2

        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)

        return result


class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except (KeyError, TypeError):
            self.type = ""
        try:
            self.message = result["error_description"]
        except (KeyError, TypeError):
            try:
                self.message = result["error"]["message"]
                self.code = result["error"].get("code")
                if not self.type:
                    self.type = result["error"].get("type", "")
            except (KeyError, TypeError):
                try:
                    self.message = result["error_msg"]
                except (KeyError, TypeError):
                    self.message = result
        Exception.__init__(self, self.message)


def main():
    # Test locally first
    print("Initializing improved Hangman solver...")
    solver = ImprovedHangmanAPI(dictionary_path="words_250000_train.txt")

    print(f"Loaded {len(solver.full_dictionary)} words from dictionary")

    # Test with a few words manually first to debug
    print("\nTesting a few words manually...")
    test_words = ['hello', 'world', 'python', 'hangman', 'algorithm']

    for word in test_words:
        print(f"\nTesting word: {word}")
        success = solver.simulate_game(word, verbose=True)
        print(f"Result: {'SUCCESS' if success else 'FAILED'}")

    # Test with a reasonable sample
    print("\nTesting on larger sample...")
    accuracy = solver.evaluate_performance(num_tests=100)  # Start with smaller test

    if accuracy >= 0.40:  # Lower threshold for realism
        print(f"\nLocal accuracy ({accuracy:.2%}) is reasonable. Ready for server testing.")

        # Initialize with your API key
        api_solver = ImprovedHangmanAPI(
            access_token="2ff261e6068158c67cdd1a33bed417",
            timeout=2000,
            dictionary_path="words_250000_train.txt"
        )

        # Run some practice games
        print("Running practice games...")
        practice_wins = 0
        for i in range(10):
            if api_solver.start_game(practice=True, verbose=False):
                practice_wins += 1
            time.sleep(0.5)

        print(f"Practice games: {practice_wins}/10 wins")

        # Check status
        status = api_solver.my_status()
        print(f"Status: {status}")

    else:
        print(f"Local accuracy ({accuracy:.2%}) is still unrealistic. Algorithm needs more work.")


if __name__ == "__main__":
    main()

Initializing improved Hangman solver...
Loaded 227300 words from dictionary

Testing a few words manually...

Testing word: hello
Simulating word: hello
Starting with 11274 possible words
Starting with 11274 words of length 5
Wrong guess: a (1/6)
Correct guess: e -> _ e _ _ _
Wrong guess: s (2/6)
Wrong guess: i (3/6)
Correct guess: o -> _ e _ _ o
Wrong guess: n (4/6)
Correct guess: l -> _ e l l o
Wrong guess: c (5/6)
Wrong guess: j (6/6)
Failed to guess word: hello - 6 wrong guesses
Result: FAILED

Testing word: world
Simulating word: world
Starting with 11274 possible words
Starting with 11274 words of length 5
Wrong guess: a (1/6)
Wrong guess: e (2/6)
Correct guess: o -> _ o _ _ _
Wrong guess: s (3/6)
Wrong guess: y (4/6)
Wrong guess: i (5/6)
Wrong guess: u (6/6)
Failed to guess word: world - 6 wrong guesses
Result: FAILED

Testing word: python
Simulating word: python
Starting with 19541 possible words
Starting with 19541 words of length 6
Wrong guess: e (1/6)
Wrong guess: a (2/6)
Wr