In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math
import random
import string

SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

class CharMaskedDataset(Dataset):
    def __init__(self, word_list, mask_prob=0.20, max_length=15):
        self.word_list = [w for w in word_list if w.isalpha() and len(w) > 0 and len(w) <= max_length]
        self.mask_prob = mask_prob
        self.max_length = max_length
        # Vocab: '_' + '[MASK]' + 'a'...'z'
        self.vocab = ['_', '[MASK]'] + list(string.ascii_lowercase)
        self.char_to_idx = {c: i for i, c in enumerate(self.vocab)}
        self.idx_to_char = {i: c for c, i in self.char_to_idx.items()}
        
    def __len__(self):
        return len(self.word_list)

    def __getitem__(self, idx):
        word = self.word_list[idx]
        chars = list(word)
        if len(chars) < self.max_length:
            chars += ['_'] * (self.max_length - len(chars))

        input_ids = []
        target_ids = []
        for c in chars:
            if c == '_':
                input_ids.append(self.char_to_idx['_'])
                target_ids.append(-100)
            else:
                if random.random() < self.mask_prob:
                    input_ids.append(self.char_to_idx['[MASK]'])
                    target_ids.append(self.char_to_idx[c])
                else:
                    input_ids.append(self.char_to_idx[c])
                    target_ids.append(-100)

        return torch.tensor(input_ids, dtype=torch.long), torch.tensor(target_ids, dtype=torch.long)

class SinusoidalPositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=100):
        super(SinusoidalPositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp((torch.arange(0, d_model, 2).float() * -(math.log(10000.0)/d_model)))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0) # (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        seq_len = x.size(1)
        x = x + self.pe[:, :seq_len, :]
        return x

class TransformerMLM(nn.Module):
    def __init__(self, vocab_size=28, d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=100):
        super(TransformerMLM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_enc = SinusoidalPositionalEncoding(d_model, max_len)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc = nn.Linear(d_model, vocab_size)
    
    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = self.pos_enc(x)
        x = self.transformer(x)
        logits = self.fc(x)
        return logits

def train_mlm(dictionary, mask_prob, model_name, epochs=25, batch_size=64, lr=0.001, max_length=15, device='cpu'):
    dataset = CharMaskedDataset(dictionary, mask_prob=mask_prob, max_length=max_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    model = TransformerMLM(vocab_size=len(dataset.vocab), d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=20).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=-100)

    model.train()
    for epoch in range(epochs):
        losses = []
        for input_ids, targets in dataloader:
            input_ids, targets = input_ids.to(device), targets.to(device)
            optimizer.zero_grad()
            logits = model(input_ids)
            loss = criterion(logits.view(-1, logits.size(-1)), targets.view(-1))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        avg_loss = sum(losses)/len(losses)
        print(f"[{model_name}] Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

    torch.save(model.state_dict(), model_name)
    print(f"Model saved to {model_name}")

def main():
    dictionary_path = "../input/train-txt/words_250000_train.txt"
    with open(dictionary_path,"r") as f:
        full_dictionary = f.read().strip().split()
    full_dictionary = [w.lower() for w in full_dictionary if w.isalpha()]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    high_mask_prob = 0.55   # High unknown scenario
    medium_mask_prob = 0.35 # Medium unknown scenario
    low_mask_prob = 0.15    # Low unknown scenario (near end-game)

    train_mlm(full_dictionary, mask_prob=high_mask_prob, model_name="hangman_mlm_high.pt", epochs=100, batch_size=128, lr=0.0001, max_length=15, device=device)
    train_mlm(full_dictionary, mask_prob=medium_mask_prob, model_name="hangman_mlm_medium.pt", epochs=100, batch_size=128, lr=0.0001, max_length=15, device=device)
    train_mlm(full_dictionary, mask_prob=low_mask_prob, model_name="hangman_mlm_low.pt", epochs=100, batch_size=128, lr=0.0001, max_length=15, device=device)

if __name__ == "__main__":
    main()

Using device: cuda
[hangman_mlm_high.pt] Epoch 1/100, Loss: 2.6634
[hangman_mlm_high.pt] Epoch 2/100, Loss: 2.4635
[hangman_mlm_high.pt] Epoch 3/100, Loss: 2.4035
[hangman_mlm_high.pt] Epoch 4/100, Loss: 2.3703
[hangman_mlm_high.pt] Epoch 5/100, Loss: 2.3483
[hangman_mlm_high.pt] Epoch 6/100, Loss: 2.3283
[hangman_mlm_high.pt] Epoch 7/100, Loss: 2.3142
[hangman_mlm_high.pt] Epoch 8/100, Loss: 2.3019
[hangman_mlm_high.pt] Epoch 9/100, Loss: 2.2919
[hangman_mlm_high.pt] Epoch 10/100, Loss: 2.2823
[hangman_mlm_high.pt] Epoch 11/100, Loss: 2.2754
[hangman_mlm_high.pt] Epoch 12/100, Loss: 2.2678
[hangman_mlm_high.pt] Epoch 13/100, Loss: 2.2613
[hangman_mlm_high.pt] Epoch 14/100, Loss: 2.2542
[hangman_mlm_high.pt] Epoch 15/100, Loss: 2.2475
[hangman_mlm_high.pt] Epoch 16/100, Loss: 2.2445
[hangman_mlm_high.pt] Epoch 17/100, Loss: 2.2378
[hangman_mlm_high.pt] Epoch 18/100, Loss: 2.2349
[hangman_mlm_high.pt] Epoch 19/100, Loss: 2.2305
[hangman_mlm_high.pt] Epoch 20/100, Loss: 2.2251
[hangman_m

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math
import random
import string

SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

class CharMaskedDataset(Dataset):
    def __init__(self, word_list, mask_prob=0.25, max_length=7):
        self.word_list = [w for w in word_list if w.isalpha() and len(w)>0 and len(w)<=max_length]
        self.mask_prob = mask_prob
        self.max_length = max_length
        self.vocab = ['_', '[MASK]'] + list(string.ascii_lowercase)
        self.char_to_idx = {c:i for i,c in enumerate(self.vocab)}
        self.idx_to_char = {i:c for c,i in self.char_to_idx.items()}
        
    def __len__(self):
        return len(self.word_list)

    def __getitem__(self, idx):
        word = self.word_list[idx]
        chars = list(word)
        if len(chars)<self.max_length:
            chars += ['_']*(self.max_length - len(chars))

        input_ids = []
        target_ids = []
        for c in chars:
            if c=='_':
                input_ids.append(self.char_to_idx['_'])
                target_ids.append(-100)
            else:
                if random.random()<self.mask_prob:
                    input_ids.append(self.char_to_idx['[MASK]'])
                    target_ids.append(self.char_to_idx[c])
                else:
                    input_ids.append(self.char_to_idx[c])
                    target_ids.append(-100)
        return torch.tensor(input_ids,dtype=torch.long), torch.tensor(target_ids,dtype=torch.long)

class SinusoidalPositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=100):
        super(SinusoidalPositionalEncoding, self).__init__()
        pe=torch.zeros(max_len,d_model)
        position=torch.arange(0,max_len,dtype=torch.float).unsqueeze(1)
        div_term=torch.exp((torch.arange(0,d_model,2).float()*-(math.log(10000.0)/d_model)))
        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)
        pe=pe.unsqueeze(0)
        self.register_buffer('pe',pe)

    def forward(self,x):
        seq_len=x.size(1)
        x=x+self.pe[:, :seq_len, :]
        return x

class TransformerMLM(nn.Module):
    def __init__(self, vocab_size=28,d_model=128,nhead=4,num_layers=6,dim_feedforward=512,max_len=10):
        super(TransformerMLM,self).__init__()
        self.embedding=nn.Embedding(vocab_size,d_model)
        self.pos_enc=SinusoidalPositionalEncoding(d_model,max_len)
        encoder_layer=nn.TransformerEncoderLayer(d_model,nhead,dim_feedforward,batch_first=True)
        self.transformer=nn.TransformerEncoder(encoder_layer,num_layers)
        self.fc=nn.Linear(d_model,vocab_size)
    def forward(self,input_ids):
        x=self.embedding(input_ids)
        x=self.pos_enc(x)
        x=self.transformer(x)
        logits=self.fc(x)
        return logits

def main():
    dictionary_path="../input/train-txt/words_250000_train.txt"
    with open(dictionary_path,"r") as f:
        full_dictionary=f.read().strip().split()
    full_dictionary=[w.lower() for w in full_dictionary if w.isalpha()]

    device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    dataset=CharMaskedDataset(full_dictionary, mask_prob=0.25, max_length=7)
    batch_size=128
    dataloader=DataLoader(dataset,batch_size=batch_size,shuffle=True,drop_last=True)

    model=TransformerMLM(vocab_size=len(dataset.vocab),d_model=128,nhead=4,num_layers=6,dim_feedforward=512,max_len=10).to(device)
    optimizer=optim.Adam(model.parameters(),lr=0.0001)
    criterion=nn.CrossEntropyLoss(ignore_index=-100)

    epochs=100
    model.train()
    for epoch in range(epochs):
        losses=[]
        for input_ids,targets in dataloader:
            input_ids,targets=input_ids.to(device),targets.to(device)
            optimizer.zero_grad()
            logits=model(input_ids)
            loss=criterion(logits.view(-1,logits.size(-1)), targets.view(-1))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        avg_loss=sum(losses)/len(losses)
        print(f"Short Model Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

    torch.save(model.state_dict(),"hangman_mlm_short.pt")
    print("Short word model saved to hangman_mlm_short.pt")

if __name__=="__main__":
    main()

Using device: cuda
Short Model Epoch 1/100, Loss: 2.8486
Short Model Epoch 2/100, Loss: 2.6266
Short Model Epoch 3/100, Loss: 2.5367
Short Model Epoch 4/100, Loss: 2.4869
Short Model Epoch 5/100, Loss: 2.4500
Short Model Epoch 6/100, Loss: 2.4282
Short Model Epoch 7/100, Loss: 2.4143
Short Model Epoch 8/100, Loss: 2.3934
Short Model Epoch 9/100, Loss: 2.3897
Short Model Epoch 10/100, Loss: 2.3806
Short Model Epoch 11/100, Loss: 2.3740
Short Model Epoch 12/100, Loss: 2.3625
Short Model Epoch 13/100, Loss: 2.3533
Short Model Epoch 14/100, Loss: 2.3485
Short Model Epoch 15/100, Loss: 2.3395
Short Model Epoch 16/100, Loss: 2.3321
Short Model Epoch 17/100, Loss: 2.3340
Short Model Epoch 18/100, Loss: 2.3235
Short Model Epoch 19/100, Loss: 2.3225
Short Model Epoch 20/100, Loss: 2.3162
Short Model Epoch 21/100, Loss: 2.3147
Short Model Epoch 22/100, Loss: 2.3095
Short Model Epoch 23/100, Loss: 2.3034
Short Model Epoch 24/100, Loss: 2.2974
Short Model Epoch 25/100, Loss: 2.3008
Short Model Epo

# Trexquant Interview Project (The Hangman Game)

* Copyright Trexquant Investment LP. All Rights Reserved. 
* Redistribution of this question without written consent from Trexquant is prohibited

## Instruction:
For this coding test, your mission is to write an algorithm that plays the game of Hangman through our API server. 

When a user plays Hangman, the server first selects a secret word at random from a list. The server then returns a row of underscores (space separated)—one for each letter in the secret word—and asks the user to guess a letter. If the user guesses a letter that is in the word, the word is redisplayed with all instances of that letter shown in the correct positions, along with any letters correctly guessed on previous turns. If the letter does not appear in the word, the user is charged with an incorrect guess. The user keeps guessing letters until either (1) the user has correctly guessed all the letters in the word
or (2) the user has made six incorrect guesses.

You are required to write a "guess" function that takes current word (with underscores) as input and returns a guess letter. You will use the API codes below to play 1,000 Hangman games. You have the opportunity to practice before you want to start recording your game results.

Your algorithm is permitted to use a training set of approximately 250,000 dictionary words. Your algorithm will be tested on an entirely disjoint set of 250,000 dictionary words. Please note that this means the words that you will ultimately be tested on do NOT appear in the dictionary that you are given. You are not permitted to use any dictionary other than the training dictionary we provided. This requirement will be strictly enforced by code review.

You are provided with a basic, working algorithm. This algorithm will match the provided masked string (e.g. a _ _ l e) to all possible words in the dictionary, tabulate the frequency of letters appearing in these possible words, and then guess the letter with the highest frequency of appearence that has not already been guessed. If there are no remaining words that match then it will default back to the character frequency distribution of the entire dictionary.

This benchmark strategy is successful approximately 18% of the time. Your task is to design an algorithm that significantly outperforms this benchmark.

In [1]:
import json
import requests
import random
import string
import secrets
import time
import re
import collections

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [None]:
import requests
import time
import re
import json
import collections
import torch
import torch.nn as nn
import math
import string
from urllib.parse import parse_qs

def vowel_count(clean_word, vowels):
    v_count = sum(1 for c in clean_word if c in vowels)
    return v_count / len(clean_word) if len(clean_word)>0 else 0.0

def func(new_dictionary):
    dictx = collections.Counter()
    for words in new_dictionary:
        temp = collections.Counter(words)
        for i in temp:
            temp[i] = 1
        dictx = dictx + temp
    return dictx

def func2(n_word_dictionary, clean_word):
    new_dictionary = []
    l = len(clean_word)
    if l in n_word_dictionary:
        for dict_word in n_word_dictionary[l]:
            if re.fullmatch(clean_word, dict_word):
                new_dictionary.append(dict_word)
    return new_dictionary

class HangmanAPIError(Exception):
    def __init__(self, result):
        self.result=result
        self.code=None
        try:
            self.type=result["error_code"]
        except (KeyError,TypeError):
            self.type=""
        try:
            self.message=result["error_description"]
        except (KeyError,TypeError):
            try:
                self.message=result["error"]["message"]
                self.code=result["error"].get("code")
                if not self.type:
                    self.type=result["error"].get("type","")
            except (KeyError,TypeError):
                try:
                    self.message=result["error_msg"]
                except (KeyError,TypeError):
                    self.message=result
        super().__init__(self.message)

class SinusoidalPositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=20):
        super(SinusoidalPositionalEncoding,self).__init__()
        pe=torch.zeros(max_len,d_model)
        position=torch.arange(0,max_len,dtype=torch.float).unsqueeze(1)
        div_term=torch.exp((torch.arange(0,d_model,2).float()*-(math.log(10000.0)/d_model)))
        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)
        pe=pe.unsqueeze(0)
        self.register_buffer('pe',pe)
    def forward(self,x):
        seq_len=x.size(1)
        x=x+self.pe[:, :seq_len,:]
        return x

class TransformerMLM(nn.Module):
    def __init__(self, vocab_size=28, d_model=128, nhead=4, num_layers=4, dim_feedforward=256, max_len=20):
        super(TransformerMLM,self).__init__()
        self.embedding=nn.Embedding(vocab_size,d_model)
        self.pos_enc=SinusoidalPositionalEncoding(d_model,max_len)
        encoder_layer=nn.TransformerEncoderLayer(d_model,nhead,dim_feedforward,batch_first=True)
        self.transformer=nn.TransformerEncoder(encoder_layer,num_layers)
        self.fc=nn.Linear(d_model,vocab_size)
    def forward(self,input_ids):
        x=self.embedding(input_ids)
        x=self.pos_enc(x)
        x=self.transformer(x)
        logits=self.fc(x)
        return logits

class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.hangman_url=self.determine_hangman_url()
        self.access_token=access_token
        self.session=session or requests.Session()
        self.timeout=timeout
        self.guessed_letters=[]

        full_dictionary_location="words_250000_train.txt"
        self.full_dictionary=self.build_dictionary(full_dictionary_location)
        self.full_dictionary_common_letter_sorted=collections.Counter("".join(self.full_dictionary)).most_common()

        self.current_dictionary=self.full_dictionary[:]
        self.n_word_dictionary=self.build_substring_dictionary(self.full_dictionary)

        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device=device

        self.vocab=['_','[MASK]']+list(string.ascii_lowercase)
        self.char_to_idx={c:i for i,c in enumerate(self.vocab)}
        self.idx_to_char={i:c for c,i in self.char_to_idx.items()}

        # Load all four models:
        self.mlm_high=TransformerMLM(vocab_size=len(self.vocab), d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=20).to(device)
        self.mlm_medium=TransformerMLM(vocab_size=len(self.vocab), d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=20).to(device)
        self.mlm_low=TransformerMLM(vocab_size=len(self.vocab), d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=20).to(device)
        self.mlm_short=TransformerMLM(vocab_size=len(self.vocab), d_model=128, nhead=4, num_layers=6, dim_feedforward=512, max_len=10).to(device)

        self.mlm_high.load_state_dict(torch.load("better_models/hangman_mlm_high.pt", map_location=device))
        self.mlm_medium.load_state_dict(torch.load("better_models/hangman_mlm_medium.pt", map_location=device))
        self.mlm_low.load_state_dict(torch.load("better_models/hangman_mlm_low (1).pt", map_location=device))
        self.mlm_short.load_state_dict(torch.load("better_models/hangman_mlm_short.pt", map_location=device))

        self.mlm_high.eval()
        self.mlm_medium.eval()
        self.mlm_low.eval()
        self.mlm_short.eval()

    @staticmethod
    def determine_hangman_url():
        links=['https://trexsim.com','https://sg.trexsim.com']
        data={link:0 for link in links}
        for link in links:
            requests.get(link)
            for i in range(10):
                s=time.time()
                requests.get(link)
                data[link]=time.time()-s
        link=sorted(data.items(), key=lambda x:x[1])[0][0]
        link+='/trexsim/hangman'
        return link

    def build_substring_dictionary(self, df):
        max_length=max(len(w) for w in df)
        n_word_dictionary={i:[] for i in range(3,min(max_length,30)+1)}
        for count in range(3,min(max_length,30)+1):
            for w in df:
                if len(w)>=count:
                    for i in range(len(w)-count+1):
                        n_word_dictionary[count].append(w[i:i+count])
        return n_word_dictionary

    def build_dictionary(self, dictionary_file_location):
        with open(dictionary_file_location,"r") as f:
            full_dictionary=f.read().strip().split()
        full_dictionary=[w.lower() for w in full_dictionary if w.isalpha()]
        return full_dictionary

    def try_func2(self, clean_word):
        ndict=func2(self.n_word_dictionary, clean_word)
        return func(ndict)

    def try_substring_logic(self, clean_word, divisor):
        length=len(clean_word)
        x=int(length/divisor)
        c=collections.Counter()
        if x>=3:
            for i in range(length - x +1):
                s=clean_word[i:i+x]
                ndict=func2(self.n_word_dictionary,s)
                temp=func(ndict)
                c=c+temp
        return c

    def get_model_probs(self, model, clean_word):
        mlm_input_chars=[]
        for c in clean_word:
            if c=='.':
                mlm_input_chars.append('[MASK]')
            elif c in self.vocab:
                mlm_input_chars.append(c)
            else:
                mlm_input_chars.append('_')


        input_ids=[self.char_to_idx.get(ch,self.char_to_idx['_']) for ch in mlm_input_chars]

        model_max_len = model.pos_enc.pe.size(1)
        if len(input_ids) > model_max_len:
            input_ids = input_ids[:model_max_len]
            
        input_ids_tensor=torch.tensor([input_ids],dtype=torch.long,device=self.device)
        with torch.no_grad():
            logits=model(input_ids_tensor)
            mask_positions=[i for i,ch in enumerate(mlm_input_chars) if ch=='[MASK]']
            letter_indices=range(2,28)
            letter_probs=collections.Counter()
            if mask_positions:
                for pos in mask_positions:
                    pos_logits=logits[0,pos]
                    pos_probs=torch.softmax(pos_logits,dim=0)
                    for li in letter_indices:
                        ltr=self.idx_to_char[li]
                        letter_probs[ltr]+=pos_probs[li].item()
                for ltr in letter_probs:
                    letter_probs[ltr]=letter_probs[ltr]/len(mask_positions)
            else:
                for ltr in string.ascii_lowercase:
                    letter_probs[ltr]=1.0/26.0
        return dict(letter_probs)

    def guess(self, word):
        vowels=set('aeiou')
        clean_word=word[::2].replace("_",".")
        len_word=len(clean_word)
        num_unknown=clean_word.count('.')
        unknown_ratio=num_unknown/len_word if len_word>0 else 0.0

        # Heuristic approach
        regex_pattern="^"+clean_word+"$"
        new_dictionary=[w for w in self.current_dictionary if len(w)==len_word and re.fullmatch(regex_pattern,w)]
        self.current_dictionary=new_dictionary

        def get_heuristic_letters(clean_word):
            c=func(self.current_dictionary)
            if len(self.current_dictionary)==0:
                c=self.try_func2(clean_word)
                if sum(c.values())==0:
                    c=self.try_substring_logic(clean_word,2)
                    if sum(c.values())==0:
                        c=self.try_substring_logic(clean_word,3)
                        if sum(c.values())==0:
                            c=collections.Counter()
                            total=sum(freq for _,freq in self.full_dictionary_common_letter_sorted)
                            for ltr,freq in self.full_dictionary_common_letter_sorted:
                                c[ltr]=freq
            return c

        def letter_frequency_to_scores(c):
            total=sum(c.values())
            scores={}
            if total>0:
                for ltr,val in c.items():
                    scores[ltr]=val/total
            return scores

        h_counts=get_heuristic_letters(clean_word)
        heuristic_scores=letter_frequency_to_scores(h_counts)

        if len_word <=7:
            # For short words, get short model probs
            model_short_prob=self.get_model_probs(self.mlm_short, clean_word)
            model_high_prob=self.get_model_probs(self.mlm_high, clean_word)
            model_medium_prob=self.get_model_probs(self.mlm_medium, clean_word)
            model_low_prob=self.get_model_probs(self.mlm_low, clean_word)

            if unknown_ratio >0.7:
                w_short=0.1
                w_heuristic=0.4
                w_high = 0.2
                w_medium= 0.15
                w_low=0.15
            elif unknown_ratio >0.4:
                w_short=0.2
                w_heuristic=0.4
                w_low=0.2
                w_high=0.0
                w_medium=0.2
            elif unknown_ratio >0.15:
                w_short=0.2
                w_heuristic=0.4
                w_low=0.3
                w_high=0.0
                w_medium=0.1
            else:
                w_heuristic=0.5
                w_short=0.2
                w_low=0.2
                w_high=0.0
                w_medium=0.1

            combined_scores={}
            for ltr in string.ascii_lowercase:
                pm_s=model_short_prob.get(ltr,0.0)
                ph=heuristic_scores.get(ltr,0.0)
                pm_h=model_high_prob.get(ltr,0.0)
                pm_m=model_medium_prob.get(ltr,0.0)
                pm_l=model_low_prob.get(ltr,0.0)

                combined_scores[ltr]=(w_short*pm_s + w_heuristic*ph + w_high*pm_h + w_medium*pm_m + w_low*pm_l)
        else:
            model_high_prob=self.get_model_probs(self.mlm_high, clean_word)
            model_medium_prob=self.get_model_probs(self.mlm_medium, clean_word)
            model_low_prob=self.get_model_probs(self.mlm_low, clean_word)

            if unknown_ratio>0.7:
                w_high=0.2
                w_medium=0.3
                w_low=0.2
                w_heuristic=0.3
            elif unknown_ratio>0.4:
                w_high=0.15
                w_medium=0.4
                w_low=0.15
                w_heuristic=0.3
            elif unknown_ratio>0.15:
                w_high=0.1
                w_medium=0.2
                w_low=0.3
                w_heuristic=0.4
            else:
                w_high=0.05
                w_medium=0.15
                w_low=0.3
                w_heuristic=0.5

            combined_scores={}
            for ltr in string.ascii_lowercase:
                pm_h=model_high_prob.get(ltr,0.0)
                pm_m=model_medium_prob.get(ltr,0.0)
                pm_l=model_low_prob.get(ltr,0.0)
                ph=heuristic_scores.get(ltr,0.0)
                combined_scores[ltr]=(w_high*pm_h + w_medium*pm_m + w_low*pm_l + w_heuristic*ph)

        known_letters=[c for c in clean_word if c!='.']
        vowels=set('aeiou')
        v_ratio=vowel_count(known_letters,vowels) if known_letters else 0.0

        sorted_letters=sorted(combined_scores.items(), key=lambda x:x[1], reverse=True)

        chosen_letter=None
        if v_ratio>0.6 and unknown_ratio<0.5:
            for letter,score in sorted_letters:
                if letter not in self.guessed_letters and letter not in vowels:
                    chosen_letter=letter
                    break
            if chosen_letter is None:
                for letter,score in sorted_letters:
                    if letter not in self.guessed_letters:
                        chosen_letter=letter
                        break
        else:
            for letter, score in sorted_letters:
                if letter not in self.guessed_letters:
                    chosen_letter=letter
                    break

        if chosen_letter is None:
            chosen_letter='e'

        return chosen_letter

    def start_game(self, practice=True, verbose=True):
        self.guessed_letters=[]
        self.current_dictionary=self.full_dictionary.copy()

        response=self.request("/new_game",{"practice":practice})
        if response.get('status')=="approved":
            game_id=response.get('game_id')
            word=response.get('word')
            tries_remains=response.get('tries_remains')
            if verbose:
                print(f"Game {game_id} started. Tries:{tries_remains}, Word:{word}")
            while tries_remains>0:
                guess_letter=self.guess(word)
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing:",guess_letter)
                try:
                    res=self.request("/guess_letter",{"request":"guess_letter","game_id":game_id,"letter":guess_letter})
                except HangmanAPIError as e:
                    print("HangmanAPIError:",e)
                    continue
                except Exception as e:
                    print("Unexpected error:",e)
                    raise e

                if verbose:
                    print("Server response:",res)
                status=res.get('status')
                tries_remains=res.get('tries_remains')
                if status=="success":
                    if verbose:
                        print(f"Successfully finished game: {game_id}")
                    return True
                elif status=="failed":
                    reason=res.get('reason','# tries exceeded!')
                    if verbose:
                        print(f"Failed game: {game_id}, Reason:{reason}")
                    return False
                elif status=="ongoing":
                    word=res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return response.get('status')=="success"

    def my_status(self):
        return self.request("/my_status",{})

    def request(self,path,args=None,post_args=None,method=None):
        if args is None:
            args={}
        if post_args is not None:
            method="POST"

        if self.access_token:
            if post_args and "access_token" not in post_args:
                post_args["access_token"]=self.access_token
            elif "access_token" not in args:
                args["access_token"]=self.access_token

        time.sleep(0.2)
        num_retry,time_sleep=50,2
        for it in range(num_retry):
            try:
                response=self.session.request(
                    method or "GET",
                    self.hangman_url+path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                response.raise_for_status()
                break
            except requests.HTTPError as e:
                try:
                    resp=e.response.json()
                except ValueError:
                    resp={"error_msg":str(e)}
                raise HangmanAPIError(resp)
            except requests.exceptions.SSLError:
                if it+1==num_retry:
                    raise
                time.sleep(time_sleep)
            except requests.exceptions.RequestException:
                if it+1==num_retry:
                    raise
                time.sleep(time_sleep)

        headers=response.headers
        if 'json' in headers.get('content-type',''):
            result=response.json()
        elif "access_token" in parse_qs(response.text):
            query_str=parse_qs(response.text)
            if "access_token" in query_str:
                result={"access_token":query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"]=query_str["expires"][0]
            else:
                try:
                    result=response.json()
                except ValueError:
                    result={'error_msg':response.text}
                raise HangmanAPIError(result)
        else:
            raise HangmanAPIError('Maintype was not text or querystring')

        if result and isinstance(result,dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

In [17]:
api = HangmanAPI(access_token="7d96983667363dce2c8ee97995455f", timeout=2000)

  self.mlm_high.load_state_dict(torch.load("better_models/hangman_mlm_high.pt", map_location=device))
  self.mlm_medium.load_state_dict(torch.load("better_models/hangman_mlm_medium.pt", map_location=device))
  self.mlm_low.load_state_dict(torch.load("better_models/hangman_mlm_low (1).pt", map_location=device))
  self.mlm_short.load_state_dict(torch.load("better_models/hangman_mlm_short.pt", map_location=device))


In [4]:
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status()
print(total_practice_runs)
print(total_practice_successes)

1973
1008


## Playing recorded games:
Please finalize your code prior to running the cell below. Once this code executes once successfully your submission will be finalized. Our system will not allow you to rerun any additional games.

Please note that it is expected that after you successfully run this block of code that subsequent runs will result in the error message "Your account has been deactivated".

Once you've run this section of the code your submission is complete. Please send us your source code via email.

In [None]:
for i in range(1000):
    print('Playing ', (i+1), ' th game')
    # Uncomment the following line to execute your final runs. Do not do this until you are satisfied with your submission
    api.start_game(practice=0,verbose=False)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
    success_rate = total_recorded_successes/(total_recorded_runs)
    print('overall success rate = %.3f' % success_rate)
    
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

Playing  273  th game


HangmanAPIError: {'error': 'Your account has been deactivated!'}

In [26]:
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
success_rate = total_recorded_successes/(total_recorded_runs + 1e-6)
print('overall success rate = %.3f' % success_rate)

print('total_practice_runs = ', total_practice_runs)
print('total_recorded_runs = ', total_recorded_runs)
print('total_recorded_successes = ', total_recorded_successes)
print('total_practice_successes = ', total_practice_successes)

overall success rate = 0.568
total_practice_runs =  1973
total_recorded_runs =  1000
total_recorded_successes =  568
total_practice_successes =  1008
