In [2]:
#imports
import numpy as np
import pandas as pd
from scipy import sparse as sparse
from scipy.sparse import linalg as splinalg
from sys import argv
# import matplotlib.pyplot as plt
import pickle as pl
import warnings
from scipy.integrate import odeint, solve_ivp
warnings.filterwarnings("ignore")
import collections

In [3]:
#imports
import torch
import torch.nn as nn
from torch.nn  import functional as F
#use gpu if available
device = torch.device('mps' if torch.backends.mps.is_available() else "cpu")
print(device)
# data = torch.tensor(encode(text), dtype = torch.long).to(device)

mps


In [6]:
# read it in to inspect it
with open('../words_250000_train.txt', 'r', encoding='utf-8') as f:
    text = f.read()

print("length of dataset in characters: ", len(text))

c = collections.Counter(text)
sorted_letter_count = c.most_common() 
train_word_list = np.array(text.split('\n'), dtype = str)[:-1]


train_len = np.array([len(itr) for itr in train_word_list]) # length of words in train_word_list
print("Minimum and maximum word length: ", min(train_len), max(train_len)) 
print("Longest word: ", train_word_list[np.argmin(train_len)])
print("Shortest word: ", train_word_list[np.argmax(train_len)])

train_word_list = np.array([ list(w.ljust(30)) for w in train_word_list]) # padding
len_mask = torch.tensor([np.concatenate((np.zeros(train_len[itr]), np.ones(30-train_len[itr]))) for itr in range(len(train_len))])

print(len_mask[:5])
print(train_word_list[:5])


# get the vocabulary
chars = sorted(list(set(text))) # vocab
chars[0]=' ' # padding character
chars.insert(1, '_') # add missing character symbol to the vocab
vocab_size = len(chars)
print(f'Vocabulary size: {vocab_size}')

# print vocab
print(''.join(chars))

length of dataset in characters:  2352046
Minimum and maximum word length:  1 29
Longest word:  c
Shortest word:  cyclotrimethylenetrinitramine
tensor([[0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]], dtype=torch.float64)
[['a' 'a' 'a' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' '
  ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ']
 ['a' 'a' 'a' 

In [16]:

# create a mapping from characters to integers and vice versa

stoi = {ch:i for i, ch in enumerate(chars)} # abbrev for string to integer
itos = {i:ch for ch, i in stoi.items()}

# encoder: takes a string and returns a list of integers
encode = lambda s: [stoi[c] for c in s]

# decoder: takes a list of integers and returns a string
decode = lambda l: ''.join([itos[i] for i in l])

#unit test
print(encode('hi_there'))
print(decode(encode('hi_there')))
print(stoi)

[9, 10, 1, 21, 9, 6, 19, 6]
hi_there
{' ': 0, '_': 1, 'a': 2, 'b': 3, 'c': 4, 'd': 5, 'e': 6, 'f': 7, 'g': 8, 'h': 9, 'i': 10, 'j': 11, 'k': 12, 'l': 13, 'm': 14, 'n': 15, 'o': 16, 'p': 17, 'q': 18, 'r': 19, 's': 20, 't': 21, 'u': 22, 'v': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27}


### Data loader

In [17]:
# data loading
def get_batch(train_word_list, train_len, batch_size, frac = 0.27):
    # generate a small batch of data of inputs x and targets y
    ix = np.random.randint(0, len(train_word_list) , size = (batch_size,))

    y = torch.tensor([encode(train_word_list[itr]) for itr in ix])

    pos_mask = (torch.rand(size = (batch_size, 30))<frac)*(1-len_mask[ix])
    x = y.masked_fill(pos_mask==1, 1)
    x, y = x.to(device), y.to(device)
    return x, y


get_batch(train_word_list, train_len, 2)

(tensor([[ 1,  1,  1, 21,  1,  1,  1,  1,  6,  5,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
         [14,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]], device='mps:0'),
 tensor([[21, 19, 22, 21,  9,  4, 16, 24,  6,  5,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
         [14,  2, 15, 26,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
           0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]], device='mps:0'))

### Part 1: Define a decoder only head to perform scaled dot product attention.

Consider an input $x \in \mathbb{R}^{B \times T \times C}$ to the attention head, where $B$ is the batch size, $T$ is the context length and $C$ is the embedding dimension (input dimension from the last layer).

Then, the attention head of size $d$ outputs $z \in \mathbb{R}^{B \times T \times d}$

$$ z = softmax \left( \frac{Q K^T}{\sqrt{d}} \right) V $$

where $K, Q, V$ are linear (and not affine) transformations of the input

$$ K = k x, \text{ with } k \in \mathbb{R}^{d \times C} $$
$$ Q = q x, \text{ with } q \in \mathbb{R}^{d \times C} $$
$$ V = v x, \text{ with } v \in \mathbb{R}^{d \times C} $$

Define a nn.Module class to implement a decoder only head.


In [18]:
class Head(nn.Module):
    """Description: decoder only scaled dot product attention"""

    def __init__(self, n_embd, n_head):
        super().__init__()
        """
        Description:
            Initializes various layers used in the forward function
            Note: runs when an object of Head is created
        Input:
            n_embd (C): The embedding dimension
            n_head (d): Hidden dimension of the single head, i.e., key.shape = query.shape = value.shape = (n_embd, head_size)
        """
        # use linear layers to define key, query and value matrices
        # bias is usually not used in nn.Linear in the attention blocks; use bias = False
        self.key = nn.Linear(n_embd, n_head, bias = False, device = device)
        self.query = nn.Linear(n_embd, n_head, bias = False, device = device)
        self.value = nn.Linear(n_embd, n_head, bias = False, device = device)


    def forward(self, x):
        """
        Description:
            Forward pass of the attention head
        Input:
            x of shape (B, T, C)
        Output:
            z of shape (B, T, d)
        """
        x=x.to(device)
        B, T, C = x.shape # B: batch size; T: block_size; C: embedding dim
        K = self.key(x)    # (B, T, d)
        Q = self.query(x)  # (B, T, d)
        # compute scaled dot product attention scores w = q @ k / sqrt(d)
        W = torch.einsum('btd,bud->btu', Q, K) # (B, T, d) @ (B, T, d) -> (B, T, T)
        # Mask out the attention scores such that the padded values are ignored
        mask = torch.ones((B, T), device = device).masked_fill(torch.sum(x, axis = -1)==0, float('-inf')) # (B, T, T)
        
        # apply a softmax along the last dim;
        # use nn.functional.softmax;
        # Note: nn.functional is imported as F
        W = F.softmax(W + mask[:, None, :], dim=2) # (B, T, T)
#         print(W[:, 0,:])
        # perform the weighted aggregation of the values: V = v @ x
        V = self.value(x) # (B, T, d)
        # output w @ v
        out = W @ V # (B, T, T) @ (B, T, d) -> (B, T, d)
        return out

### Part 2: Define multi-head attention module

Given input $x \in \mathbb{R}^{B \times T \times C}$ as before, define multiple heads of attention that work in parallel. Use class Head() defined above to create a list of attention heads using nn.ModuleList()

Each attention head returns $z \in \mathbb{R}^{B \times T \times d}$

Using a linear layer, project the concatentated output of attention heads back to the embedding dimension $C$.

In [19]:
class MultiHeadAttention(nn.Module):
    """ multiple heads of self-attention in parallel """
    def __init__(self, num_heads, head_size, embd_size):
        super().__init__()
        """
        Description: Performs multi-head attention followed by a projection
        Input:
            num_heads: number of attention heads
            head_size (d): size of each attention head
        """
        self.heads = nn.ModuleList([Head(embd_size, head_size) for i in range(num_heads)]) # Define n_heads copies of Head() of size head_size as a list
        self.proj = nn.Linear(num_heads*head_size, embd_size, bias = False) # projection layer using nn.Linear

    def forward(self, x):
        """"
        Description: Forward pass of multi head attention
        Input: x of shape (B, T, C)
        """
        multihead_out = []
        for i, l in enumerate(self.heads):
            multihead_out.append(l(x))
        out = torch.cat(multihead_out, dim = 2)

        #project the output using a linear
        out = self.proj(out)
        return out

### Part 3: Implement a simple two layer ReLU FCN $f: \mathbb{R}^n \to \mathbb{R}^n$ with hidden layer dimension $h = 4 \times n$ .

The network should perform the following operations:

1. Linear: $\mathbb{R}^{n \to 4n}$
2. ReLU: $\mathbb{R}^{4n \to 4n}$
3. Linear: $\mathbb{R}^{n \to n}$

In [20]:
class FeedFoward(nn.Module):
    """ A simple one hidden layer ReLU block followed by a non-linearity """

    def __init__(self, n_embd):
        super().__init__()
        self.layer1 = nn.Linear(n_embd, 4*n_embd)
        self.layer2 = nn.Linear(4*n_embd, n_embd)
        return
        """
        Description: Linear -> ReLU -> Linear
        """

    def forward(self, x):
        "Forward pass of the network"
        x = F.relu(self.layer1(x))
        x = self.layer2(x)
        return x

### Part 4: Implement a Transformer block consisting of multi-head attention followed by feedforward computation

Using the MultiHeadAttention() and Feedforward() classes, define a Transformer block to perform:

1. LayerNorm in the embedding dimension using nn.LayerNorm(n_embd)
2. Multi-head attention with n_head heads of size head_size
3. Add Residual from input
4. LayerNorm in the embedding dimension using nn.LayerNorm(n_embd)
5. Feedforward layer: Linear -> ReLU -> Linear
6. Add Residual from Step 3

In [31]:
class Block(nn.Module):
    """ Transformer block: Multi head attention followed by feed forward followed by LayerNorm """

    def __init__(self, n_embd, n_head, head_size = 100):
        # n_embd: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        """
        Description: Transformer block
        Input:
            n_embd: embedding dimension of the input
            n_head: number of attention heads
        """
        head_size = n_embd // n_head #comment this line to load pretrained model - "model4"
        self.L1 = nn.LayerNorm(n_embd)
        self.L2 = MultiHeadAttention(n_head, head_size, n_embd)
        self.L3 = FeedFoward(n_embd)
        return

    def forward(self, x):
        "Forward pass of the attention block"
        residual = x
        x = self.L2(x)
        x = x + residual
        residual = x
        x = self.L3(x)
        x = x + residual
        return x


### Part 5: The full (decoder only) Transformer

Finally, lets put all of it together to construct the full decoder only transformer. Consider the tokenized input $x \in \mathbb{R}^{B \times T \times V}$, where $B$ is the batch size, $T$ is the context length and $V$ is the vocabulary size.

The model should perform the following computations:

1. T = Token embedding(x) $ \in \mathbb{R}^{B \times T \times C}$, where $C$ is the embedding dimension
2. P = Positional embedding(x) $\in \mathbb{R}^{T \times C}$ .For simplicity, we will use the index of the sequence as the positional embedding.
3. X = T + P  $ \in \mathbb{R}^{B \times T \times C}$
4. n_layers of Transformer blocks using the Block() module.
Use nn.Sequential() for stacking mulitple layers
5. LayerNorm in the embeding dimension
6. Linear transformation: $\mathbb{R}^{C} \to \mathbb{R}^{V}$






In [37]:
class Transformer(nn.Module):
    """
    Description: Decoder only transformer model"""
    def __init__(self, vocab_size, block_size, n_embd, n_layers, n_head):
        super().__init__()
        """
        Description: Decoder only transformer model
        Input:
            vocab_size (V) : Vocabulary dimension
            block_size (T): Context length
            n_embd (C): Embedding dimension
            n_layers: number of layers of Transformer blocks
            n_head : number of heads in multi-head attention
        """
        # replace None with appropriate implementations
        # for simplicity, think of embedding as a linear transformation and fill in the dimensions
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd, padding_idx = 0) # replace None with the appropriate variables
        self.position_embedding_table = nn.Embedding(block_size, n_embd) # replace None with the appropriate variables
        self.blocks = nn.Sequential(*[Block(n_embd, n_head) for itre in range(n_layers)]) # sequential layer of n_layers Transformer blocks
        self.layer_norm = nn.LayerNorm(n_embd) # Layer norm in the embedding dimension
        self.linear_head = nn.Linear(n_embd, vocab_size) # (C -> V)

    ### DO NOT MODIFY BEYOND THIS
    def forward(self, idx, targets=None):
        """
        Description: Forward pass of transformer
        Inputs:
            idx: The tokenized input sequence
            targets (optional): the tokenized target sequence
        """
        B, T = idx.shape
        # idx and targets are both (B,T) tensor of integers
        tok_emb = self.token_embedding_table(idx) # (B,T,C)
        pos_emb = self.position_embedding_table(torch.arange(T, device=device)) # (T,C)
        x = (tok_emb + pos_emb).masked_fill(tok_emb==0, 0) # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.layer_norm(x) # (B,T,C)
        logits = self.linear_head(x) # (B,T, V)

        # computes loss if targets are provided
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.masked_fill(idx > 1 , 0)
            targets = targets.view(B*T)

            loss = F.cross_entropy(logits, targets, ignore_index = 0)

        return logits, loss





Estimates training and test loss

In [38]:
### DO NOT MODIFY THIS
# Estimates loss
@torch.no_grad()
def estimate_loss(model, frac = 0.27):
    model.eval()
    losses = torch.zeros(eval_iters)
    for k in range(eval_iters):
        X, Y = get_batch(train_word_list, train_len, batch_size, frac)
        logits, loss =  model(X, Y)
        losses[k] = loss.item()
    mean_loss = losses.mean()
    model.train()
    return mean_loss

In [39]:
# hyperparameters
batch_size = 200 # batch_size
block_size = 30 # maximum context length
max_iters = 5000
eval_interval = 1000
learning_rate = 1e-5

eval_iters = 50
n_embd = 99
n_head = 4
n_layers = 4
dropout = 0.0

In [40]:
torch.manual_seed(1090)
model = Transformer(vocab_size, block_size, n_embd+1, n_layers, n_head)
m = model.to(device)

'''training'''
# print the number of parameters in the model
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')

# create an optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr = learning_rate)

for iter in range(max_iters):
    # every once in a while evaluate the loss on train and val sets
    if iter % eval_interval == 0 or iter == max_iters - 1:
        train_loss = estimate_loss(m)
        print(f"step {iter}: train loss {train_loss:.4f}")

    # sample a batch of data
    xb, yb = get_batch(train_word_list, train_len, batch_size)
    # evaluate the loss
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# # generate from the model
# context = torch.zeros((1, 1), dtype=torch.long, device=device)
# print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))


0.491628 M parameters
step 0: train loss 3.6003
step 1000: train loss 2.8857
step 2000: train loss 2.8232
step 3000: train loss 2.6944
step 4000: train loss 2.6132
step 4999: train loss 2.5455


In [15]:
m = Transformer(vocab_size, block_size, n_embd+1, n_layers, n_head)
m = m.to(device)
m.load_state_dict(torch.load('model4'))


<All keys matched successfully>

In [52]:
'''More Training with different masking rates'''
optimizer = torch.optim.AdamW(m.parameters(), lr = 1e-4)
m.train()
for iter in range(5000):
    # every once in a while evaluate the loss on train and val sets
    if iter % eval_interval == 0 or iter == max_iters - 1:
        train_loss = estimate_loss(m, frac=0.15)
        print(f"step {iter}: train loss {train_loss:.4f}")

    # sample a batch of data
    xb, yb = get_batch(train_word_list, train_len, batch_size, frac = 0.32)
    # evaluate the loss
    logits, loss = m(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    
    # sample a batch of data
    xb, yb = get_batch(train_word_list, train_len, batch_size, frac = 0.22)
    # evaluate the loss
    logits, loss = m(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

    # sample a batch of data
    xb, yb = get_batch(train_word_list, train_len, batch_size, frac = 0.12)
    # evaluate the loss
    logits, loss = m(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

step 0: train loss 2.4902
step 1000: train loss 2.0596
step 2000: train loss 1.9442
step 3000: train loss 1.8667
step 4000: train loss 1.8126
step 4999: train loss 1.7764


In [90]:
# print(m.predict(test_x).shape)
m.eval()

def guess(model, word):
    idx = torch.tensor(np.reshape(encode(word.ljust(30)), (1, -1))).to(device)
    logits, loss = model(idx)
#     for itr in idx[0]:
#         if itr not in [0, 1]:
#             logits[:, :, itr] = logits[:, :, itr] - 10000
    probs = F.softmax(logits, dim = -1).masked_fill(idx[:,:,None]!=1, 0)
    max_probs = np.sum(probs[0].to('cpu').detach().numpy(), axis = 0)

    return decode(list(np.argsort(-max_probs)))

def guess_word(model, word, n = 0, p = 1, guessed = '', max_depth = 4):
    if '_' not in word or max_depth == 0:
        return word, p, None
    idx = torch.tensor(np.reshape(encode(word.ljust(30)), (1, -1))).to(device)
    logits, loss = model(idx)
    
    letters_present = 0.1*torch.arange(2, 28, dtype = torch.float).repeat(block_size, 1) #(T, V)
    letters_present = letters_present - 0.1*torch.tensor(encode(guessed.ljust(30)))[ :, None] 
    mask = torch.prod(letters_present, dim = 0) == 0 # (V)

    probs = F.softmax(logits[:, :, 2:28], dim = -1).masked_fill(idx[:,:,None]!=1, 0).masked_fill(mask.to(device)[None, :], 0)

    probs = probs[0].to('cpu').detach().numpy() # (T, V)

    best_ix, best_letter = np.dstack(np.unravel_index(np.argsort(probs.ravel()), probs.shape))[0][-n-1]
    
    best_prob = probs[best_ix, best_letter]
    new_idx = idx
    new_idx[0, best_ix] = best_letter+2
    cand_word, cand_prob, _ = guess_word(model, decode(list(new_idx.to('cpu').numpy()[0])),p = p*best_prob, guessed=guessed, max_depth=max_depth-1)
    
    return cand_word, cand_prob, best_letter
    
def guess_words(model, word, r = 2, out = 0, guessed = '', max_depth = 4):
    cand_words, cand_probs, cand_letters = [], [], []
    for k in range(r):
        cand_word, cand_prob, best_letter = guess_word(model, word, n = k, guessed=guessed, max_depth = max_depth)
        cand_words.append(cand_word)
        cand_probs.append(cand_prob)
        cand_letters.append(best_letter)
        if out: print(cand_word, cand_prob, best_letter)
    return cand_words, cand_probs, cand_letters


# Basic player using greedy decoding

class HangmanPlayer:
    def __init__(self, model):
        self.guessed_letters = []
        self.model = model
        self.no_letter = True
    def guess(self, question):
        question = question.replace('#', '_')
        guesses = guess(self.model, question)
        for itr in guesses:
            if itr not in self.guessed_letters:
                self.guessed_letters.append(itr)
                return itr
        return 'a'
    def new_game(self):
        self.guessed_letters = []
        self.no_letter = True

#Advanced player exploring multiple candidate guesses via beam search

class HangmanPlayer2:
    def __init__(self, model, r= 10, max_depth = 1):
        self.guessed_letters = []
        self.model = model
        self.no_letter = True
        self.r = r
        self.max_depth = max_depth
    def guess(self, question, out = True):
        question = question.replace('#', '_')
        cand_words, cand_probs, cand_letters = guess_words(self.model,
                                                           question,
                                                           guessed = ''.join(self.guessed_letters),
                                                           r = self.r, max_depth = self.max_depth, out = out)
        probs = np.zeros(26)
        for itr in range(len(cand_probs)):
            probs[cand_letters[itr]] += cand_probs[itr]
        self.guessed_letters.append(chr(np.argmax(probs) + 97))
        return chr(np.argmax(probs) + 97)
    def new_game(self):
        self.guessed_letters = []
        self.no_letter = True



In [91]:

'''Example usage'''
# Initialize a basic and an advanced player using the same model m
player = HangmanPlayer(m)
player2 = HangmanPlayer2(m, r=10, max_depth=3)

# Play game with masked word "c__racci"
print('word : "c__racci"')
print('Basic PLayer :', player.guess("c__racci"))
print('Advanced Player : ', player2.guess("c__racci"))

word : "c__racci"
Basic PLayer : o
corracci                       0.20842319024445644 14
corracci                       0.12426284593662462 17
characci                       0.04796324174494737 7
cotracci                       0.06148024131231722 19
carracci                       0.0566899705950652 0
cheracci                       0.05087588744001614 4
couracci                       0.057592635103160816 20
characci                       0.04213072745798607 0
codracci                       0.031213457630465458 3
choracci                       0.025651959669659163 14
Advanced Player :  o


In [93]:
test_x, test_z = get_batch(train_word_list, train_len, 10)


In [94]:
''' test trained model on a few masked words'''

players = []

for r in [10]:
    for depth in [1, 2, 5]:
        players.append(HangmanPlayer2(m, r=r, max_depth=depth))



for itr in range(3,5):
    print('question : ', decode(list(test_x[itr].to('cpu').numpy())))
    print('answer: ', decode(list(test_z[itr].to('cpu').numpy())))
    question = decode(list(test_x[itr].to('cpu').numpy()))
    for player in players:
        print("r, max_depth, guess : ", player.r, player.max_depth, player.guess(question))
        player.guessed_letters = []


question :  lo___is_                      
answer:  lourdish                      
lo___ism                       0.30695298314094543 12
lo___ish                       0.2587122619152069 7
lo___ist                       0.18732082843780518 19
lo___ise                       0.17641445994377136 4
lo__nis_                       0.15688887238502502 13
lo_o_is_                       0.14794181287288666 14
lo_a_is_                       0.14699797332286835 0
lou__is_                       0.14162302017211914 20
lo_e_is_                       0.13472549617290497 4
lo__lis_                       0.12899984419345856 11
r, max_depth, guess :  10 1 e
lo__nism                       0.060420123027945305 12
lo__rish                       0.04208172472672933 7
lo__rist                       0.03585709325099096 19
lo_o_ise                       0.027894955202707905 4
lo__nish                       0.048033181793477375 13
lo_o_ism                       0.04369393327882287 14
lo_a_ism                   

In [106]:
'''Game'''
class HangmanServer:
    def __init__(self, player, n = 100):
        self.player = player
        self.test_words = self.read_test_words()[:100]
        

    @staticmethod
    def read_test_words():
        with open('../words_alpha_train_unique.txt') as f:
            words = f.read().split('\n')[:-1]
        np.random.shuffle(words)
        return words

    @staticmethod
    def data_iter(words):
        for word in words:
            _, answer = word.split(',')
            question = '#' * len(answer)
            yield question, answer

    def run(self):
        test_words = self.test_words
        qa_pair = self.data_iter(test_words)
        success = total = 0
        success_rate = 0
        print(f"Total Game Number: {len(test_words)}")
        for question, answer in qa_pair:
            self.player.new_game()
            tries = 6
            success_rate = 0 if total == 0 else success / total
            print("=" * 20, "Game %d" % (total + 1), '=' * 20, "Success Rate: %.2f" % success_rate)
            # if (total + 1) % 100 == 0:
            #     print(total + 1)
            print('provided question: ', " ".join(question))
            while '#' in question and tries > 0:
                guess = self.player.guess(question)
                question_lst = []
                for q_l, a_l in zip(question, answer):
                    if q_l == '#':
                        if a_l == guess:
                            question_lst.append(a_l)
                        else:
                            question_lst.append(q_l)
                    else:
                        question_lst.append(q_l)
                question = "".join(question_lst)
                if guess not in answer:
                    tries -= 1
                print("provided question: ", " ".join(question), "your guess: %s" % guess, "left tries: %d" % tries, 'answer: %s' % answer)

            if '#' not in question:
                success += 1
            total += 1

        print(f"{success} success out of {total} tries, rate: {success / total:.4f}")
        return(success / total)



server = HangmanServer(player)


In [109]:
'''test basic player'''
player = HangmanPlayer(m)
type(player)

win_rate = server.run()

Total Game Number: 100
provided question:  # # # # # # # #
provided question:  # # # # # # # # your guess: e left tries: 5 answer: arightly
provided question:  a # # # # # # # your guess: a left tries: 5 answer: arightly
provided question:  a # # # # # # # your guess: s left tries: 4 answer: arightly
provided question:  a r # # # # # # your guess: r left tries: 4 answer: arightly
provided question:  a r i # # # # # your guess: i left tries: 4 answer: arightly
provided question:  a r i # # # # # your guess: n left tries: 3 answer: arightly
provided question:  a r i # # t # # your guess: t left tries: 3 answer: arightly
provided question:  a r i # # t # # your guess: o left tries: 2 answer: arightly
provided question:  a r i # # t # # your guess: m left tries: 1 answer: arightly
provided question:  a r i # # t # # your guess: d left tries: 0 answer: arightly
provided question:  # # # #
provided question:  # e # # your guess: e left tries: 6 answer: feru
provided question:  # e r # your g