In [1]:
import time
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.Models import Transformer, Transformer2
from model.Optim import CosineWithRestarts
from model.Batch import create_masks
from utils.utils import MyTokenizer, MyMasker
from utils.data import TextDataset
from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
# Loading data
bs=128
dataset = TextDataset()
train_size = int(0.99*len(dataset))
test_size = len(dataset)-train_size

print(train_size, test_size)

225027 2273


In [3]:
masker = MyMasker()
tokenizer = MyTokenizer(32)

train_dataset, val_dataset = random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(0))

In [4]:
trainloader = DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True, num_workers=0)
valloader = DataLoader(dataset=val_dataset, batch_size=bs, shuffle=True, num_workers=0)

In [5]:
# Loading Tranformer model from scratch
max_len = 32
model = Transformer(src_vocab=28, d_model=128, max_seq_len=max_len, N=12, heads=8, dropout=0.1)
model.to('cuda')
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

In [6]:
masker = MyMasker()
tokenizer = MyTokenizer(max_len)

optim = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)

In [7]:
def train_model(model, bs, epochs, printevery):

    print("training model...")
    start = time.time()
    if torch.cuda.is_available():
        print('gpu detected!')
    else:
        print('no gpu detected')
        return 0

    model.train()
    for epoch in range(epochs):

        total_loss = 0

        for i, trg in enumerate(trainloader):

            # src = batch.src.transpose(0,1)
            # trg = batch.trg.transpose(0,1)
            # trg_input = trg[:, :-1]
            # src_mask, _ = create_masks(src, trg_input) # need to edit

            # test to check if overfit

            # src is the incomplete word
            perc=None
            src = masker.mask(trg, perc)  # e.g. [m_zh__n, _s, _w_eso_e]
            src = tokenizer.encode(src)  # e.g. [[], [], []]
            
            # trg is the complete word
            trg = tokenizer.encode(trg)

            # our src_mask is the same as trg_mask = mask
            mask, _ = create_masks(src)  # e.g. [[1, 1, 0, 0], [1, 0, 0, 0], [1, 1, 1, 0]]

            # Converting to cuda
            if torch.cuda.is_available():
                src = src.to('cuda')
                mask = mask.to('cuda')
                trg = trg.to('cuda')
            
            model.train()
            # preds = model(src, mask)
            preds = model(src)
            # ys = trg[:, 1:].contiguous().view(-1)
            # y = mask.squeeze(1)
            
            # 

            optim.zero_grad()
            loss = F.cross_entropy(preds.view(-1, preds.size(-1)), trg.contiguous().view(-1), ignore_index=0)
            loss.backward()
            optim.step()

            total_loss += loss.item()

            # print(i+1)
            if (i + 1) % printevery == 0:
                p = int(100 * (i + 1) / len(trainloader.dataset) * bs)
                avg_loss = total_loss / printevery
                print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f" %((time.time() - start)//60, epoch + 1, "".join('#'*(p//5)), "".join(' '*(20-(p//5))), p, avg_loss), end='')
                total_loss = 0

            
            if (i+1) % 10 == 0:
                torch.save(model.state_dict(), f'./weights/model_automask_weights_{datetime.today().strftime("%m%d%Y")}')
                pass
                
        total_val_loss = 0
        sims = 0
        for i, val in enumerate(valloader):
            perc=None
            src = masker.mask(val, perc)  # e.g. [m_zh__n, _s, _w_eso_e]
            src = tokenizer.encode(src)  # e.g. [[], [], []]
            
            # trg is the complete word
            val = tokenizer.encode(val)
            
            # our src_mask is the same as trg_mask = mask
            mask, _ = create_masks(src)  # e.g. [[1, 1, 0, 0], [1, 0, 0, 0], [1, 1, 1, 0]]
            
            # Converting to cuda
            if torch.cuda.is_available():
                src = src.to('cuda')
                mask = mask.to('cuda')
                val = val.to('cuda')
            
            model.eval()
            preds = model(src)
            
            loss = F.cross_entropy(preds.view(-1, preds.size(-1)), val.contiguous().view(-1), ignore_index=0)
            
            total_val_loss += loss.item()
            sims += 1
            if (i + 1) % printevery == 0:
                p = int(100 * (i + 1) / len(valloader.dataset) * bs)
                avg_val_loss = total_val_loss / sims
                print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f" %((time.time() - start)//60, epoch + 1, "".join('#'*(p//5)), "".join(' '*(20-(p//5))), p, avg_val_loss), end='')
            
        print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f\nepoch %d complete, val loss = %.03f" %\
        ((time.time() - start)//60, epoch + 1, "".join('#'*(100//5)), "".join(' '*(20-(100//5))), 100, avg_loss, epoch + 1, avg_val_loss))

In [8]:
# train_model(model, bs=bs, epochs=25, printevery=1)

In [9]:
start = ord('a')
alphabets = {'_': 27}
ids = {27:'_', 0:''}
for i in range(26):
    ch = chr(start)
    alphabets[ch] = i+1
    ids[i+1] = ch
    start += 1

In [10]:
from agent import Agent
from model.Models import PGN


pgn = PGN(src_vocab=28, d_model=128, max_seq_len=32, N=12, heads=8, dropout=0.1)
pgn.transformer.load_state_dict(torch.load('./weights/model_weights_03202024'))

'''
pgn = PGN(src_vocab=28, d_model=32, max_seq_len=32, N=2, heads=4, dropout=0.1)
pgn.transformer.load_state_dict(torch.load('./weights/model_weights_lite_1'))
'''

if torch.cuda.is_available():
    pgn.to('cuda')

pgn.eval()

PGN(
  (transformer): Transformer(
    (encoder): Encoder(
      (embed): Embedder(
        (embed): Embedding(28, 128, padding_idx=0)
      )
      (pe): PositionalEncoder(
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (layers): ModuleList(
        (0-11): 12 x EncoderLayer(
          (norm_1): Norm()
          (norm_2): Norm()
          (attn): MultiHeadAttention(
            (q_linear): Linear(in_features=128, out_features=128, bias=True)
            (v_linear): Linear(in_features=128, out_features=128, bias=True)
            (k_linear): Linear(in_features=128, out_features=128, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (out): Linear(in_features=128, out_features=128, bias=True)
          )
          (ff): FeedForward(
            (linear_1): Linear(in_features=128, out_features=2048, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (linear_2): Linear(in_features=2048, out_features=128, bias=True)
        

In [65]:
def mini_sim(envs):
    obs, info = envs.reset()
    state = info['word']
    
    mask, _ = create_masks(torch.tensor(obs))
    mask = mask.to('cuda')
    
    left = torch.ones((1, 28)).to('cuda')
    left[0,  0] = 0.
    left[0, -1] = 0.
    
    done = False
    cr = 0
    while not done:       
        state = tokenizer.encode(state)
        state = state.to('cuda')
        
        probs = pgn(state, mask)
        
        b_probs = torch.mul(probs, left)
        b_probs = b_probs / torch.sum(b_probs)
        print(b_probs)
        b = torch.distributions.Categorical(probs=b_probs)

        action = b.sample()
        
        # using a greedy approach
        guess_id = torch.argmax(b_probs).item()
        
        # guess_id = action.item()
        guess = ids[guess_id]
        
        next_state, reward, terminated, truncated, info = envs.step(guess)
        state = info['word']

        
        left[0, guess_id] = 0.
        
        cr += reward
        # print(guess, cr)
    
    return cr

In [66]:
from env.hangman import Hangman, HangmanEnv
import gymnasium as gym

def test_pgn(valloader):
    
    envs = gym.vector.SyncVectorEnv(
        [lambda: HangmanEnv(valloader) for i in range(valloader.batch_size)]
    )
    
    wins = 0
    reward = 0
    total_games = 0
    pgn.eval()
    for i, state in enumerate(valloader):
        
        if total_games > 10 : return
        
        cr = mini_sim(envs)
        if cr > - 6:
            wins += 1
            # print(state)
        total_games += 1
        reward += cr
        
        avg_reward = reward / total_games
        win_rate = wins / total_games
        print('\r  wins : %d \t total games : %d \t win rate : %.03f%% \t reward : %.03f \t average reward : %.03f ' %(wins, total_games, 100*win_rate, cr, avg_reward), end='')

In [67]:
trainloader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False, num_workers=0)
valloader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False, num_workers=0)

In [68]:
t_ = time.time()
test_pgn(valloader)
print("\n", time.time() - t_)

tensor([[0.0000, 0.0882, 0.0236, 0.0341, 0.0349, 0.1156, 0.0171, 0.0269, 0.0273,
         0.0818, 0.0022, 0.0160, 0.0640, 0.0275, 0.0645, 0.0615, 0.0277, 0.0016,
         0.0695, 0.0810, 0.0566, 0.0331, 0.0093, 0.0145, 0.0026, 0.0150, 0.0040,
         0.0000]], device='cuda:0', grad_fn=<DivBackward0>)
tensor([[0.0000, 0.0997, 0.0267, 0.0385, 0.0394, 0.0000, 0.0193, 0.0304, 0.0309,
         0.0925, 0.0025, 0.0181, 0.0723, 0.0311, 0.0729, 0.0696, 0.0314, 0.0018,
         0.0785, 0.0916, 0.0640, 0.0375, 0.0105, 0.0163, 0.0030, 0.0170, 0.0045,
         0.0000]], device='cuda:0', grad_fn=<DivBackward0>)
tensor([[0.0000, 0.0000, 0.0209, 0.0503, 0.0365, 0.0000, 0.0131, 0.0332, 0.0245,
         0.1198, 0.0011, 0.0078, 0.0780, 0.0337, 0.0858, 0.0888, 0.0374, 0.0017,
         0.0738, 0.1160, 0.0840, 0.0485, 0.0099, 0.0056, 0.0044, 0.0201, 0.0052,
         0.0000]], device='cuda:0', grad_fn=<DivBackward0>)
tensor([[0.0000, 0.0000, 0.0173, 0.1172, 0.0432, 0.0000, 0.0174, 0.0282, 0.0230,
         0

ValueError: Expected parameter probs (Tensor of shape (1, 28)) of distribution Categorical(probs: torch.Size([1, 28])) to satisfy the constraint Simplex(), but found invalid values:
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
         nan, nan, nan, nan]], device='cuda:0', grad_fn=<DivBackward0>)

In [105]:
valloader.dataset[0]

'collocationable'

In [106]:
valloader.batch_size

3

In [107]:
def make_env(idx):
    def thunk():
        env = HangmanEnv(dataloader=valloader)
        return env
    return thunk

In [108]:
envs = gym.vector.SyncVectorEnv(
        [lambda: HangmanEnv(valloader) for i in range(valloader.batch_size)]
    )

In [109]:
# Example usage:
obs, info = envs.reset()
done = False
while not done:
    action = ['ab', 'a', 'a']
    obs, reward, terminated, truncated, info = envs.step(action)
    print(tokenizer.decode(obs))
    break
#     env.render()

{'word': array(['_________________', '___________', '_____________'], dtype=object), '_word': array([ True,  True,  True])}
['_________________', '_______a___', '_____________']


In [80]:
def mini_sim2(sample):
    env = HangmanEnv(sample[0])
    n = len(sample[0])
    state = masker.mask(sample, 1)
    sample_mask, _ = create_masks(tokenizer.encode(sample))
    mask = sample_mask.to('cuda')
    y = sample_mask.squeeze(1).to('cuda')
    y_float = torch.where(y, 1., 0.)
    
    left = torch.ones((1, 28)).to('cuda')
    left[0,  0] = 0.
    left[0, -1] = 0.
    
    P = nn.Softmax(dim=-1)
    
    done = False
    
    cr = 0

    while not done:
        
        # print(state)
        
        state = tokenizer.encode(state)
        state = state.to('cuda')
        
        # q_probs = score / torch.sum(score)
        
        probs = pgn(state, mask)
        
        b_probs = torch.mul(probs, left)
        b_probs = b_probs / torch.sum(b_probs)
        b = torch.distributions.Categorical(probs=b_probs)

        action = b.sample()
        
        # using a greedy approach
        guess_id = torch.argmax(b_probs).item()
        
        # guess_id = action.item()
        guess = ids[guess_id]
        
        next_state, r, done, _ = env.step(guess)
        
        state = [''.join(next_state)]
#         print(state) #, guess, r, next_state)
        
        left[0, guess_id] = 0.
        
        cr += r
        # print(guess, cr)
    
    return cr

In [81]:
# from env.hangman import Hangman, HangmanEnv

def test_pgn2(valloader):
    
    wins = 0
    reward = 0
    total_games = 0
    pgn.eval()
    for i, state in enumerate(valloader):
        
        if total_games > 10: return
        
        cr = mini_sim2(state)
        if cr > - 6:
            wins += 1
            # print(state)
        total_games += 1
        reward += cr
        
        avg_reward = reward / total_games
        win_rate = wins / total_games
        print('\r  wins : %d \t total games : %d \t win rate : %.03f%% \t reward : %.03f \t average reward : %.03f ' %(wins, total_games, 100*win_rate, cr, avg_reward), end='')

In [83]:
t_ = time.time()
test_pgn2(valloader)
print("\n", time.time() - t_)

  wins : 6 	 total games : 11 	 win rate : 54.545% 	 reward : -3.000 	 average reward : -5.364 
 1.878509521484375
