In [1]:
import time
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.Models import Transformer, Transformer2
from model.Optim import CosineWithRestarts
from model.Batch import create_masks
from utils.utils import MyTokenizer, MyMasker
from utils.data import TextDataset
from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
# Loading data
bs=128
dataset = TextDataset()
train_size = int(0.99*len(dataset))
test_size = len(dataset)-train_size

print(train_size, test_size)

225027 2273


In [3]:
masker = MyMasker()
tokenizer = MyTokenizer(32)

train_dataset, val_dataset = random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(0))

In [4]:
trainloader = DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True, num_workers=0)
valloader = DataLoader(dataset=val_dataset, batch_size=bs, shuffle=True, num_workers=0)

In [5]:
# Loading Tranformer model from scratch
max_len = 32
model = Transformer(src_vocab=28, d_model=128, max_seq_len=max_len, N=12, heads=8, dropout=0.1)
model.to('cuda')
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

In [6]:
masker = MyMasker()
tokenizer = MyTokenizer(max_len)

optim = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)

In [7]:
def train_model(model, bs, epochs, printevery):

    print("training model...")
    start = time.time()
    if torch.cuda.is_available():
        print('gpu detected!')
    else:
        print('no gpu detected')
        return 0

    model.train()
    for epoch in range(epochs):

        total_loss = 0

        for i, trg in enumerate(trainloader):

            # src = batch.src.transpose(0,1)
            # trg = batch.trg.transpose(0,1)
            # trg_input = trg[:, :-1]
            # src_mask, _ = create_masks(src, trg_input) # need to edit

            # test to check if overfit

            # src is the incomplete word
            perc=None
            src = masker.mask(trg, perc)  # e.g. [m_zh__n, _s, _w_eso_e]
            src = tokenizer.encode(src)  # e.g. [[], [], []]
            
            # trg is the complete word
            trg = tokenizer.encode(trg)

            # our src_mask is the same as trg_mask = mask
            mask, _ = create_masks(src)  # e.g. [[1, 1, 0, 0], [1, 0, 0, 0], [1, 1, 1, 0]]

            # Converting to cuda
            if torch.cuda.is_available():
                src = src.to('cuda')
                mask = mask.to('cuda')
                trg = trg.to('cuda')
            
            model.train()
            # preds = model(src, mask)
            preds = model(src)
            # ys = trg[:, 1:].contiguous().view(-1)
            # y = mask.squeeze(1)
            
            # 

            optim.zero_grad()
            loss = F.cross_entropy(preds.view(-1, preds.size(-1)), trg.contiguous().view(-1), ignore_index=0)
            loss.backward()
            optim.step()

            total_loss += loss.item()

            # print(i+1)
            if (i + 1) % printevery == 0:
                p = int(100 * (i + 1) / len(trainloader.dataset) * bs)
                avg_loss = total_loss / printevery
                print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f" %((time.time() - start)//60, epoch + 1, "".join('#'*(p//5)), "".join(' '*(20-(p//5))), p, avg_loss), end='')
                total_loss = 0

            
            if (i+1) % 10 == 0:
                torch.save(model.state_dict(), f'./weights/model_automask_weights_{datetime.today().strftime("%m%d%Y")}')
                pass
                
        total_val_loss = 0
        sims = 0
        for i, val in enumerate(valloader):
            perc=None
            src = masker.mask(val, perc)  # e.g. [m_zh__n, _s, _w_eso_e]
            src = tokenizer.encode(src)  # e.g. [[], [], []]
            
            # trg is the complete word
            val = tokenizer.encode(val)
            
            # our src_mask is the same as trg_mask = mask
            mask, _ = create_masks(src)  # e.g. [[1, 1, 0, 0], [1, 0, 0, 0], [1, 1, 1, 0]]
            
            # Converting to cuda
            if torch.cuda.is_available():
                src = src.to('cuda')
                mask = mask.to('cuda')
                val = val.to('cuda')
            
            model.eval()
            preds = model(src)
            
            loss = F.cross_entropy(preds.view(-1, preds.size(-1)), val.contiguous().view(-1), ignore_index=0)
            
            total_val_loss += loss.item()
            sims += 1
            if (i + 1) % printevery == 0:
                p = int(100 * (i + 1) / len(valloader.dataset) * bs)
                avg_val_loss = total_val_loss / sims
                print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f" %((time.time() - start)//60, epoch + 1, "".join('#'*(p//5)), "".join(' '*(20-(p//5))), p, avg_val_loss), end='')
            
        print("\r   %dm: epoch %d [%s%s]  %d%%  loss = %.3f\nepoch %d complete, val loss = %.03f" %\
        ((time.time() - start)//60, epoch + 1, "".join('#'*(100//5)), "".join(' '*(20-(100//5))), 100, avg_loss, epoch + 1, avg_val_loss))

In [8]:
# train_model(model, bs=bs, epochs=25, printevery=1)

In [9]:
start = ord('a')
alphabets = {'_': 27}
ids = {27:'_', 0:''}
for i in range(26):
    ch = chr(start)
    alphabets[ch] = i+1
    ids[i+1] = ch
    start += 1

In [10]:
from agent import Agent
from model.Models import PGN


pgn = PGN(src_vocab=28, d_model=128, max_seq_len=32, N=12, heads=8, dropout=0.1)
pgn.transformer.load_state_dict(torch.load('./weights/model_weights_03202024'))

'''
pgn = PGN(src_vocab=28, d_model=32, max_seq_len=32, N=2, heads=4, dropout=0.1)
pgn.transformer.load_state_dict(torch.load('./weights/model_weights_lite_1'))
'''

if torch.cuda.is_available():
    pgn.to('cuda')

pgn.eval()

PGN(
  (transformer): Transformer(
    (encoder): Encoder(
      (embed): Embedder(
        (embed): Embedding(28, 128, padding_idx=0)
      )
      (pe): PositionalEncoder(
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (layers): ModuleList(
        (0-11): 12 x EncoderLayer(
          (norm_1): Norm()
          (norm_2): Norm()
          (attn): MultiHeadAttention(
            (q_linear): Linear(in_features=128, out_features=128, bias=True)
            (v_linear): Linear(in_features=128, out_features=128, bias=True)
            (k_linear): Linear(in_features=128, out_features=128, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (out): Linear(in_features=128, out_features=128, bias=True)
          )
          (ff): FeedForward(
            (linear_1): Linear(in_features=128, out_features=2048, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (linear_2): Linear(in_features=2048, out_features=128, bias=True)
        

In [11]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from sklearn.utils import shuffle

class HangmanEnv(gym.Env):
    def __init__(self, dataloader, max_seq_len=32, init_counter=0):
        super(HangmanEnv, self).__init__()

        self.dataset = dataloader.dataset
        self.counter = init_counter
        self.max_seq_len = max_seq_len
        self.action_space = spaces.Discrete(26)  # 26 possible actions (a-z)
        self.observation_space = spaces.Box(low=0, high=27, shape=(self.max_seq_len,), dtype=int)

        self.hidden_word = None
        self.word_length = None

        self.guessed_letters = set()
        self.remaining_attempts = 6  # Maximum attempts
        self.current_state = np.zeros(32, dtype=int)  # Initial state
        self.game_over = False

    def reset(self, *, seed=0, options=None):
        self.hidden_word = self.dataset[self.counter]
        self.counter += 1
        self.word_length = len(self.hidden_word)

        self.guessed_letters = set()
        self.remaining_attempts = 6  # Maximum attempts
        self.current_state = np.zeros(32, dtype=int)  # Initial state
        self.game_over = False

        current_word = ''.join([char if char in self.guessed_letters else '_' for char in self.hidden_word])
        self.current_state = self.word2state(current_word)
        return self.current_state, {'word': current_word, 'hidden_word': self.hidden_word, 'guessed_letters': self.guessed_letters}

    def generate_random_word(self):
        # Replace this with your logic for generating random words
        word_list = self.dataset
        idx = self.counter % len(word_list)
        self.counter += 1
        return word_list[idx]

    def step(self, action):
        if action in self.guessed_letters:
            print("You have already guessed that letter.")
        else:
            self.guessed_letters.add(action)
            if action in self.hidden_word:
                reward = 0
            else:
                reward = 0
                self.remaining_attempts -= 1

        if set(self.hidden_word) <= self.guessed_letters or self.remaining_attempts == 0:
            reward = 1 if set(self.hidden_word) <= self.guessed_letters else 0
            self.game_over = True

        current_word = ''.join([char if char in self.guessed_letters else '_' for char in self.hidden_word])
        self.current_state = self.word2state(current_word)
        return self.current_state, reward, self.game_over, self.game_over, {'word': current_word, 'hidden_word': self.hidden_word, 'guessed_letters': self.guessed_letters}

    def word2state(self, word):
        state = [27 if char == '_' else ord(char) - ord('a') + 1 for char in word]
        while len(state) < self.max_seq_len:
            state.append(0)
        return state

In [12]:
def mini_sim(sample, env):
    env.reset(sample[0])
#     env.reset()
    n = len(sample[0])
    state = masker.mask(sample, 1)
    sample_mask, _ = create_masks(tokenizer.encode(sample))
    mask = sample_mask.to('cuda')
    y = sample_mask.squeeze(1).to('cuda')
    y_float = torch.where(y, 1., 0.)
    
    left = torch.ones((1, 28)).to('cuda')
    left[0,  0] = 0.
    left[0, -1] = 0.
    
    P = nn.Softmax(dim=-1)
    
    done = False
    
    cr = 0

    while not done:
        
        state = tokenizer.encode(state)
        state = state.to('cuda')

        probs = pgn(state)
        print(probs.shape)
        
        b_probs = torch.mul(probs, left)
        b_probs = b_probs / torch.sum(b_probs)
        b = torch.distributions.Categorical(probs=b_probs)

        action = b.sample()
        
        # using a greedy approach
        guess_id = torch.argmax(b_probs).item()
        
        # guess_id = action.item()
        guess = ids[guess_id]
        
        next_state, r, done = env.step(guess)
        state = [''.join(next_state)]
        
#         next_state, r, done, _, info = env.step(guess)
#         state = [info['word']]

        
        left[0, guess_id] = 0.
        
        cr += r
    
    return cr

In [22]:
def mini_sim_gymenv(sample, env):
#     env.reset(sample[0])
    env.reset()
    n = len(sample[0])
    state = masker.mask(sample, 1)
    sample_mask, _ = create_masks(tokenizer.encode(sample))
    mask = sample_mask.to('cuda')
    y = sample_mask.squeeze(1).to('cuda')
    y_float = torch.where(y, 1., 0.)
    
    left = torch.ones((1, 28)).to('cuda')
    left[0,  0] = 0.
    left[0, -1] = 0.
    
    P = nn.Softmax(dim=-1)
    
    done = False
    
    cr = 0

    while not done:
        
        state = tokenizer.encode(state)
        state = state.to('cuda')
        
        probs = pgn(state)
        
        b_probs = torch.mul(probs, left)
        b_probs = b_probs / torch.sum(b_probs)
        b = torch.distributions.Categorical(probs=b_probs)

        guess_id = b.sample()
        
        # get random action
        guess_id = 
        
        # using a greedy approach
#         guess_id = torch.argmax(b_probs)
        
        # guess_id = action.item()
        guess = ids[guess_id.item()]
        
#         next_state, r, done = env.step(guess)
#         state = [''.join(next_state)]
        
        next_state, r, done, _, info = env.step(guess)
        state = [info['word']]

        
        left[0, guess_id] = 0.
        
        cr += r
    
    return cr

In [26]:
from env.hangman import Hangman

def test_pgn(valloader):
    
    env = Hangman(n_episode=26)
    gym_env = HangmanEnv(valloader)
    
    wins = 0
    reward = 0
    total_games = 0
    start_time = time.time()
    pgn.eval()
    for i, state in enumerate(valloader):
        
#         if total_games > 100 : return
        
#         cr = mini_sim(state, env)
        cr = mini_sim_gymenv(state, gym_env)
        
        if cr > 0:
            wins += 1
            # print(state)
        total_games += 1
        reward += cr
        
        avg_reward = reward / total_games
        win_rate = wins / total_games
        
        mean_time_per_game = (time.time() - start_time) / total_games 
        
        print('\r  wins : %d \t total games : %d \t win rate : %.03f%% \t time_per_game : %.03f \t average reward : %.03f ' %(wins, total_games, 100*win_rate, mean_time_per_game, 0), end='')
     

In [27]:
trainloader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False, num_workers=0)
valloader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False, num_workers=0)

In [28]:
t_ = time.time()
test_pgn(valloader)
print("\n", time.time() - t_)

  wins : 696 	 total games : 2273 	 win rate : 30.620% 	 time_per_game : 0.421 	 average reward : 0.000 
 956.7123963832855


In [None]:
valloader.dataset[0]

In [None]:
valloader.batch_size

In [6]:
def make_env(idx):
    def thunk():
        env = HangmanEnv(dataloader=valloader)
        return env
    return thunk

In [108]:
envs = gym.vector.SyncVectorEnv(
        [lambda: HangmanEnv(valloader) for i in range(valloader.batch_size)]
    )

In [109]:
# Example usage:
obs, info = envs.reset()
done = False
while not done:
    action = ['ab', 'a', 'a']
    obs, reward, terminated, truncated, info = envs.step(action)
    print(tokenizer.decode(obs))
    break
#     env.render()

{'word': array(['_________________', '___________', '_____________'], dtype=object), '_word': array([ True,  True,  True])}
['_________________', '_______a___', '_____________']


In [80]:
def mini_sim2(sample):
    env = HangmanEnv(sample[0])
    n = len(sample[0])
    state = masker.mask(sample, 1)
    sample_mask, _ = create_masks(tokenizer.encode(sample))
    mask = sample_mask.to('cuda')
    y = sample_mask.squeeze(1).to('cuda')
    y_float = torch.where(y, 1., 0.)
    
    left = torch.ones((1, 28)).to('cuda')
    left[0,  0] = 0.
    left[0, -1] = 0.
    
    P = nn.Softmax(dim=-1)
    
    done = False
    
    cr = 0

    while not done:
        
        # print(state)
        
        state = tokenizer.encode(state)
        state = state.to('cuda')
        
        # q_probs = score / torch.sum(score)
        
        probs = pgn(state, mask)
        
        b_probs = torch.mul(probs, left)
        b_probs = b_probs / torch.sum(b_probs)
        b = torch.distributions.Categorical(probs=b_probs)

        action = b.sample()
        
        # using a greedy approach
        guess_id = torch.argmax(b_probs).item()
        
        # guess_id = action.item()
        guess = ids[guess_id]
        
        next_state, r, done, _ = env.step(guess)
        
        state = [''.join(next_state)]
#         print(state) #, guess, r, next_state)
        
        left[0, guess_id] = 0.
        
        cr += r
        # print(guess, cr)
    
    return cr

In [81]:
# from env.hangman import Hangman, HangmanEnv

def test_pgn2(valloader):
    
    wins = 0
    reward = 0
    total_games = 0
    pgn.eval()
    for i, state in enumerate(valloader):
        
        if total_games > 10: return
        
        cr = mini_sim2(state)
        if cr > - 6:
            wins += 1
            # print(state)
        total_games += 1
        reward += cr
        
        avg_reward = reward / total_games
        win_rate = wins / total_games
        print('\r  wins : %d \t total games : %d \t win rate : %.03f%% \t reward : %.03f \t average reward : %.03f ' %(wins, total_games, 100*win_rate, cr, avg_reward), end='')

In [83]:
t_ = time.time()
test_pgn2(valloader)
print("\n", time.time() - t_)

  wins : 6 	 total games : 11 	 win rate : 54.545% 	 reward : -3.000 	 average reward : -5.364 
 1.878509521484375
