In [None]:
from comet_ml import Experiment
import torch
import transformers
from torchtext.datasets import WikiText2
from typing import List
import einops
from tqdm import tqdm

In [None]:
def count_periods(s, match) -> int:
    num_periods = 0
    for c in s:
        if c == match:
            num_periods += 1
    return num_periods

In [None]:
model = transformers.GPT2LMHeadModel.from_pretrained('gpt2')
ref_model = transformers.GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = transformers.GPT2Tokenizer.from_pretrained('gpt2')

In [None]:
train_iter  = WikiText2(split='train')
train_iter

<torchtext.data.datasets_utils._RawTextIterableDataset at 0x7fcc64780370>

In [None]:
train_dataset = list(train_iter)

In [None]:
train_dataset = [s.strip() for s in train_dataset]
train_dataset = [s.split(".")[0] for s in train_dataset if len(s) > 10]

In [None]:
len(train_dataset)

23527

In [None]:
train_dataset[:5]

['= Valkyria Chronicles III =',
 'Senjō no Valkyria 3 : <unk> Chronicles ( Japanese : 戦場のヴァルキュリア3 , lit ',
 'The game began development in 2010 , carrying over a large portion of the work done on Valkyria Chronicles II ',
 'It met with positive sales in Japan , and was praised by both Japanese and western critics ',
 '= = Gameplay = =']

In [None]:
# tokenized_train_dataset = tokenizer(train_dataset)['input_ids']
# # tokenized_train_dataset

In [None]:
prefix = tokenizer('This is')['input_ids']
prefix

[1212, 318]

In [None]:
def train(model, tokenizer, prefix: List[int], num_iters = 20, generation_length: int = 20, batch_size: int = 20, kl_coef: float = 0.05, lr: float = 3e-5, use_lambda_lr = True):
    
    experiment = Experiment(
        api_key="LHtOmtcbzAp2SrlasnUxvQsFn",
        project_name="policy-grad-nlp",
        workspace="nrimsky",
    )
    
    ignore_params = ["experiment", "tokenizer", "model"]

    
    experiment.log_parameters({k: v for (k, v) in locals().items() if k not in ignore_params})
    
    period_token_id = tokenizer.encode('.')[0]

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    input_ids = torch.tensor(prefix).unsqueeze(0)
    prefix_len = input_ids.shape[-1]
    final_len = input_ids.shape[-1]+generation_length
    
    warmup_period = num_iters // 4
    lambda1 = lambda _iter: min([warmup_period, _iter]) // warmup_period
    
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

    with torch.autograd.set_detect_anomaly(True):
        for _iter in tqdm(range(num_iters)):

            # Shape: batch_size, seq_len (= prefix_len + gen_len)
            sequences = model.generate(input_ids, 
                                       max_length=final_len,
                                       min_length=final_len, 
                                       do_sample=True, 
                                       temperature=0.6, 
                                       top_k=len(tokenizer), 
                                       top_p=1.0, 
                                       use_cache=True,
                                       num_return_sequences=batch_size)

            rewards = [count_periods(sequences[i], match=period_token_id) for i in range(sequences.shape[0])]
            rewards = torch.tensor(rewards, dtype=torch.float)
            experiment.log_metric("mean reward", rewards.mean(), step=_iter)
            experiment.log_metric("std reward", rewards.std(), step=_iter)
            rewards = rewards - rewards.mean()
            rewards = rewards/(rewards.std() + 1e-6)            

            # Shape: batch_size, seq_len (= prefix_len + gen_len), vocab_size
            model_output_logits = model(sequences).logits
            model_output_probs = torch.nn.functional.softmax(model_output_logits, dim=-1)

            loss = 0
            n = (final_len - prefix_len)*sequences.shape[0]

            for i in tqdm(range(sequences.shape[0])):
                sequence = sequences[i]
                sequence_log_prob = 0
                for j in range(prefix_len, final_len):
                    token_id = sequence[j]
                    sequence_log_prob = sequence_log_prob + torch.log(model_output_probs[i, j, token_id])
                    loss = loss + (-sequence_log_prob * rewards[i])
                    experiment.log_metric("sequence_log_prob", sequence_log_prob)
                    
            loss = loss / n
            experiment.log_metric("loss", loss.item(), step=_iter)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            if use_lambda_lr:
                scheduler.step()
                scheduler.zero_grad()
            else:
                optimizer.step()
                optimizer.zero_grad()
    
    experiment.end()
    return model

In [None]:
model = transformers.GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
model = train(model, tokenizer, 
              prefix=prefix)

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/nrimsky/policy-grad-nlp/27e7738515f84213aa3fdbb92721fce0

  0%|          | 0/20 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.

  0%|          | 0/20 [00:00<?, ?it/s][A
 10%|█         | 2/20 [00:00<00:01, 15.90it/s][A
 20%|██        | 4/20 [00:00<00:01, 15.70it/s][A
 30%|███       | 6/20 [00:00<00:00, 15.37it/s][A
 40%|████      | 8/20 [00:00<00:00, 15.31it/s][A
 50%|█████     | 10/20 [00:00<00:00, 15.14it/s][A
 60%|██████    | 12/20 [00:00<00:00, 15.10it/s][A
 70%|███████   | 14/20 [00:00<00:00, 15.09it/s][A
 80%|████████  | 16/20 [00:01<00:00, 15.15it/s][A
 90%|█████████ | 18/20 [00:01<00:00, 15.15it/s][A
100%|██████████| 20/20 [00:01<00:00, 15.07it/s][A
  5%|▌         | 1/20 [01:24<26:48, 84.68s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.

  0%|          | 0/20 [00:00<?, ?it/s][A
 10%|█         | 2/20 [00:00<00:01, 16.04it/s][A
 20%|██ 

In [None]:
model.eval()
final_len = 20


In [None]:
input_ids = torch.tensor(prefix).unsqueeze(0)


In [None]:
input_ids = torch.tensor(tokenizer.encode('I think')).unsqueeze(0)

In [None]:
sequences = model.generate(input_ids, 
                           max_length=final_len,
                           min_length=final_len, 
                           do_sample=True, 
                           temperature=0.6, 
                           top_k=len(tokenizer), 
                           top_p=1.0, 
                           use_cache=True,
                           num_return_sequences=10)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
torch.save(model.state_dict(), 'gpt_period_generator.pt')

In [None]:
tokenizer.batch_decode(sequences)

['This is not a joke...............',
 'This is not about political correctness..............',
 'This is a very sad day for those who love and work hard every day to help these children,',
 'This is something that I have tried to do for years.........',
 'This is the kind of thing that makes us think of Hollywood, and it\'s really exciting," he',
 'This is what I do," he said............',
 'This is a very good opportunity for us to get a better understanding of what is going on in Syria',
 'This is not a good idea, and I hope it is not," he said....',
 'This is a culture of fear and division," he said.\n\n"This is something that has',
 'This is a really good day for America," he said.........']

In [None]:
tokenizer.batch_decode(sequences)

['I think it\'s pretty funny that a lot of people think that they can get away with murder,"',
 'I think we\'re on the right track," said one of the Republicans who has been pushing for a',
 'I think it\'s a shame that they would allow this to happen," said Mr. Muller. "',
 "I think it's important for them to have a sense of urgency.......",
 'I think it\'s important to view a few of these studies as a cautionary tale," says Dr',
 'I think we need to find a way to make sure that all our troops are trained in the best',
 'I think I learned something about myself that day."\n\nThe two were sitting on the porch of',
 "I think I'm gonna make a big deal out of it........",
 "I think it's important to remember that we have a long way to go to repair housing and rebuild",
 'I think there\'s a lot we can learn from these players," Robinson said. "I think we']

The KL-divergence term seems to win out too much. Maybe we should decrease KL coefficient to 0.01? The sequences are too similar to the original pretrained GPT-2

In [None]:
def kl_train(model, ref_model, tokenizer, prefix: List[int], num_iters = 20, generation_length: int = 20, 
             batch_size: int = 20, kl_coef: float = 0.05, lr: float = 3e-5, use_lambda_lr = True):
    
    experiment = Experiment(
        api_key="LHtOmtcbzAp2SrlasnUxvQsFn",
        project_name="policy-grad-nlp",
        workspace="nrimsky",
    )
    
    ignore_params = ["experiment", "tokenizer", "model", "ref_model"]

    
    experiment.log_parameters({k: v for (k, v) in locals().items() if k not in ignore_params})
    
    period_token_id = tokenizer.encode('.')[0]

    model.train()
    ref_model.eval()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    input_ids = torch.tensor(prefix).unsqueeze(0)
    prefix_len = input_ids.shape[-1]
    final_len = input_ids.shape[-1]+generation_length
    
    warmup_period = num_iters // 4
    lambda1 = lambda _iter: min([warmup_period, _iter]) // warmup_period
    
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

    with torch.autograd.set_detect_anomaly(True):
        for _iter in tqdm(range(num_iters)):

            # Shape: batch_size, seq_len (= prefix_len + gen_len)
            sequences = model.generate(input_ids, 
                                       max_length=final_len,
                                       min_length=final_len, 
                                       do_sample=True, 
                                       temperature=0.6, 
                                       top_k=len(tokenizer), 
                                       top_p=1.0, 
                                       use_cache=True,
                                       num_return_sequences=batch_size)

            rewards = [count_periods(sequences[i], match=period_token_id) for i in range(sequences.shape[0])]
            rewards = torch.tensor(rewards, dtype=torch.float)
            experiment.log_metric("mean reward", rewards.mean(), step=_iter)
            experiment.log_metric("std reward", rewards.std(), step=_iter)
            rewards = rewards - rewards.mean()
            rewards = rewards/(rewards.std() + 1e-6)            

            # Shape: batch_size, seq_len (= prefix_len + gen_len), vocab_size
            model_output_logits = model(sequences).logits
            model_output_probs = torch.nn.functional.softmax(model_output_logits, dim=-1)
            
            ref_model_output_logits = ref_model(sequences).logits
            ref_model_output_probs = torch.nn.functional.softmax(ref_model_output_logits, dim=-1)

            loss = 0
            kl = 0
            n = (final_len - prefix_len)*sequences.shape[0]

            for i in tqdm(range(sequences.shape[0])):
                sequence = sequences[i]
                pi = 1.0
                qi = 1.0
                for j in range(prefix_len, final_len):
                    token_id = sequence[j]
                    pi = pi * ref_model_output_probs[i, j, token_id]
                    qi = qi * model_output_probs[i, j, token_id]
                    loss = loss + (-torch.log(qi + 1e-40) * rewards[i])
                    experiment.log_metric("ref_sequence_prob", pi)
                    experiment.log_metric("sequence_prob", qi)
                    kl = kl + pi * torch.log(pi + 1e-40) - pi * torch.log(qi + 1e-40)
                    
            loss = loss + kl * kl_coef
            loss = loss / n
            experiment.log_metric("loss", loss.item(), step=_iter)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            if use_lambda_lr:
                scheduler.step()
                optimizer.zero_grad()
            else:
                optimizer.step()
                optimizer.zero_grad()
    
    experiment.end()
    return model

In [None]:
model = transformers.GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
ref_model = transformers.GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
model = kl_train(model, ref_model, tokenizer, prefix=prefix)

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/nrimsky/policy-grad-nlp/670be0b61dcf44bc9ffd541bc09e5edc
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     loss                    : 0.683276891708374
COMET INFO:     mean reward             : 1.0499999523162842
COMET INFO:     ref_sequence_prob [400] : (0.0, 0.0006263950490392745)
COMET INFO:     sequence_prob [400]     : (0.0, 0.0012835597153753042)
COMET INFO:     std reward              : 0.8870412111282349
COMET INFO:   Parameters:
COMET INFO:     batch_size        : 20
COMET INFO:     generation_length : 20
COMET INFO:     ignore_params     : ['experiment', 'tokenizer', 'model', 'ref_model']
COMET INFO:     kl_coef           : 0.05
COMET INFO:     lr                : 3e-05
COMET INFO:     num_iters         : 20
COMET INFO:     prefix

In [None]:
torch.save(model.state_dict(), 'gpt_period_generator_kl.pt')

In [None]:
final_len = 30
input_ids = torch.tensor(tokenizer.encode('We should')).unsqueeze(0)
model.eval()
sequences = model.generate(input_ids, 
                           max_length=final_len,
                           min_length=final_len, 
                           do_sample=True, 
                           temperature=0.6, 
                           top_k=len(tokenizer), 
                           top_p=1.0, 
                           use_cache=True,
                           num_return_sequences=10)

In [None]:
tokenizer.batch_decode(sequences)

['We should be able to operate in a way that they can look after themselves. That\'s what we\'re trying to do," the judge said.\n',
 'We should be really careful about how we can use the new technology to improve our health care," said Dr. Craig S. Beck, chief executive officer',
 'We should make sure that the land is a safe, secure and enjoyable place to live," said Robin Roberts, president of the California Fish and Game Commission',
 'We should be able to do it, and I think that\'s a good thing."\n\nThe New York Jets are in the midst of a rule',
 "We should ask you to submit your work for publication in the journal.\n\nI'm not sure why we didn't take this step in the first",
 "We should be honest, it's a bit traumatic for me and my family. Even though I'm not fighting for anything, I'm fighting for my",
 'We should be educating people on the internet about the dangers of using your phone to communicate with other people, and not just to talk to the wrong person',
 'We should be able

In [None]:
ref_model.eval()
sequences = ref_model.generate(input_ids, 
                           max_length=final_len,
                           min_length=final_len, 
                           do_sample=True, 
                           temperature=0.6, 
                           top_k=len(tokenizer), 
                           top_p=1.0, 
                           use_cache=True,
                           num_return_sequences=10)

In [None]:
tokenizer.batch_decode(sequences)

['We should be doing something about it," he said.\n\n"I\'m not going to be the only one who thinks that."\n\nD',
 'We should be looking in the mirror and seeing what we can do to change it. We have to do something. But we have to take it seriously',
 'We should be able to call it whatever it is we want to call it," he said.\n\nThe other issue is that there\'s a huge',
 'We should be able to treat them like this if they\'re not, and it\'s terrible that we\'re not," he said. "It\'s like',
 "We should be able to channel that energy into the right areas of our lives, and let's say we're engaged in a good conversation about what we",
 "We should do a lot more.\n\nHow would you like to see your business grow in the future?\n\nI think it's going to",
 'We should be able to do that," he said.\n\n"What we have to do is give people a way to stop doing that."\n',
 'We should be able to do it in a way that we can be able to communicate with people without having to send them a message," she said.

In [None]:
trained_strings = ['We should be able to operate in a way that they can look after themselves. That\'s what we\'re trying to do," the judge said.\n',
 'We should be really careful about how we can use the new technology to improve our health care," said Dr. Craig S. Beck, chief executive officer',
 'We should make sure that the land is a safe, secure and enjoyable place to live," said Robin Roberts, president of the California Fish and Game Commission',
 'We should be able to do it, and I think that\'s a good thing."\n\nThe New York Jets are in the midst of a rule',
 "We should ask you to submit your work for publication in the journal.\n\nI'm not sure why we didn't take this step in the first",
 "We should be honest, it's a bit traumatic for me and my family. Even though I'm not fighting for anything, I'm fighting for my",
 'We should be educating people on the internet about the dangers of using your phone to communicate with other people, and not just to talk to the wrong person',
 'We should be able to use the data as soon as we get it back," said Waddell.\n\nAnd he said he can use the',
 'We should be able to do all we can to keep our country safe, and we should be able to do all we can to keep our democracy safe',
 "We should also keep in mind that this is not a replacement for the popular benefits from the Affordable Care Act. As the ACA's provisions have been improved"]

ref_strings = ['We should be doing something about it," he said.\n\n"I\'m not going to be the only one who thinks that."\n\nD',
 'We should be looking in the mirror and seeing what we can do to change it. We have to do something. But we have to take it seriously',
 'We should be able to call it whatever it is we want to call it," he said.\n\nThe other issue is that there\'s a huge',
 'We should be able to treat them like this if they\'re not, and it\'s terrible that we\'re not," he said. "It\'s like',
 "We should be able to channel that energy into the right areas of our lives, and let's say we're engaged in a good conversation about what we",
 "We should do a lot more.\n\nHow would you like to see your business grow in the future?\n\nI think it's going to",
 'We should be able to do that," he said.\n\n"What we have to do is give people a way to stop doing that."\n',
 'We should be able to do it in a way that we can be able to communicate with people without having to send them a message," she said.',
 'We should have a monopoly on printing. We should have a monopoly on agitation."\n\nIn his speech to the World Trade Organisation, Mr Maduro said',
 'We should follow up on this, which is an important part of the process. We need to see more of the information that will help us to understand']

sum([count_periods(s, match='.') for s in trained_strings]), sum([count_periods(s, match='.') for s in ref_strings])

(9, 13)

In [None]:
ref_model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )


In [None]:
model.transformer.ln_f.weight.shape[0]

768

In [86]:
class GPT2WithValueHead(torch.nn.Module):
    def __init__(self, gpt2_model):
        super().__init__()
        hidden_size = gpt2_model.transformer.ln_f.weight.shape[0]
        self.mlp = torch.nn.Sequential(torch.nn.Linear(hidden_size, hidden_size),
                                       torch.nn.ReLU(),
                                       torch.nn.Linear(hidden_size, 1),
        )
        self.gpt2_model = gpt2_model
        
    def generate(self, *args, **kwargs):
        return self.gpt2_model.generate(*args, **kwargs)
        
    def forward(self, x):
        return self.mlp(self.gpt2_model.transformer(x).last_hidden_state)

    def parameters(self):
        return self.mlp.parameters()
        

In [None]:
base_model = transformers.GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
model = GPT2WithValueHead(base_model)
sample_input = torch.tensor(tokenizer.encode('something something'), dtype=torch.long).unsqueeze(0)
model(sample_input).shape

torch.Size([1, 2, 50257])

In [87]:
def value_function_train(model, tokenizer, prefix: List[int], num_iters = 20, generation_length: int = 20, batch_size: int = 20, kl_coef: float = 0.05, lr: float = 3e-5, use_lambda_lr = True, gamma = 0.99):
    
    experiment = Experiment(
        api_key="LHtOmtcbzAp2SrlasnUxvQsFn",
        project_name="policy-grad-nlp",
        workspace="nrimsky",
    )
    
    ignore_params = ["experiment", "tokenizer", "model"]

    experiment.log_parameters({k: v for (k, v) in locals().items() if k not in ignore_params})
    
    period_token_id = tokenizer.encode('.')[0]

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    input_ids = torch.tensor(prefix).unsqueeze(0)
    prefix_len = input_ids.shape[-1]
    final_len = input_ids.shape[-1]+generation_length
    
    warmup_period = num_iters // 4
    lambda1 = lambda _iter: min([warmup_period, _iter]) // warmup_period
    
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

    with torch.autograd.set_detect_anomaly(True):
        for _iter in tqdm(range(num_iters)):

            # Shape: batch_size, seq_len (= prefix_len + gen_len)
            sequences = model.generate(input_ids, 
                                       max_length=final_len,
                                       min_length=final_len, 
                                       do_sample=True, 
                                       temperature=0.6, 
                                       top_k=len(tokenizer), 
                                       top_p=1.0, 
                                       use_cache=True,
                                       num_return_sequences=batch_size)

            rewards = [count_periods(sequences[i], match=period_token_id) for i in range(sequences.shape[0])]
            rewards = torch.tensor(rewards, dtype=torch.float)
            experiment.log_metric("mean reward", rewards.mean(), step=_iter)
            experiment.log_metric("std reward", rewards.std(), step=_iter)       

            # Shape: batch_size, seq_len (= prefix_len + gen_len), vocab_size
            model_output_values = model(sequences)

            loss = 0

            for i in tqdm(range(sequences.shape[0])):
                sequence = sequences[i]
                for j in range(prefix_len, final_len):
                    # token_id = sequence[j]
                    v_st = model_output_values[i, j]
                    
                    if j == final_len-1:
                        # Reward not return!
                        r_t = rewards[i]
                        td_error = (v_st - r_t)**2
                    else:
                        next_token_id = sequence[j+1]
                        r_t = 0
                        v_st_plus1 = model_output_values[i, j+1]
                        td_error = (v_st - (r_t + gamma*v_st_plus1))**2
                    loss = loss + td_error
                    
            n = (final_len - prefix_len)*sequences.shape[0]
            loss = loss / n
            experiment.log_metric("loss", loss.item(), step=_iter)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            if use_lambda_lr:
                scheduler.step()
                optimizer.zero_grad()
            else:
                optimizer.step()
                optimizer.zero_grad()
    
    experiment.end()
    return model

In [88]:
base_model = transformers.GPT2LMHeadModel.from_pretrained('gpt2', pad_token_id=tokenizer.eos_token_id)
model = GPT2WithValueHead(base_model)
model = value_function_train(model, tokenizer, prefix=prefix)

#https://huggingface.co/docs/transformers/main_classes/output#transformers.modeling_outputs.BaseModelOutputWithPastAndCrossAttentions

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/nrimsky/policy-grad-nlp/d31d3b8091724e48941dbacfb58b596f
COMET INFO:   Metrics:
COMET INFO:     mean reward : 1.0499999523162842
COMET INFO:     std reward  : 0.825577974319458
COMET INFO:   Parameters:
COMET INFO:     batch_size        : 20
COMET INFO:     gamma             : 0.99
COMET INFO:     generation_length : 20
COMET INFO:     ignore_params     : ['experiment', 'tokenizer', 'model']
COMET INFO:     kl_coef           : 0.05
COMET INFO:     lr                : 3e-05
COMET INFO:     num_iters         : 20
COMET INFO:     prefix            : [1212, 318]
COMET INFO:     use_lambda_lr     : True
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata          

In [89]:
torch.save(model.state_dict(), 'gpt_with_value_head_v2.pt')

In [82]:
model.eval()
sample_input = torch.tensor(tokenizer.encode('This is a sentence with lots of periods....... Hello.'), dtype=torch.long).unsqueeze(0)
# batch_size, seq_len, vocab_size
values = model(sample_input)

tensor([[[ 0.5146, -1.9785, -0.7302,  ..., -0.3247,  0.9336,  0.7891],
         [ 0.1751, -6.8242, -1.9052,  ..., -0.9868,  2.7618,  2.5768],
         [ 0.9491, -6.9109, -1.7530,  ..., -1.2891,  3.2759,  2.2900],
         ...,
         [-0.4792, -4.8468, -0.8634,  ..., -0.7643,  3.7331,  3.3081],
         [ 0.1291, -4.2751, -1.1406,  ..., -0.3562,  2.8611,  2.1246],
         [ 0.3402, -7.0900, -1.8633,  ..., -0.6261,  4.8839,  3.6403]]],
       grad_fn=<AddBackward0>)

In [110]:
model.eval()
for s in ('This is a sentence. One with lots of periods. I like short sentences. They are good. Short and sweet.',
         'This does not have many periods. But I like it because I like long rambling sentences that go on and on.'):
    sample_input = torch.tensor(tokenizer.encode(s), dtype=torch.long).unsqueeze(0)
    # batch_size, seq_len, vocab_size
    values = model(sample_input).detach().squeeze()
    print(s)
    for i in range(sample_input.shape[-1]):
        print(tokenizer.decode(sample_input[0, i]), f"{values[i].item(): .2}")

This is a sentence. One with lots of periods. I like short sentences. They are good. Short and sweet.
This  0.41
 is  2.6
 a  2.7
 sentence  2.5
.  3.0
 One  2.4
 with  2.2
 lots  0.13
 of  2.5
 periods  1.8
.  3.2
 I  3.8
 like  2.2
 short  2.1
 sentences  2.5
.  3.4
 They  3.5
 are  3.3
 good  2.7
.  3.5
 Short  1.7
 and  2.7
 sweet  1.9
.  3.3
This does not have many periods. But I like it because I like long rambling sentences that go on and on.
This  0.41
 does  1.9
 not  2.8
 have  2.0
 many  2.6
 periods  2.2
.  3.3
 But  3.6
 I  4.0
 like  2.0
 it  3.0
 because  2.5
 I  3.9
 like  2.4
 long  2.2
 r  2.0
ambling  2.9
 sentences  2.6
 that  3.4
 go  2.7
 on  2.5
 and  2.3
 on  2.0
.  3.2


In [98]:
values

tensor([0.4069, 2.6169, 2.6684, 2.4979, 2.3394, 0.9901, 2.5146, 1.9829, 3.3359,
        2.0152, 2.0932, 2.2103, 1.7048, 2.5425])

In [94]:
tokenizer.encode('This is a sentence with lots of periods....... Hello.')

[1212, 318, 257, 6827, 351, 6041, 286, 9574, 25780, 18435, 13]

In [104]:
tokenizer.decode([25780])

'.......'