In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Disable tensorflow debugging logs
import json
import tensorflow as tf
import keras_nlp
import tensorflow_text as tf_text
from huggingface_hub import hf_hub_download
from model import GPT
from utils import sample

- Download the checkpoint files and the model config from the Hugging Face Hub

In [2]:
# All files come from one Hugging Face Hub repository and are written under the
# current directory at their repo-relative paths.
repo_id = "milmor/gpt-mini"
ckpt_dir = 'openwt_512_d_512/best-ckpt'

# Files belonging to the `ckpt-616000` checkpoint, fetched in the same order as before.
ckpt_files = (
    "ckpt-616000.data-00000-of-00001",
    "ckpt-616000.index",
    "checkpoint",
)
for ckpt_file in ckpt_files:
    hf_hub_download(repo_id=repo_id,
                    filename=f"{ckpt_dir}/{ckpt_file}",
                    local_dir='./')

# hf_hub_download returns the local path of the downloaded file; keep it for
# loading the model hyperparameters later.
config_file = hf_hub_download(repo_id=repo_id,
                              filename="openwt_512_d_512/openwt_512_d_512_config.json",
                              local_dir='./')

In [3]:
# Display the local path that hf_hub_download returned for the config file.
config_file

'./openwt_512_d_512/openwt_512_d_512_config.json'

In [4]:
# Load the model hyperparameters from the downloaded JSON config.
# The encoding is pinned so parsing does not depend on the platform default.
with open(config_file, encoding='utf-8') as f:
    config = json.load(f)

In [5]:
# GPT-2 tokenizer loaded from the "gpt2_base_en" preset; its `sequence_length`
# is set to the `seq_len` value from the model config.
tokenizer = keras_nlp.models.GPT2Tokenizer.from_preset(
    "gpt2_base_en",
    sequence_length=config['seq_len'],
)

In [6]:
# Instantiate the GPT model; every hyperparameter is read from the loaded config.
model = GPT(
    vocab_size=config['vocab_size'],
    maxlen=config['seq_len'],
    emb_dim=config['emb_dim'],
    heads=config['heads'],
    mlp_dim=config['mlp_dim'],
    depth=config['depth'],
    rate=config['dropout'],
    initializer=config['initializer'],
)

- Initialize the model by running one forward pass on a tokenized prompt

In [7]:
context = 'The silver wolf is'
# Normalize the prompt to NFKD, tokenize it, then add a leading axis of size 1.
normalized_context = tf_text.normalize_utf8(context, 'NFKD')
t_context = tokenizer(normalized_context)[tf.newaxis, :]

In [8]:
# Call the model once on the tokenized prompt and display the tensor it returns.
model(t_context)

<tf.Tensor: shape=(1, 512, 50257), dtype=float32, numpy=
array([[[-0.04117207, -0.01061924,  0.14581442, ..., -0.13442373,
         -0.14407264, -0.19951549],
        [-0.09236404, -0.00886059,  0.1487286 , ..., -0.18754527,
          0.09094065, -0.12930349],
        [-0.14570114,  0.04800888,  0.21148913, ..., -0.23972818,
         -0.0096932 , -0.19432347],
        ...,
        [-0.20269476, -0.09608932,  0.06118559, ..., -0.24732275,
          0.06152881, -0.25288272],
        [-0.10632226, -0.02713121,  0.10409077, ..., -0.16089398,
         -0.02387796, -0.1616822 ],
        [-0.14942929, -0.07109308,  0.13973953, ..., -0.22269347,
         -0.00802256, -0.24788411]]], dtype=float32)>

In [9]:
# Print the per-layer summary (layer names, output shapes, parameter counts).
model.summary()

Model: "gpt"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 token_embedding (TokenEmbed  multiple                 25993728  
 ding)                                                           
                                                                 
 dropout_1 (Dropout)         multiple                  0         
                                                                 
 transformer_block (Transfor  multiple                 1577984   
 merBlock)                                                       
                                                                 
 transformer_block_1 (Transf  multiple                 1577984   
 ormerBlock)                                                     
                                                                 
 transformer_block_2 (Transf  multiple                 1577984   
 ormerBlock)                                                   

- Restore weights

In [10]:
# Restore the saved model weights and the saved step counter from `ckpt_dir`.
ckpt = tf.train.Checkpoint(model=model, step=tf.Variable(0))
ckpt_manager = tf.train.CheckpointManager(ckpt, directory=ckpt_dir,
                                          max_to_keep=1)

latest_ckpt = ckpt_manager.latest_checkpoint
# `latest_checkpoint` is None when no checkpoint is found in `ckpt_dir`.
# Restoring from None silently does nothing, so fail loudly instead of
# printing a misleading "restored ... at step 0" message.
if latest_ckpt is None:
    raise FileNotFoundError(f'No checkpoint found in {ckpt_dir!r}')

# Only `model` and `step` are tracked here; any other values stored in the
# checkpoint are intentionally left unused, so silence the unused-value warnings.
ckpt.restore(latest_ckpt).expect_partial()
print(f'Checkpoint restored from {latest_ckpt} at step {int(ckpt.step)}')

Checkpoint restored from openwt_512_d_512/best-ckpt/ckpt-616000 at step 616000


In [11]:
# Generate text from the same prompt that was tokenized earlier (`context`),
# instead of repeating the literal, so the two cannot drift apart.
# NOTE(review): `max_len` and `k` are passed through to `utils.sample`; their exact
# meaning (presumably output length and top-k cutoff) should be confirmed there.
text = sample(model, context, config['seq_len'], max_len=128, k=40)
text

'The silver wolf is "the most important" sign ever.\n\nThe wolf in front as in 2008 became synonymous when Jim Brown-Maggie sat quietly out through this room last Tuesday over her face during debate in Miami Park‘tear at it lunchfocused panel . And it never looked quite interesting after reading Browner in on other questions—and to what appeared more surprising. So this evening though she began talking from on Friday during Thursday\'s questions with me in terms he knew not very good (and she got about how difficult they do her day because not much we call her after we reach the red wolf they\'ll live together'