In [1]:
import tiktoken
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2LMHeadModel, GPT2Tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# 1. Recreate the same model architecture you used for training
model_2 = GPT2LMHeadModel.from_pretrained("gpt2")

# Replace the LM head for classification (2 classes).
# The freshly initialised head is only a placeholder: the check in step 4
# verifies that the checkpoint actually supplied its weights.
model_2.lm_head = torch.nn.Linear(model_2.config.n_embd, 2)

# 2. Move model to the same device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_2.to(device)

# 3. Recreate the optimizer (same hyperparameters as training)
optimizer = torch.optim.AdamW(model_2.parameters(), lr=5e-5, weight_decay=0.1)

# 4. Load the checkpoint
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered .pth file cannot execute arbitrary code. Only state dicts are read
# from the checkpoint below, which this mode supports.
checkpoint = torch.load(
    "gpt2_finetuned_classification.pth",
    map_location=device,
    weights_only=True,
)

# strict=False tolerates key differences between the checkpoint and the model,
# but it does so silently. Inspect the returned key lists so a checkpoint that
# lacks the fine-tuned head is not reported as a successful restore.
load_result = model_2.load_state_dict(checkpoint["model_state_dict"], strict=False)
missing_head_keys = [
    key for key in load_result.missing_keys if key.startswith("lm_head.")
]
if missing_head_keys:
    raise RuntimeError(
        "Checkpoint does not contain the classification head weights: "
        f"{missing_head_keys}"
    )
if load_result.missing_keys:
    print(f"Warning: keys missing from checkpoint: {load_result.missing_keys}")
if load_result.unexpected_keys:
    print(f"Warning: unexpected keys in checkpoint: {load_result.unexpected_keys}")

optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

model_2.eval()
print("Model and optimizer successfully restored!")


Model and optimizer successfully restored!


In [3]:
# Print the module tree to confirm that lm_head is the 2-output Linear layer
# installed when the model was rebuilt.
print(model_2)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=2, bias=True)
)


In [4]:
# Evaluate the loaded model by classifying a text as spam or not spam,
# using our previously implemented function classify_review, as follows:
def classify_review(
        text, model, tokenizer, device, max_length=None,
        pad_token_id=50256):
    """Classify ``text`` as ``'spam'`` or ``'not spam'``.

    The text is tokenized, truncated/padded to a fixed length, run through
    ``model``, and the logits at the last sequence position are used as the
    class scores.

    Args:
        text: Input string to classify.
        model: Callable model exposing ``eval()``, ``config.n_positions`` and
            returning an object with a ``.logits`` tensor indexed as
            ``[batch, position, class]``.
        tokenizer: Object with an ``encode(text)`` method returning a list of
            token IDs.
        device: Device on which the input tensor is created.
        max_length: Length the token sequence is truncated/padded to. It is
            clamped to ``model.config.n_positions``. When ``None``, the
            sequence keeps its own length (truncated to the context size and
            never shorter than one token).
        pad_token_id: Token ID appended to reach the target length.

    Returns:
        ``'spam'`` if the arg-max class at the last position is 1, otherwise
        ``'not spam'``.
    """
    model.eval()

    # Converts the input string into a list of token IDs the model can understand.
    input_ids = tokenizer.encode(text)

    supported_context_length = model.config.n_positions

    # Use one target length for BOTH truncation and padding, so padding can
    # never push the sequence past the model's positional-embedding range.
    if max_length is None:
        # No fixed length requested: keep the natural length, but feed at
        # least one token so the model never receives an empty sequence.
        target_length = max(1, min(len(input_ids), supported_context_length))
    else:
        target_length = min(max_length, supported_context_length)

    input_ids = input_ids[:target_length]
    input_ids += [pad_token_id] * (target_length - len(input_ids))

    # Leading batch dimension of size 1.
    input_tensor = torch.tensor(
        input_ids, dtype=torch.long, device=device).unsqueeze(0)

    with torch.no_grad():
        output = model(input_tensor).logits
        logits = output[:, -1, :]     # Selects only the last token's logits for classification.

    predicted_label = torch.argmax(logits, dim=-1).item()

    return 'spam' if predicted_label == 1 else 'not spam'

In [5]:
# GPT-2 BPE encoding from tiktoken.
tokenizer = tiktoken.get_encoding('gpt2')

# Two hand-written sample messages.
text_1 = "Congratulations! You have won a $1000 Walmart gift card. Claim your prize now!"
text_2 = "Lets set a meeting for Saturday morning."

# Classify each sample with the restored model and print the predicted label.
for sample_text in (text_1, text_2):
    print(classify_review(sample_text, model_2, tokenizer, device, max_length=120))

spam
not spam
