In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

device = "cuda:0"
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
model = AutoModelForCausalLM.from_pretrained(
  "stabilityai/stablelm-3b-4e1t",
  trust_remote_code=True,
  torch_dtype=torch.float16,
)
model.to(device)

StableLMEpochForCausalLM(
  (model): StableLMEpochModel(
    (embed_tokens): Embedding(50304, 2560)
    (layers): ModuleList(
      (0-31): 32 x DecoderLayer(
        (self_attn): Attention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (o_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (rotary_emb): RotaryEmbedding()
        )
        (mlp): MLP(
          (gate_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (up_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=2560, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_a

In [11]:
def zero_padding_multiplicatn(num_1: int, num_2: int, padding_size: int = 0, reverse: bool = False) -> str:
    num_1 = str(num_1)
    num_2 = str(num_2)
    answer = int(num_1) * int(num_2)
    num_1 = "0" * (padding_size - len(num_1)) + num_1
    num_2 = "0" * (padding_size - len(num_2)) + num_2
    answer = str(answer).reverse() if reverse else str(answer)
    return f"{num_1} * {num_2} = {answer}"

In [10]:
zero_padding_multiplicatn(123, 456, 6)

'000123 * 000456 = 56088'

In [23]:
import random

max_num = 9
max_num_len = len(str(max_num**2))
eval_examples = []
for _ in range(20):
    num_1 = random.randint(0, max_num)
    num_2 = random.randint(0, max_num)
    eval_examples.append(zero_padding_multiplicatn(num_1, num_2, max_num_len))
few_shot_examples = []
few_shot_num = 5
for _ in range(few_shot_num):
    num_1 = random.randint(0, max_num)
    num_2 = random.randint(0, max_num)
    few_shot_examples.append(zero_padding_multiplicatn(num_1, num_2, max_num_len))
prompt = "\n".join(few_shot_examples)
print(prompt)

08 * 07 = 56
03 * 04 = 12
07 * 05 = 35
08 * 01 = 8
09 * 08 = 72


In [28]:
eval_examples[0][-max_num_len:]

'12'

In [55]:
from transformers.utils import logging

logging.set_verbosity_error()

In [59]:
answers = []
for example in eval_examples:
    test_prompt = prompt + "\n" + example[:-max_num_len-1]
    truth = example[-max_num_len:]
    inputs = tokenizer(test_prompt, return_tensors="pt").input_ids.to(device)
    outputs = model.generate(inputs, max_length=64, do_sample=False)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(test_prompt)+1:len(test_prompt)+1+max_num_len]
    #print(f"Test Prompt: {example}")
    #print(f"Prediction: {prediction}")
    #print(f"Truth: {truth}")
    answers.append(int(prediction == truth))
print(f"Accuracy: {sum(answers)/len(answers)}")

Test Prompt: 04 * 03 = 12
Prediction: 12
Truth: 12
Test Prompt: 00 * 07 = 0
Prediction: 01
Truth:  0
Test Prompt: 04 * 03 = 12
Prediction: 12
Truth: 12
Test Prompt: 07 * 09 = 63
Prediction: 63
Truth: 63
Test Prompt: 05 * 04 = 20
Prediction: 20
Truth: 20
Test Prompt: 04 * 02 = 8
Prediction: 05
Truth:  8
Test Prompt: 01 * 03 = 3
Prediction: 02
Truth:  3
Test Prompt: 00 * 04 = 0
Prediction: 01
Truth:  0
Test Prompt: 05 * 09 = 45
Prediction: 45
Truth: 45
Test Prompt: 04 * 09 = 36
Prediction: 36
Truth: 36
Test Prompt: 05 * 01 = 5
Prediction: = 
Truth:  5
Test Prompt: 05 * 09 = 45
Prediction: 45
Truth: 45
Test Prompt: 06 * 05 = 30
Prediction: 30
Truth: 30
Test Prompt: 06 * 02 = 12
Prediction: 12
Truth: 12
Test Prompt: 07 * 00 = 0
Prediction: 10
Truth:  0
Test Prompt: 05 * 05 = 25
Prediction: 25
Truth: 25
Test Prompt: 07 * 00 = 0
Prediction: 10
Truth:  0
Test Prompt: 07 * 08 = 56
Prediction: 56
Truth: 56
Test Prompt: 01 * 07 = 7
Prediction: 02
Truth:  7
Test Prompt: 04 * 08 = 32
Prediction: 3

In [47]:
prediction[len(test_prompt)+1:len(test_prompt)+1+max_num_len]

'12'

In [36]:
test_prompt

'08 * 07 = 56\n03 * 04 = 12\n07 * 05 = 35\n08 * 01 = 8\n09 * 08 = 72\n04'

In [7]:
inputs = tokenizer(prompt, return_tensors="pt").to(device)
tokens = model.generate(
  **inputs,
  max_new_tokens=64,
  temperature=0.75,
  top_p=0.95,
  do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


5*5=25 because 5*3=15 and 5*2=10.
4*8=32 because 4*4=16 and 4*4=16.
3*9=27 because 3*5=15 and 3*4=12.
3*5=15 because 3*5=15 and 3*5=15.
4*5=20 because 4*2=8 and 4*4=16.
4*4=16 because 4*2=8 and
