In [15]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

device = "cuda:0"
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
model = AutoModelForCausalLM.from_pretrained(
  "stabilityai/stablelm-3b-4e1t",
  trust_remote_code=True,
  torch_dtype=torch.float16,
)
model.to(device)

StableLMEpochForCausalLM(
  (model): StableLMEpochModel(
    (embed_tokens): Embedding(50304, 2560)
    (layers): ModuleList(
      (0-31): 32 x DecoderLayer(
        (self_attn): Attention(
          (q_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (k_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (v_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (o_proj): Linear(in_features=2560, out_features=2560, bias=False)
          (rotary_emb): RotaryEmbedding()
        )
        (mlp): MLP(
          (gate_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (up_proj): Linear(in_features=2560, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=2560, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_a

In [5]:
def zero_padding_multiplicatn(num_1: int, num_2: int, padding_size: int = 0, reverse: bool = False) -> str:
    num_1 = str(num_1)
    num_2 = str(num_2)
    answer = int(num_1) * int(num_2)
    num_1 = "0" * (padding_size - len(num_1)) + num_1 if padding_size else num_1
    num_2 = "0" * (padding_size - len(num_2)) + num_2 if padding_size else num_2
    answer = str(answer).reverse() if reverse else str(answer)
    answer = "0" * (padding_size - len(answer)) + answer if padding_size else answer
    return f"{num_1} * {num_2} = {answer}"

In [8]:
zero_padding_multiplicatn(123, 456, 0)

'123 * 456 = 56088'

In [2]:
99**2, 100**2

(9801, 10000)

In [22]:
import random

max_num = 9
max_num_len = len(str(max_num**2))
eval_examples_padded = []
for i in range(1, max_num + 1):
    for j in range(1, max_num + 1):
        eval_examples_padded.append(zero_padding_multiplicatn(i, j, max_num_len))

eval_examples_base = []
for i in range(1, max_num + 1):
    for j in range(1, max_num + 1):
        eval_examples_base.append(f"{i} * {j} = {i * j}")
    
few_shot_examples = []
few_shot_num = 5
for _ in range(few_shot_num):
    num_1 = random.randint(0, max_num)
    num_2 = random.randint(0, max_num)
    few_shot_examples.append(zero_padding_multiplicatn(num_1, num_2, max_num_len))
prompt = "\n".join(few_shot_examples)
print(prompt)

04 * 09 = 36
02 * 02 = 04
04 * 06 = 24
01 * 09 = 09
00 * 02 = 00


In [14]:
eval_examples

['01 * 01 = 01',
 '01 * 02 = 02',
 '01 * 03 = 03',
 '01 * 04 = 04',
 '01 * 05 = 05',
 '01 * 06 = 06',
 '01 * 07 = 07',
 '01 * 08 = 08',
 '01 * 09 = 09',
 '02 * 01 = 02',
 '02 * 02 = 04',
 '02 * 03 = 06',
 '02 * 04 = 08',
 '02 * 05 = 10',
 '02 * 06 = 12',
 '02 * 07 = 14',
 '02 * 08 = 16',
 '02 * 09 = 18',
 '03 * 01 = 03',
 '03 * 02 = 06',
 '03 * 03 = 09',
 '03 * 04 = 12',
 '03 * 05 = 15',
 '03 * 06 = 18',
 '03 * 07 = 21',
 '03 * 08 = 24',
 '03 * 09 = 27',
 '04 * 01 = 04',
 '04 * 02 = 08',
 '04 * 03 = 12',
 '04 * 04 = 16',
 '04 * 05 = 20',
 '04 * 06 = 24',
 '04 * 07 = 28',
 '04 * 08 = 32',
 '04 * 09 = 36',
 '05 * 01 = 05',
 '05 * 02 = 10',
 '05 * 03 = 15',
 '05 * 04 = 20',
 '05 * 05 = 25',
 '05 * 06 = 30',
 '05 * 07 = 35',
 '05 * 08 = 40',
 '05 * 09 = 45',
 '06 * 01 = 06',
 '06 * 02 = 12',
 '06 * 03 = 18',
 '06 * 04 = 24',
 '06 * 05 = 30',
 '06 * 06 = 36',
 '06 * 07 = 42',
 '06 * 08 = 48',
 '06 * 09 = 54',
 '07 * 01 = 07',
 '07 * 02 = 14',
 '07 * 03 = 21',
 '07 * 04 = 28',
 '07 * 05 = 35

In [28]:
eval_examples[0][-max_num_len:]

'12'

In [16]:
from transformers.utils import logging

logging.set_verbosity_error()

In [21]:
len(eval_examples)

243

In [46]:
from tqdm import tqdm
answers = []
batch_size = 32
for i in tqdm(range(0, len(eval_examples), batch_size)):
    queries = eval_examples[i:i+batch_size]
    test_prompts = [prompt + "\n" + query[:-max_num_len-1] for query in queries]
    truths = [query[-max_num_len:] for query in queries]

    inputs = tokenizer(test_prompts, return_tensors="pt").input_ids.to(device)
    outputs = model.generate(inputs, max_length=64, do_sample=False)
    prediction = tokenizer.batch_decode(outputs, skip_special_tokens=True)#[len(test_prompt)+1:len(test_prompt)+1+max_num_len]
    predictions = [p[len(test_prompts[0])+1:len(test_prompts[0])+1+max_num_len] for p in prediction]
    for prediction, truth in zip(predictions, truths):
        answers.append(int(prediction == truth))
    #print(f"Test Prompt: {example}")
    #print(f"Prediction: {prediction}")
    #print(f"Truth: {truth}")
    #answers.append(int(prediction == truth))
print(f"Accuracy: {sum(answers)/len(answers)}")

100%|██████████| 8/8 [00:12<00:00,  1.57s/it]

Accuracy: 0.9753086419753086





In [None]:
tokenizer.bat

In [37]:
prediction = tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [39]:
prediction

['04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 01 = 01\n02 * 02 = 04\n03 * 03 = 09\n04 * 04 = 16\n05 * 05 = 25\n06 * 06 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 02 = 02\n02 * 06 = 12\n00 * 09 = 00\n01 * 09 = 09\n02 * 02 = 04\n04 * 06 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 03 = 03\n02 * 04 = 08\n03 * 05 = 15\n04 * 06 = 24\n05 * 07 = 35\n06 * 08 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 04 = 04\n02 * 06 = 12\n03 * 09 = 27\n04 * 02 = 08\n05 * 04 = 20\n06 * 06 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 05 = 05\n02 * 06 = 12\n03 * 07 = 21\n04 * 08 = 32\n05 * 09 = 45\n06 * 10 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 * 06 = 06\n02 * 09 = 18\n03 * 04 = 12\n04 * 06 = 24\n05 * 09 = 45\n06 * 02 =',
 '04 * 09 = 36\n02 * 02 = 04\n04 * 06 = 24\n01 * 09 = 09\n00 * 02 = 00\n01 *

In [31]:
inputs = tokenizer([test_prompt, test_prompt], return_tensors="pt").input_ids.to(device)
outputs = model.generate(inputs, max_length=64, do_sample=False)
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(test_prompt)+1:len(test_prompt)+1+max_num_len]

In [33]:
outputs

tensor([[ 2125,   475, 15630,   426,  5540,   187,  2640,   475, 16261,   426,
         16703,   187,  2125,   475, 17796,   426,  2164,   187,   520,   475,
         15630,   426, 15630,   187,   361,   475, 16261,   426,  7449,   187,
          2693,   475, 15630,   426, 11681,   187,   361,   475, 17796,   426,
          7449,   187,   361,   475, 16703,   426,  7449,   187,   361,   475,
         17272,   426,  7449,   187,   361,   475, 16261,   426,  7449,   187,
           361,   475, 14805,   426],
        [ 2125,   475, 15630,   426,  5540,   187,  2640,   475, 16261,   426,
         16703,   187,  2125,   475, 17796,   426,  2164,   187,   520,   475,
         15630,   426, 15630,   187,   361,   475, 16261,   426,  7449,   187,
          2693,   475, 15630,   426, 11681,   187,   361,   475, 17796,   426,
          7449,   187,   361,   475, 16703,   426,  7449,   187,   361,   475,
         17272,   426,  7449,   187,   361,   475, 16261,   426,  7449,   187,
           361

In [47]:
prediction[len(test_prompt)+1:len(test_prompt)+1+max_num_len]

'12'

In [36]:
test_prompt

'08 * 07 = 56\n03 * 04 = 12\n07 * 05 = 35\n08 * 01 = 8\n09 * 08 = 72\n04'

In [7]:
inputs = tokenizer(prompt, return_tensors="pt").to(device)
tokens = model.generate(
  **inputs,
  max_new_tokens=64,
  temperature=0.75,
  top_p=0.95,
  do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


5*5=25 because 5*3=15 and 5*2=10.
4*8=32 because 4*4=16 and 4*4=16.
3*9=27 because 3*5=15 and 3*4=12.
3*5=15 because 3*5=15 and 3*5=15.
4*5=20 because 4*2=8 and 4*4=16.
4*4=16 because 4*2=8 and
