# addGPT

LLM to do basic addition using reasoning of two unsigned numbers.

## Dataset generation

1. Sample two random numbers from uniform distribution with range 0 to 99999999

2. Generate step by step addition procedure for adding two numbers in text.

In [9]:
import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer
import torch

In [2]:
def generateReasoningToken(i,j):
  rt="<reason>"
  while i>0 or j>0:
    p = i%10
    q = j%10
    sum = p+q
    rt += "<digit_sum>" + str(sum%10) + "</digit_sum>"
    rt += "<carry>" + str(int(sum/10)) + "</carry>"
    i = int(i/10)
    j = int(j/10)
  return rt + "</reason>"

generateReasoningToken(100, 55)

'<reason><digit_sum>5</digit_sum><carry>0</carry><digit_sum>5</digit_sum><carry>0</carry><digit_sum>1</digit_sum><carry>0</carry></reason>'

In [3]:
A = np.random.uniform(0, 99999999, 100000).round().astype(int)
B = np.random.uniform(0, 99999999, 100000).round().astype(int)

Input = np.char.add(np.char.add(np.char.add(np.char.add("<prompt>",A.astype(str)), '+'), B.astype(str)),"</prompt>")
Reason = np.array([generateReasoningToken(i,j) for i,j in zip(A,B)])
Output = np.char.add("<answer>",np.char.add((A+B).astype(str), "</answer>"))

Examples = np.char.add(Input,np.char.add(Reason,Output))

## Tokenization

In [4]:
dictionary = {
    "<nop/>":0,
    "<prompt>":1,
    "</prompt>":2,
    "<answer>":3,
    "</answer>":4,
    "<reason>":5,
    "</reason>":6,
    "<digit_sum>":7,
    "</digit_sum>":8,
    "<carry>":9,
    "</carry>":10,
    "0":11,
    "1":12,
    "2":13,
    "3":14,
    "4":15,
    "5":16,
    "6":17,
    "7":18,
    "8":19,
    "9":20,
    "+": 21
}

ReverseIdToToken = {v:k for k,v in dictionary.items()}

tokenizer = RegexpTokenizer(r'<nop/>|[0-9]|\+|<prompt>|</prompt>|<answer>|</answer>|<reason>|<digit_sum>|<carry>|</reason>|</digit_sum>|</carry>')

In [5]:
ExampleTokens = list(map(tokenizer.tokenize, Examples))

In [6]:
TokenIds = [[dictionary[token] for token in example] for example in ExampleTokens]

In [7]:
def create_training_examples(token_ids, context_length):
  training_examples = []
  for example in token_ids:
    for i in range(0, len(example)):
      context = example[max(i-context_length+1, 0):i+1]

      if len(context)<context_length:
        filler = [0]*(context_length-len(context))
        # print(len(filler))
        filler.extend(context)
        context=filler
          # context.extend([0]*(context_length-len(context)))
      # print(len(context))
      assert len(context)==context_length
      training_examples.append(context)

  return np.array(training_examples)

context_length = 65
training_examples = create_training_examples(TokenIds, context_length)


In [8]:
print(f"Shape of training examples: {training_examples.shape}")
print(f"First training example: {training_examples[32873]}")

Shape of training examples: (7921284, 65)
First training example: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  1 12 13 19 15 16 13 12 15 21 15 14 13 14 17 12
 14 15  2  5  7 19  8  9 11 10  7 15  8  9 11 10  7]


In [10]:
data = torch.tensor(training_examples, dtype=torch.float32)

In [11]:
torch.save(data, 'data.pt')

In [12]:
idx = torch.randperm(data.shape[0])
idx.shape

torch.Size([7921284])

In [13]:
X = data[idx,0:64]
Y = data[idx, 1:65]

In [14]:
X[0,:]

tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  1., 19., 18., 18., 18., 14., 18., 14., 21., 14.,
        15., 18., 14., 18., 15., 20., 11.,  2.])

In [15]:
Y[0]

tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  1., 19., 18., 18., 18., 14., 18., 14., 21., 14., 15.,
        18., 14., 18., 15., 20., 11.,  2.,  5.])

In [16]:
torch.save(X, 'X.pt')
torch.save(Y, 'Y.pt')

## Model

Transformer architecture with context window of 64 tokens.

In [None]:
transformer = torch.nn.Transformer()

