# Task: Build Transformer (TF) for implementing factorial

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd

## Here we show how to implement the factorial() function with a looped TF.

The input/output of the factorial function we are implementing is:
- Input: input_ftrl (the number whose factorial we want to compute)
- Output: (input_ftrl)!

This Jupyter notebook is organized as follows:

1. We first specify 
- the memory & SUBLEQ commands to define the factorial function 
- the parameters used for building the looped TF 

2. We run factorial with SUBLEQ commands manually (without using TF)

3. We run factorial with SUBLEQ commands using the looped TF

4. We check whether the result of our looped TF implementation matches the ground-truth value

## 1. Setup

### Specify the parameters for computing the factorial, i.e., (input_ftrl)!

In [2]:
# Number whose factorial this notebook computes.
# NOTE(review): tracing the SUBLEQ program defined below by hand suggests it needs input_ftrl >= 1
# (for 0 it seems to halt with N = 0 instead of 0! = 1) — confirm before trying 0.
input_ftrl = 7 # put the value whose factorial we want to compute

In [3]:
# Write the initial memory for the given input_ftrl (the subleq commands follow below)
import csv
import os

## memory (written in "name=initial_value" format)
# [M1] MINUS_ONE = -1 (we set a constant for implementing EOF)
# [M2] ZERO = 0 (we set a constant for implementing EOF)
# [M3] N = (the input number we want to compute factorial)
# [M4] N' = -N
# [M5] counter = 0
# [M6] temp = 0
# [M7] minus_two = -2
# [M8] minus_one = -1
# [M9] zero = 0
# [M10] one = 1
# Stored as a column vector (one row per memory cell) so the CSV has a single 'mem' column.
mems = np.asarray([-1, 0, input_ftrl, -input_ftrl, 0, 0, -2, -1, 0, 1]).reshape(-1, 1)
mems_path = 'inputs/ftrl_init_mem.csv'
# to_csv does not create missing parent directories, so make sure the target folder exists first
os.makedirs(os.path.dirname(mems_path), exist_ok=True)
pd.DataFrame(mems).to_csv(mems_path, header=['mem'], index=False)

## commands
# Semantics of `subleq a, b, c`:
#   mem[b] = mem[b] - mem[a]
#   if mem[b] <= 0: jump to instruction c, otherwise fall through to the next instruction
# Each row is (a, b, c), written with the 1-based [M*] memory / [C*] command numbering.
cmds = np.array([
    [3, 6, 2],     # [C1]  subleq N, temp, C2
    [7, 6, 4],     # [C2]  subleq minus_two, temp, C4
    [8, 6, 4],     # [C3]  subleq minus_one, temp, C4
    [6, 5, 11],    # [C4]  subleq temp, counter, C11 (EOF)
    [4, 3, 11],    # [C5]  subleq N', N, C11 (EOF)
    [10, 5, 8],    # [C6]  subleq one, counter, C8
    [9, 9, 5],     # [C7]  subleq zero, zero, C5
    [4, 4, 9],     # [C8]  subleq N', N', C9
    [3, 4, 10],    # [C9]  subleq N, N', C10
    [9, 9, 3],     # [C10] subleq zero, zero, C3
    [2, 1, 11],    # [C11] subleq ZERO, MINUS_ONE, C11 (EOF)
])

### Specify the parameters for defining the matrix X (input to TF)

In [4]:
# Sizes that define the column layout of X: scratchpad columns, memory columns, command columns
num_cmds = cmds.shape[0]
num_mems = mems.shape[0]
s = 2 # s: number of columns in X for scratchpad (larger than 1 is enough?)
m = num_mems # m: number of columns in X for memory
n = s + m + num_cmds # n: number of columns in X in total (for scratchpad, memory and commands)
logn = int(np.ceil(np.log2(n)))

# compute real factorial (just for getting the number range to decide N)
output_ftrl = 1
for i in range(1, input_ftrl+1):
    output_ftrl *= i
# N: number of bits used to represent the integer values in each memory element.
# For a positive integer x, floor(log2(x)) == x.bit_length() - 1. The integer form is exact and
# works for arbitrarily large Python ints, whereas np.log2 goes through floating point and does
# not accept Python ints beyond the 64-bit range.
N = (int(output_ftrl).bit_length() - 1) + 3


# Specify the number of rows of input X
# nrows_list: list of number of rows for each block (cmds, memory, scratchpad, program counter, positional encoding, buffer, indicator)
# row_idx_list: list of row index each block starts (memory, scratchpad, program counter, positional encoding, buffer) except cmds & indicator
from utils import get_nrows_subleq, get_row_idx_list_subleq
nrows_cmds, nrows_memory, nrows_scratchpad, nrows_pc, nrows_pos_enc, nrows_buffer = get_nrows_subleq(logn, N)
# the trailing "+ 1" / "1" accounts for the single indicator row
num_rows_X = nrows_cmds + nrows_memory + nrows_scratchpad + nrows_pc + nrows_pos_enc + nrows_buffer + 1
nrows_list = [nrows_cmds, nrows_memory, nrows_scratchpad, nrows_pc, nrows_pos_enc, nrows_buffer, 1]
row_idx_list = get_row_idx_list_subleq(nrows_list)
idx_memory, idx_scratchpad, idx_pc, idx_pos_enc, idx_buffer = row_idx_list

### Revise the commands & save them as inputs/ftrl_cmds.csv

In [5]:
## Convert the 1-based addresses in the SUBLEQ cmds into column indices of X:
##   memory operands a, b : i -> (i - 1) + s
##   jump target c        : j -> (j - 1) + s + m
## NOTE: cmds is rebuilt from its own previous value, so running this cell twice
## without re-creating cmds applies the shift twice.
cmds = cmds + np.array([s - 1, s - 1, s + m - 1])

In [6]:
# Persist the re-indexed commands as a CSV with columns a, b, c (no index column)
cmds_path = 'inputs/ftrl_cmds.csv'
pd.DataFrame(cmds, columns=['a', 'b', 'c']).to_csv(cmds_path, index=False)

### Load the input CSV files: (1) the SUBLEQ commands, (2) the initial memory

In [7]:
# Read the two CSV inputs back from disk: the SUBLEQ commands and the initial memory
cmds_filename = 'inputs/ftrl_cmds.csv'
mem_filename = 'inputs/ftrl_init_mem.csv'

cmds_df = pd.read_csv(cmds_filename)
mem_df = pd.read_csv(mem_filename)

cmds = cmds_df.to_numpy()        # one (a, b, c) row per command
mem = mem_df.to_numpy().ravel()  # flatten the loaded table into a 1-D vector

In [8]:
# Sanity-check the loaded inputs against the column layout of X
for cmd_a, cmd_b, cmd_c in cmds:
    # a and b must address memory columns: a, b \in [s:s+m]
    assert s <= cmd_a < s + m
    assert s <= cmd_b < s + m
    # c must address a command column: c \in [s+m:n]
    assert s + m <= cmd_c < n
assert len(mem) == m # mem should have $m$ elements

# every initial memory value has to lie in [-2**(N-2), 2**(N-2)-1]
for mem_value in mem:
    assert -2**(N-2) <= mem_value <= 2**(N-2)-1
    


### Define the matrix X

In [9]:
# Build X, the input matrix for the looped TF, from the layout parameters, the commands
# and the initial memory. The second return value is bound to `_` and not used in the cells shown here.
from utils import init_input
X, _ = init_input(s,m,n,logn,N,num_rows_X,cmds,nrows_list,row_idx_list,opt=None,mem_given=mem)

## 2. Run factorial (using subleq) manually

In [10]:
# Number of loop iterations; the same value is reused by the TF loop further down.
num_loops = 500 # need to increase as input_ftrl increases?
from utils import run_manual_subleq
# Reference run without the TF. The last entry of the result is later read as the final state,
# so presumably num_loops must be large enough for the program to reach its EOF command — TODO confirm.
manual_subleq_results = run_manual_subleq(cmds, mem, s, m, n, N, num_loops=num_loops)
#manual_subleq_results

In [11]:
# Column labels for one result row: the m memory cells (only the first and last are labelled,
# the ones in between are shown as '...'), followed by the per-step quantities.
header = np.array(
    ['mem[0]']
    + ['...'] * (m - 2)
    + ['mem[m-1]', 'a', 'b', 'c', 'mem[a]', 'mem[b]', 'mem[b]-mem[a]', 'flag', 'p-next']
)
manual_subleq_results_df = pd.DataFrame(manual_subleq_results, columns=header)

## 3. Run SUBLEQ using our TF architecture

In [12]:
# Run the SUBLEQ program with the looped TF: every iteration pushes X through the six
# steps below, each step consuming the matrix produced by the previous one.
# NOTE: X is overwritten at the end of every iteration, so re-running this cell on its own
# continues from the last state; re-run the cell that builds X first to start over.
from subleq import read_inst, read_mem, subtract_mem, write_mem, conditional_branching, error_correction
import os
our_subleq_results = []  # one tuple per iteration, later tabulated with the `header` columns
our_curr_result = []     # running result, passed into and returned by every step
lam=100 # lambda used for softmax in TF # need to increase as input_ftrl increases?

for i in range(num_loops):
    #print('we are in loop ', i)

    # Each step returns the updated matrix, the running result and extra value(s) kept in the
    # TF_* names (not used further in the cells shown here); i+1 is passed as a 1-based loop counter.
    # Step 1. read instruction & check a, b, c
    X1, our_curr_result, TF_read_inst = read_inst(X,s,m,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)
    # Step 2. read memory & check mem[a], mem[b]
    X2, our_curr_result, TF_mem = read_mem(X1,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)
    # Step 3. subtract memory & check mem[b]-mem[a]
    X3, our_curr_result, *TF_subtract_mem = subtract_mem(X2,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)
    # Step 4. write memory
    X4, our_curr_result, TF_write_mem = write_mem(X3,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)
    # Step 5. conditional branching & check flag, p_{next}
    X5, our_curr_result, *TF_cond_branch = conditional_branching(X4,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)
    # Step 6. error correction
    X6, our_curr_result, TF_err_corr = error_correction(X5,n,logn,num_rows_X,N,i+1,nrows_list,row_idx_list,None,our_curr_result,lam)

    X = X6 # go to the next loop
    our_subleq_results.append(tuple(our_curr_result))
    #print(our_curr_result)

In [13]:
# Tabulate the per-iteration TF results with the same column labels as the manual run
our_subleq_results_df = pd.DataFrame(our_subleq_results, columns=header)
#our_subleq_results_df

## 4. Compare our TF SUBLEQ result & manual SUBLEQ result

In [14]:
# mem[2] is N in the memory layout; after the last iteration it holds the computed factorial
fact = manual_subleq_results[-1][2] # extract mem[2] from the final result of manual computation
TF_fact = our_subleq_results[-1][2] # extract mem[2] from the final result of TF

print('The factorial of {}, the actual value, is {}'.format(input_ftrl, output_ftrl))
print('The factorial of {}, computed in subleq manually, is {}'.format(input_ftrl, fact))
print('The factorial of {}, computed in subleq using TF, is {}'.format(input_ftrl, TF_fact))
# full final rows (memory + per-step quantities) of both runs, for a visual side-by-side check
print(manual_subleq_results[-1], '\n', our_subleq_results[-1])
# Success requires that BOTH the manual SUBLEQ run and the TF run reproduce the ground truth,
# so a disagreement between the two executions cannot go unnoticed.
if output_ftrl == fact == TF_fact:
    print('The factorial in TF works properly!')
else:
    print('Check the code again :p')

The factorial of 7, the actual value, is 5040
The factorial of 7, computed in subleq manually, is 5040
The factorial of 7, computed in subleq using TF, is 5040
(-1, 0, 5040, -5040, 0, 0, -2, -1, 0, 1, 3, 2, 22, 0, -1, -1, 1, 22) 
 (-1, 0, 5040, -5040, 0, 0, -2, -1, 0, 1, 3, 2, 22, 0, -1, -1, 1, 22)
The factorial in TF works properly!
