## Data Generation
Generate data by generating random integers, converting to binary representation, then dumping into array

In [33]:
import torch
torch.cuda.is_available()

True

In [34]:
import numpy as np
import re
np.random.seed(12345)
intrep = np.random.randint(0, 256)
print(intrep)
binrep = np.binary_repr(intrep, width=8)
print(binrep)
arrep = np.flip(np.array(re.split('',binrep)[1:-1], dtype=int))
print(arrep)

226
11100010
[0 1 0 0 0 1 1 1]


In [35]:
import numpy as np
import re
np.random.seed(54321)

ints = np.random.randint(0, 256, size=100_000)
A = ints.copy()[0:50_000].astype(np.int64)
B = ints.copy()[50_000:].astype(np.int64)
C = A*B.astype(np.int64)

Astr, Bstr, Cstr = np.empty(len(A), dtype=object), np.empty(len(A), dtype=object), np.empty(len(A), dtype=object)

for i in range(len(A)):
    Astr[i] = np.binary_repr(A[i], width=8)
    Bstr[i] = np.binary_repr(B[i], width=8)
    Cstr[i] = np.binary_repr(C[i], width=16)
print(Astr)

Aarr, Barr, Carr = np.empty((len(A),8), dtype=object), np.empty((len(A),8), dtype=object), np.empty((len(A),16), dtype=object)

for i in range(len(A)):
    Aarr[i] = np.flip(np.array(re.split('', Astr[i])[1:-1], dtype=int))
    Barr[i] = np.flip(np.array(re.split('', Bstr[i])[1:-1], dtype=int))
    Carr[i] = np.flip(np.array(re.split('', Cstr[i])[1:-1], dtype=int))
print(Aarr)

['01010001' '10001010' '00011010' ... '11100100' '00010100' '11010101']
[[1 0 0 ... 0 1 0]
 [0 1 0 ... 0 0 1]
 [0 1 0 ... 0 0 0]
 ...
 [0 0 1 ... 1 1 1]
 [0 0 1 ... 0 0 0]
 [1 0 1 ... 0 1 1]]


Function form of the above code for main code base

In [36]:
'''Functions to generate the training/test data, and interlace binary numbers for the input'''

import numpy as np
import re

def interlace(in1:np.ndarray, in2:np.ndarray) -> np.ndarray:
    '''
    Interlaces in1 and in2
    
    Example: 
    in1 = [[[0 1], [1 0], ...], ...]
    in2 = [[[1 0], [1 0], ...], ...]
    returns: [[[0 1], [1 0], [1 0], [1 0]], ...]
    '''
    laced = np.empty((in1.shape[0], in1.shape[1]*2, 2), dtype=int)

    for i in range(in1.shape[0]):
        for j in range(in1.shape[1]):
            laced[i, 2*j] = in1[i,j]
            laced[i, (2*j)+1] = in2[i,j]

    return laced

def generate_train_test(train_size, test_size, seed) -> np.ndarray:
    '''
    Generates the training and testing data given a random seed.

    We use one-hot defintiions of:
    1 = [0 1], 0 = [1 0]

    Returns:
    A_train - 2D array where each row is separated digits of 8-bit binary number
    B_train - 2D array where each row is separated digits of 8-bit binary number
    C_train - 2D array where each row is separated digits of a 16-bit binary number of A*B
    A_test - ...
    B_test - ...
    C_test - ...
    '''

    total_size = train_size + test_size

    # generate the random integers 0-255
    np.random.seed(seed)
    ints = np.random.randint(0, 256, size=total_size*2)
    A = ints.copy()[0:total_size].astype(np.int64)
    B = ints.copy()[total_size:].astype(np.int64)
    C = A*B.astype(np.int64)

    # arrays to hold the binary-representation-strings of the converted integers
    Astr, Bstr, Cstr = np.empty(len(A), dtype=object), np.empty(len(A), dtype=object), np.empty(len(A), dtype=object)

    for i in range(len(A)):
        Astr[i] = np.binary_repr(A[i], width=8)
        Bstr[i] = np.binary_repr(B[i], width=8)
        Cstr[i] = np.binary_repr(C[i], width=16)

    # arrays to hold the 0's and 1's that come from splitting the strings of binary-representation
    Aarr, Barr, Carr = np.empty((len(A),8,2), dtype=object), np.empty((len(A),8,2), dtype=object), np.empty((len(A),16, 2), dtype=object)

    for i in range(len(A)):
        for j, binary in enumerate(np.flip(np.array(re.split('', Astr[i])[1:-1], dtype=int))):
            Aarr[i, j] = [1, 0] if binary == 0 else [0, 1]

        for j, binary in enumerate(np.flip(np.array(re.split('', Bstr[i])[1:-1], dtype=int))):
            Barr[i, j] = [1, 0] if binary == 0 else [0, 1]

        for j, binary in enumerate(np.flip(np.array(re.split('', Cstr[i])[1:-1], dtype=int))):
            Carr[i, j] = [1, 0] if binary == 0 else [0, 1]
    
    return Aarr[0:train_size], Barr[0:train_size], Carr[0:train_size], Aarr[train_size:], Barr[train_size:], Carr[train_size:]

train_size = 10_000
test_size = 2_000

atr,btr,ctr,ate,bte,cte = generate_train_test(train_size, test_size, 12345)
# print(atr[0])
print(btr[0])
abtr, batr = interlace(atr, btr), interlace(btr, atr)
abte, bate = interlace(ate, bte), interlace(bte, ate)


[[0 1]
 [1 0]
 [0 1]
 [0 1]
 [1 0]
 [0 1]
 [1 0]
 [1 0]]


In [37]:
ctr.shape

(10000, 16, 2)

## Model

In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as func

class Net(nn.Module):
    def __init__(self):
        super().__init__()

        self.lstm = nn.LSTM(2, 32, num_layers=4, batch_first=True) # input size is either 2 or 1?
        self.linlayer1 = nn.Linear(32, 512) # go from hidden layer size of lstm to reduced size
        self.linLayer2 = nn.Linear(512, 128)
        self.linLayer3 = nn.Linear(128, 2) # output of 32 which gets reshaped
        self.softmax = nn.Softmax(dim=2)

    def forward(self, x, input_size):
        outputs, (hidden_state, cell_state) = self.lstm(x)
        print('outputs shape:', outputs.shape)
        print('hidden_state shape:', hidden_state.shape)

        # outputs, hidden_state = self.lstm(x)
        # output = self.linlayer1(hidden_state[-1])
        output = self.linlayer1(outputs)
        output = torch.sigmoid(output)

        output = self.linLayer2(output)
        output = torch.sigmoid(output)

        output = self.linLayer3(output)
        output = torch.sigmoid(output)
        # print('Layer three output size: ', output.shape)
        # output = torch.reshape(output, (input_size, 16, 2))
        return self.softmax(output)

    def reset(self):
        self.lstm.reset_parameters()
        self.linlayer1.reset_parameters()
        self.linLayer2.reset_parameters()

model = Net().to(torch.device("cuda"))

In [39]:
import torch.optim as optim
learning_rate = 0.05
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

abtr = torch.from_numpy(abtr).type(torch.float).to("cuda")
batr = torch.from_numpy(batr).type(torch.float).to("cuda")
abte = torch.from_numpy(abte).type(torch.float).to("cuda")
bate = torch.from_numpy(bate).type(torch.float).to("cuda")
ctr = torch.from_numpy(ctr.astype(int)).type(torch.float).to("cuda")
cte = torch.from_numpy(cte.astype(int)).type(torch.float).to("cuda")

print(model.forward(abtr, train_size).shape, ctr.shape)
print(model.forward(abtr, train_size)[0])
# print(ctr[0])

outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
torch.Size([10000, 16, 2]) torch.Size([10000, 16, 2])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
tensor([[0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284],
        [0.4716, 0.5284]], device='cuda:0', grad_fn=<SelectBackward0>)


In [40]:
num_epochs = 500
display_epochs = 50



obj_vals= []
cross_vals= []

model.reset() # reset your parameters
loss = nn.CrossEntropyLoss()

# def closure():
#     optimizer.zero_grad()
#     objective = loss(model.forward(abtr), ctr, abtr.shape, ctr.shape)
#     objective.backward()
#     return objective

for epoch in range(num_epochs):

    obj_val = loss(model.forward(abtr, train_size), ctr)
    
    optimizer.zero_grad()
    obj_val.backward()
    optimizer.step()
    obj_vals.append(obj_val.item())

    if (epoch+1) % display_epochs == 0:
        print ('Epoch [{}/{}]\t Training Loss: {:.6f}'.format(epoch+1, num_epochs, obj_val.item()))
            
    # as it trains check how well it tests
    with torch.no_grad(): 
        # don't track calculations in the following scope for the purposes of gradients
        cross_val = loss(model.forward(abte, test_size), cte)
        cross_vals.append(cross_val)

    if (epoch+1) % display_epochs == 0:
        print ('Epoch [{}/{}]\t Test Loss: {:.6f}'.format(epoch+1, num_epochs, cross_val.item()))
        
print('Final training loss: {:.4f}'.format(obj_vals[-1]))
print('Final test loss: {:.4f}'.format(cross_vals[-1]))

outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch.Size([2000, 16, 32])
hidden_state shape: torch.Size([4, 2000, 32])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch.Size([2000, 16, 32])
hidden_state shape: torch.Size([4, 2000, 32])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch.Size([2000, 16, 32])
hidden_state shape: torch.Size([4, 2000, 32])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch.Size([2000, 16, 32])
hidden_state shape: torch.Size([4, 2000, 32])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch.Size([2000, 16, 32])
hidden_state shape: torch.Size([4, 2000, 32])
outputs shape: torch.Size([10000, 16, 32])
hidden_state shape: torch.Size([4, 10000, 32])
outputs shape: torch

KeyboardInterrupt: 

In [None]:
model.forward(abtr, train_size)[1]

tensor([[0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5068, 0.4932]], device='cuda:0', grad_fn=<SelectBackward0>)

In [None]:
ctr[1]

tensor([[1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')