In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Experiment configuration.
datasetSize = 100000  # number of random training bit strings
bStringLen = 50       # bits per string, i.e. the sequence length
batch_size = 32       # strings sampled per optimisation step
iterations = 10000    # number of optimisation steps

# Seed every RNG the notebook draws from (NumPy for the training data, torch
# for weight initialisation and batch sampling) so that Restart & Run All
# starts from the same random state each time.
# NOTE(review): bit-exact results on GPU also depend on backend determinism.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
# Training inputs: `datasetSize` random bit strings of length `bStringLen`,
# stored as (datasetSize, bStringLen, 1) so each time step carries one value.
X_train = np.random.randint(0, 2, size=(datasetSize, bStringLen, 1))
# Per-step target: 1.0 where the number of ones seen so far (this step
# included) is even, 0.0 where it is odd.
Y_train = np.equal(np.cumsum(X_train, axis=1) % 2, 0).astype("float32")

In [4]:
# Convert the NumPy arrays to float32 tensors. torch.as_tensor accepts both
# ndarrays and tensors, so re-running this cell (when the names already hold
# tensors) is a no-op instead of the TypeError torch.from_numpy would raise.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
Y_train = torch.as_tensor(Y_train, dtype=torch.float32)

In [5]:
# Prefer the GPU when CUDA is usable, otherwise stay on the CPU; the printed
# flag records whether CUDA was picked for this run.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device.type == "cuda")

True


In [6]:
class xor_fixed(nn.Module):
    """Per-time-step binary classifier: an LSTM followed by a linear read-out.

    Args:
        input_size: number of features per time step (default 1).
        hidden_size: width of the LSTM hidden state (default 2).
    """

    def __init__(self, input_size=1, hidden_size=2):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        # batch_first=True: tensors are laid out as (batch, time, feature).
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, batch_first=True)
        # Maps the hidden state of every time step to a single logit.
        self.fc = nn.Linear(self.hidden_size, 1)

    def forward(self, x):
        """Return one sigmoid-activated score per time step.

        Args:
            x: float tensor of shape (batch, time, input_size).

        Returns:
            Tensor of shape (batch, time, 1) with values in [0, 1].
        """
        hidden_states, _ = self.lstm(x)
        logits = self.fc(hidden_states)
        # Functional sigmoid: avoids constructing an nn.Sigmoid module per call.
        return torch.sigmoid(logits)

In [7]:
# Build the network on the selected device and smoke-test a single forward
# pass on a random mini-batch; the cell displays the resulting output shape.
model = xor_fixed()
model = model.to(device)
idxs = torch.randint(0, datasetSize, (batch_size,))
X_batch = X_train[idxs].to(device)
model(X_batch).size()

torch.Size([32, 50, 1])

In [8]:
# Adam with its default learning rate, and mean-reduced binary cross-entropy
# on the model's sigmoid outputs.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
bce = nn.BCELoss(reduction="mean")

In [9]:
def train(model, X, Y, optimizer, criterion,
          num_iterations=None, minibatch_size=None, log_every=1000,
          target_device=None):
    """Train `model` on random mini-batches drawn with replacement from X/Y.

    Args:
        model: module mapping a batch of inputs to predictions that
            `criterion` can score against the matching targets.
        X: input tensor, indexed along dim 0 by sample.
        Y: target tensor, aligned with `X` along dim 0.
        optimizer: optimizer already bound to `model`'s parameters.
        criterion: loss callable, invoked as ``criterion(output, target)``.
        num_iterations: number of optimisation steps; defaults to the
            notebook-level `iterations`.
        minibatch_size: samples per step; defaults to the notebook-level
            `batch_size`.
        log_every: print progress every this many steps (falsy disables it).
        target_device: device each batch is moved to; defaults to the
            notebook-level `device`.

    Returns:
        Mean training loss over all steps as a float (NaN if no step ran).
    """
    # Fall back to the notebook globals so existing five-argument calls
    # behave as before.
    if num_iterations is None:
        num_iterations = iterations
    if minibatch_size is None:
        minibatch_size = batch_size
    if target_device is None:
        target_device = device

    n_samples = X.shape[0]
    model.train()
    total_loss = 0.0
    for i in range(1, num_iterations + 1):
        optimizer.zero_grad()

        # Sample against the tensor actually passed in rather than a global
        # dataset size, so any X/Y pair can be used.
        idxs = torch.randint(n_samples, (minibatch_size,))
        X_batch = X[idxs].to(target_device)
        Y_batch = Y[idxs].to(target_device)

        output = model(X_batch)
        loss = criterion(output, Y_batch)
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        total_loss += step_loss
        if log_every and i % log_every == 0:
            # Accuracy of this step's batch (outputs computed before the
            # parameter update), thresholding both sides at 0.5. The
            # denominator is the real element count, not a fixed length.
            correct = ((output > 0.5) == (Y_batch > 0.5)).sum()
            accuracy = correct / Y_batch.numel() * 100
            print(f"Training Iteration: {i}, Accuracy: {accuracy.item()}, Loss: {step_loss}")

    if num_iterations <= 0:
        return float("nan")
    return total_loss / num_iterations

In [10]:
# Run the training loop on the full training tensors.
train(model=model, X=X_train, Y=Y_train, optimizer=optimizer, criterion=bce)

Training Iteration: 1000, Accuracy: 50.25, Loss: 0.6917176842689514
Training Iteration: 2000, Accuracy: 56.374996185302734, Loss: 0.68016117811203
Training Iteration: 3000, Accuracy: 100.0, Loss: 0.17933964729309082
Training Iteration: 4000, Accuracy: 100.0, Loss: 0.04794060438871384
Training Iteration: 5000, Accuracy: 100.0, Loss: 0.022640692070126534
Training Iteration: 6000, Accuracy: 100.0, Loss: 0.012710405513644218
Training Iteration: 7000, Accuracy: 100.0, Loss: 0.007362487260252237
Training Iteration: 8000, Accuracy: 100.0, Loss: 0.004329217132180929
Training Iteration: 9000, Accuracy: 100.0, Loss: 0.002545875497162342
Training Iteration: 10000, Accuracy: 100.0, Loss: 0.0015488138888031244


In [11]:
# Evaluate on freshly sampled bit strings, labelled the same way as the
# training data (1.0 where the running count of ones is even).
testDatasetSize = 10000
X_test = torch.randint(2, size=(testDatasetSize, bStringLen, 1)).float().to(device)
Y_test = (X_test.cumsum(dim=1) % 2 == 0).float()

# Inference only: switch to eval mode and skip autograd bookkeeping so the
# large forward pass does not hold on to a computation graph.
model.eval()
with torch.no_grad():
    output = model(X_test)

test_accuracy = ((output > 0.5) == (Y_test > 0.5)).sum() / Y_test.numel() * 100
print(f"Test Accuracy: {test_accuracy.item()}")

Test Accuracy: 100.0
