In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# Define the RNN model
class SimpleRNN(nn.Module):
    """Single-step recurrent cell with a log-softmax classification head.

    At every step the current input and the previous hidden state are
    concatenated along the feature axis and passed through two independent
    linear layers: one yields the next hidden state, the other yields class
    scores that are normalised with ``LogSoftmax``. No non-linearity is
    applied to the hidden state.

    Args:
        input_size: Number of features in the per-step input.
        hidden_size: Number of features in the hidden state.
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.hidden_size = hidden_size

        # Both layers consume the concatenation [input, hidden].
        # Creation order (i2h, then i2o) is kept so weight initialisation
        # draws from the RNG in the same sequence as before.
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance the cell by one time step.

        Args:
            input: Tensor of shape ``(batch, input_size)``.
            hidden: Tensor of shape ``(batch, hidden_size)``.

        Returns:
            Tuple ``(output, hidden)``: log-probabilities of shape
            ``(batch, output_size)`` and the next hidden state of shape
            ``(batch, hidden_size)``.
        """
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        # The output is computed from the same pre-update concatenation,
        # not from the freshly computed hidden state.
        output = self.softmax(self.i2o(combined))
        return output, hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Number of sequences processed in parallel. Defaults
                to 1, which matches the previous fixed behaviour.

        Returns:
            Zero tensor of shape ``(batch_size, hidden_size)`` with the same
            dtype and device as the model's weights, so it can be
            concatenated with inputs after ``.to(device)`` / ``.double()``.
        """
        weight = self.i2h.weight
        return torch.zeros(
            batch_size, self.hidden_size, dtype=weight.dtype, device=weight.device
        )

In [3]:
# Function to generate training data
def generate_training_data(n, width=7):
    """Build paired binary-string / integer examples for 1..n.

    Args:
        n: Largest integer to include; values run from 1 to ``n`` inclusive.
            ``n <= 0`` yields two empty lists.
        width: Minimum number of binary digits per string; shorter
            representations are left-padded with ``'0'``. Values that need
            more than ``width`` digits are NOT truncated, so choose
            ``width >= n.bit_length()`` if equal-length strings are required.
            Defaults to 7, the previously hard-coded width.

    Returns:
        Tuple ``(binary_strings, decimal_numbers)`` of equal length, where
        ``binary_strings[k]`` is the zero-padded binary form of
        ``decimal_numbers[k]``.
    """
    decimal_numbers = list(range(1, n + 1))
    binary_strings = [format(value, f'0{width}b') for value in decimal_numbers]
    return binary_strings, decimal_numbers

# Build the dataset: every integer from 1 to 63 with its zero-padded binary string
binary_strings, decimal_numbers = generate_training_data(63)

# Encode each binary string as a float32 column tensor (one row per bit, fed
# to the RNN one row at a time) and each target as a 1-element long tensor
# (the class index expected by the loss).
binary_tensors = [
    torch.tensor(list(map(int, bits)), dtype=torch.float32).unsqueeze(1)
    for bits in binary_strings
]
decimal_tensors = [
    torch.tensor([value], dtype=torch.long)
    for value in decimal_numbers
]

In [4]:
# Sanity check: show the first encoded example (the number 1) as a column of bits, one bit per row.
binary_tensors[0]

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.]])

In [5]:
from tqdm.notebook import trange

In [6]:
# Seed the global RNG *before* the model is built: weight initialisation is the
# only source of randomness in this cell (samples are visited in a fixed
# order), so seeding makes the run repeatable under Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Define the RNN
n_input = 1     # one bit is fed per time step
n_hidden = 16
n_output = 64   # one class per integer 0..63 (the training labels are 1..63)
rnn = SimpleRNN(n_input, n_hidden, n_output)

# Define the loss function and the optimizer
# NLLLoss expects log-probabilities, which is what the model's LogSoftmax emits.
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.005)

# Train the RNN: plain per-sample SGD, one optimizer step per sequence
n_epochs = 5000
for epoch in trange(n_epochs):
    total_loss = 0
    for binary, decimal in zip(binary_tensors, decimal_tensors):
        # Every sequence starts from a fresh zero hidden state.
        hidden = rnn.initHidden()
        # Feed the bits in order; only the prediction after the final bit
        # is scored.
        for bit in binary:
            output, hidden = rnn(bit.view(1, -1), hidden)
        loss = criterion(output, decimal)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if (epoch + 1) % 500 == 0:
        print(f'Epoch {epoch + 1}/{n_epochs}, Loss: {total_loss / len(binary_tensors)}')

  0%|          | 0/5000 [00:00<?, ?it/s]

Epoch 500/5000, Loss: 0.39086836268977515
Epoch 1000/5000, Loss: 0.21066175769304946
Epoch 1500/5000, Loss: 0.14125838452979686
Epoch 2000/5000, Loss: 0.10544409492747887
Epoch 2500/5000, Loss: 0.08383469372647741
Epoch 3000/5000, Loss: 0.06945531883661354
Epoch 3500/5000, Loss: 0.059227800220325
Epoch 4000/5000, Loss: 0.051596030326826234
Epoch 4500/5000, Loss: 0.045690228297774284
Epoch 5000/5000, Loss: 0.04098713026928996


In [9]:
test_data = [1, 0, 0, 0]
# Derive the expected value from the bits themselves so it cannot drift out of
# sync when test_data is edited.
test_result_should = int(''.join(str(b) for b in test_data), 2)

test_data_tensor = torch.tensor([int(b) for b in test_data], dtype=torch.float32).view(-1, 1)

# NOTE(review): the model was trained on 7-bit, zero-padded sequences, while
# this input has only 4 bits. If the length mismatch is not intentional,
# left-pad test_data with zeros to the training length.

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    hidden = rnn.initHidden()
    for bit in test_data_tensor:
        output, hidden = rnn(bit.view(1, -1), hidden)

# Index of the highest log-probability is the predicted integer. argmax is used
# instead of unpacking into `_`, which would overwrite IPython's last-output variable.
predicted = output.argmax(dim=1)
print(f'Test data: {test_data}, Predicted: {predicted.item()}, Should be: {test_result_should}')

Test data: [1, 0, 0, 0], Predicted: 8, Should be: 8
