In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Simple dataset: sequence of characters
data = "hello world"
chars = list(set(data))
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

In [2]:
char_to_idx

{'d': 0, 'l': 1, 'h': 2, ' ': 3, 'e': 4, 'o': 5, 'w': 6, 'r': 7}

In [3]:
idx_to_char

{0: 'd', 1: 'l', 2: 'h', 3: ' ', 4: 'e', 5: 'o', 6: 'w', 7: 'r'}

In [8]:
def one_hot_encode(sequence, n_chars):
    encoding = torch.zeros(len(sequence), n_chars)
    # print("test")
    # print(encoding.shape)
    for i, char in enumerate(sequence):
        encoding[i][char_to_idx[char]] = 1
    return encoding

In [9]:
input_size = len(chars)
one_hot_encode(data, input_size)

tensor([[0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0.]])

In [22]:
class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

In [20]:
one_hot_encode(data[:-1], input_size).shape
one_hot_encode(data[:-1], input_size).unsqueeze(0).shape
## 

torch.Size([1, 10, 8])

In [24]:
input_size = len(chars)
hidden_size = 12
output_size = len(chars)
model = SimpleRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
hidden = None
for epoch in range(10):
    optimizer.zero_grad()
    input_seq = one_hot_encode(data[:-1], input_size).unsqueeze(0)
    target_seq = torch.tensor([char_to_idx[ch] for ch in data[1:]])
    output, hidden = model(input_seq, hidden)
    loss = criterion(output.view(-1, output_size), target_seq)
    loss.backward()
    optimizer.step()
    hidden = hidden.detach()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 2.095881462097168
Epoch 2, Loss: 2.0332283973693848
Epoch 3, Loss: 1.9832065105438232
Epoch 4, Loss: 1.9388881921768188
Epoch 5, Loss: 1.899026870727539
Epoch 6, Loss: 1.86286199092865
Epoch 7, Loss: 1.8301136493682861
Epoch 8, Loss: 1.8008530139923096
Epoch 9, Loss: 1.7751657962799072
Epoch 10, Loss: 1.7522751092910767


In [25]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleTransformer(nn.Module):
    def __init__(self, input_dim, model_dim, nhead, num_encoder_layers, num_decoder_layers, output_dim):
        super(SimpleTransformer, self).__init__()
        self.encoder = nn.Embedding(input_dim, model_dim)
        self.transformer = nn.Transformer(model_dim, nhead, num_encoder_layers, num_decoder_layers)
        self.decoder = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt):
        src = self.encoder(src)
        tgt = self.encoder(tgt)
        output = self.transformer(src, tgt)
        output = self.decoder(output)
        return output


In [26]:
input_dim = 1000
model_dim = 512
nhead = 8
num_encoder_layers = 3
num_decoder_layers = 3
output_dim = 1000

model = SimpleTransformer(input_dim, model_dim, nhead, num_encoder_layers, num_decoder_layers, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)



In [31]:
src = torch.randint(0, input_dim, (10, 32))
tgt = torch.randint(0, input_dim, (10, 32))
tgt_output = torch.randint(0, output_dim, (10, 32))

for epoch in range(10):
    optimizer.zero_grad()
    output = model(src, tgt)
    loss = criterion(output.view(-1, output_dim), tgt_output.view(-1))
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 23.506019592285156
Epoch 2, Loss: 20.867658615112305
Epoch 3, Loss: 17.07345199584961
Epoch 4, Loss: 12.046655654907227
Epoch 5, Loss: 6.62406063079834
Epoch 6, Loss: 7.386845588684082
Epoch 7, Loss: 8.1568021774292
Epoch 8, Loss: 8.26572036743164
Epoch 9, Loss: 7.5390119552612305
Epoch 10, Loss: 6.645153045654297
