In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

# Report MPS (Apple Metal) support: is_available() says whether the backend can
# be used right now, is_built() whether this PyTorch build includes it.
mps_available = torch.backends.mps.is_available()
print(mps_available)
print(torch.backends.mps.is_built())

# Use MPS when it is usable; otherwise fall back to the CPU so that every later
# `.to(device)` call in the notebook still works on machines without MPS.
device = torch.device('mps' if mps_available else 'cpu')
print('device', device)

True
True
device mps


In [23]:
# The 52 distinct symbols, in the order the model should learn. Every symbol
# appears exactly once so that each one gets a unique index (a repeated
# character would silently overwrite its earlier entry in char_to_idx).
alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
char_to_idx = {char: idx for idx, char in enumerate(alphabet)}
idx_to_char = dict(enumerate(alphabet))

# One training pair per symbol: input = index of a letter, target = index of
# the letter that follows it. The modulo makes the last symbol ('Z') wrap
# around to the first one ('a').
num_symbols = len(alphabet)
next_indices = [(idx + 1) % num_symbols for idx in range(num_symbols)]

# X has shape (num_symbols, 1) and holds the indices as floats (model input);
# y has shape (num_symbols,) and holds integer class targets for the loss.
X = torch.arange(num_symbols).view(-1, 1).float().to(device)
y = torch.tensor(next_indices).long().to(device)

In [24]:
class LSTMPredictor(nn.Module):
  """LSTM followed by dropout and a linear layer that produces class logits.

  Args:
    input_size: Number of features per time step fed to the LSTM.
    hidden_size: Size of the LSTM hidden state.
    output_size: Number of logits produced by the final linear layer.
    num_layers: Number of stacked LSTM layers.
    dropout: Probability used by the dropout layer applied to the LSTM
      output before the linear layer (active only in training mode).
  """

  def __init__(self, input_size, hidden_size, output_size, num_layers=1, dropout=0.2):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.lstm = nn.LSTM(
      input_size=input_size,
      hidden_size=hidden_size,
      num_layers=num_layers,
      batch_first=True
    )

    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x, hidden=None):
    """Run the network and return ``(logits, hidden)``.

    Args:
      x: Input tensor; a trailing feature axis of size 1 is appended here, so
        the LSTM must have been built with ``input_size=1``. The training
        cell in this notebook passes a tensor of shape ``(N, 1)``.
      hidden: Optional ``(h0, c0)`` tuple for the LSTM. When ``None`` the
        LSTM starts from all-zero states.

    Returns:
      A tuple ``(out, hidden)`` where ``out`` is the linear layer's output
      and ``hidden`` is the ``(h_n, c_n)`` tuple returned by the LSTM.
    """
    # nn.LSTM itself initialises h0/c0 to zeros (on the right device and
    # dtype) when `hidden` is None, so no manual allocation is needed.
    out, hidden = self.lstm(x.unsqueeze(-1), hidden)
    out = self.dropout(out)
    # squeeze(1) drops the sequence axis when it has length 1, leaving
    # (batch, hidden_size) for the linear layer.
    out = self.fc(out.squeeze(1))
    return out, hidden

In [25]:
# Seed PyTorch's random number generator so that the weight initialisation
# (and other PyTorch randomness) is repeatable across kernel restarts.
torch.manual_seed(0)

input_size = 1                 # each time step carries a single value: the letter index
hidden_size = 32
output_size = len(alphabet)    # one logit per alphabet position
model = LSTMPredictor(input_size, hidden_size, output_size, num_layers=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop: full-batch updates over all (letter, next letter) pairs.
num_epochs = 10000
model.train()  # make sure dropout is active even if the model was switched to eval mode
for epoch in range(num_epochs):
  optimizer.zero_grad()
  output, hidden = model(X)
  loss = criterion(output, y)
  loss.backward()
  optimizer.step()

  # Report the loss every 1000 epochs.
  if (epoch + 1) % 1000 == 0:
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1000/10000], Loss: 0.9290
Epoch [2000/10000], Loss: 0.8235
Epoch [3000/10000], Loss: 0.6589
Epoch [4000/10000], Loss: 0.5425
Epoch [5000/10000], Loss: 0.4211
Epoch [6000/10000], Loss: 0.6419
Epoch [7000/10000], Loss: 0.3120
Epoch [8000/10000], Loss: 0.3001
Epoch [9000/10000], Loss: 0.4460
Epoch [10000/10000], Loss: 0.4999


In [26]:
def predict_next_letter(current_letter, model):
  """Return the letter the model ranks as most likely to follow ``current_letter``.

  The model is switched to eval mode for the prediction and is put back into
  whatever mode (train or eval) it was in before the call.

  Args:
    current_letter: A character that is a key of the module-level ``char_to_idx``.
    model: A module whose forward pass returns a ``(logits, hidden)`` tuple.

  Returns:
    The character that ``idx_to_char`` maps the arg-max logit index to.

  Raises:
    KeyError: If ``current_letter`` is not a key of ``char_to_idx``.
  """
  # Remember the caller's mode; a statement placed after `return` would never
  # run, so the restore has to happen in `finally`.
  was_training = model.training

  # Build the input on the model's own device; only models without any
  # parameters fall back to the notebook-wide `device`.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else device

  model.eval()
  try:
    with torch.no_grad():
      input_idx = torch.tensor([[char_to_idx[current_letter]]]).float().to(target_device)
      output, _ = model(input_idx)
      predicted_idx = torch.argmax(output).item()
      return idx_to_char[predicted_idx]
  finally:
    model.train(was_training)

In [27]:
# Spot-check a handful of letters, including the lower/upper-case boundary
# ('z') and the end of the alphabet ('Z').
test_sequence = list('afmrzAJKZ')
print("\nPrediction examples:")
predictions = [predict_next_letter(letter, model) for letter in test_sequence]
for letter, next_letter in zip(test_sequence, predictions):
  print(f"Current letter: {letter}, Predicted next letter: {next_letter}")


Prediction examples:
Current letter: a, Predicted next letter: b
Current letter: f, Predicted next letter: g
Current letter: m, Predicted next letter: n
Current letter: r, Predicted next letter: s
Current letter: z, Predicted next letter: A
Current letter: A, Predicted next letter: B
Current letter: J, Predicted next letter: K
Current letter: K, Predicted next letter: L
Current letter: Z, Predicted next letter: a


In [30]:
# Roll the model forward from 'a', feeding every prediction back in as the
# next input, to check that it reproduces the alphabetical order.
print("\nTesting complete alphabet sequence:")
current_letter = 'a'
predicted_sequence = [current_letter]

# 56 predictions plus the seed letter give 57 characters: enough to pass all
# 52 distinct letters and show a few letters after the wrap-around.
for _ in range(56):
  next_letter = predict_next_letter(predicted_sequence[-1], model)
  predicted_sequence += [next_letter]
  current_letter = next_letter

print("Predicted sequence:", ''.join(predicted_sequence))


Testing complete alphabet sequence:
Predicted sequence: abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcde
