In [None]:
import torch
import torch.nn as nn

# Model dimensions: `input_size` is the per-step feature width handed to the
# LSTM (it is also reused as the output size when the model is built) and
# `hidden_size` is the width of the recurrent state.
input_size = 27
hidden_size = 30

# Training settings: number of parallel sequences per step, length of each
# chunk sliced along the time axis, and the learning rate given to Adam.
batch_size, time_steps = 32, 8
lr = 1e-2

class SimpleLSTM(nn.Module):
    """Single-layer LSTM followed by a linear projection applied at every step.

    Args:
        input_size: Number of features in each input time step.
        hidden_size: Width of the LSTM hidden and cell state.
        output_size: Number of values the linear head emits per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: sequence tensors are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, h0, c0):
        """Run the LSTM over `x` starting from state (h0, c0).

        Returns:
            A pair ``(projected, (hn, cn))``: the linear head applied to every
            LSTM output step, and the final hidden/cell state from the LSTM.
        """
        hidden_seq, final_state = self.lstm(x, (h0, c0))
        return self.fc(hidden_seq), final_state

model = SimpleLSTM(input_size, hidden_size, input_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

iterations = 0
num_epochs = 2000

# NOTE(review): Xtr_batched / Ytr_batched / Xdev_batched / Ydev_batched are built
# by split_into_batches() in a later cell, so this cell only runs after that one.
#
# Those tensors are (batch, steps, block_size) stacks of stride-1 windows made by
# create_pairs(): element 0 of window k is the token at position k, and element 0
# of the matching Y window is the token that follows it. Taking [..., 0] therefore
# recovers `batch` parallel contiguous token streams of shape (batch, steps),
# which is the layout needed to chunk along time and carry LSTM state across
# chunks. A tensor that is already 2-D is used as-is.
Xtr_stream = Xtr_batched[..., 0] if Xtr_batched.dim() == 3 else Xtr_batched
Ytr_stream = Ytr_batched[..., 0] if Ytr_batched.dim() == 3 else Ytr_batched
Xdev_stream = Xdev_batched[..., 0] if Xdev_batched.dim() == 3 else Xdev_batched
Ydev_stream = Ydev_batched[..., 0] if Ydev_batched.dim() == 3 else Ydev_batched

for epoch in range(num_epochs):
    model.train()

    # Every epoch replays the streams from position 0, so start from a zero
    # state. The batch dimension is read from the data rather than from
    # `batch_size` so the state always matches the tensors actually fed in.
    h0 = torch.zeros(1, Xtr_stream.size(0), hidden_size)
    c0 = torch.zeros(1, Xtr_stream.size(0), hidden_size)

    # "+ 1" keeps the final full chunk when the stream length is an exact
    # multiple of time_steps.
    for i in range(0, Xtr_stream.size(1) - time_steps + 1, time_steps):
        # The LSTM expects float features of width input_size, not raw indices.
        Xb = nn.functional.one_hot(Xtr_stream[:, i:i+time_steps], num_classes=input_size).float()
        Yb = Ytr_stream[:, i:i+time_steps]

        optimizer.zero_grad()

        # Truncated BPTT: carry the state values into the next chunk but cut
        # the autograd graph so gradients do not flow into earlier chunks.
        output, (h0, c0) = model(Xb, h0.detach(), c0.detach())

        # reshape (not view): Yb is a non-contiguous slice.
        loss = criterion(output.reshape(-1, input_size), Yb.reshape(-1))
        loss.backward()

        optimizer.step()

        if (i // time_steps) % 100 == 0:
            print(f'Epoch [{epoch}/{num_epochs}], Step [{i // time_steps}], Loss: {loss.item():.4f}')

    # Evaluate on development set
    if (epoch + 1) % 100 == 0:
        model.eval()
        with torch.no_grad():
            dev_loss = 0.0
            dev_chunks = 0
            # Dev evaluation uses its own state: start from zeros and carry it
            # across dev chunks instead of reusing the training state.
            h_dev = torch.zeros(1, Xdev_stream.size(0), hidden_size)
            c_dev = torch.zeros(1, Xdev_stream.size(0), hidden_size)
            for j in range(0, Xdev_stream.size(1) - time_steps + 1, time_steps):
                Xb_dev = nn.functional.one_hot(Xdev_stream[:, j:j+time_steps], num_classes=input_size).float()
                Yb_dev = Ydev_stream[:, j:j+time_steps]

                output_dev, (h_dev, c_dev) = model(Xb_dev, h_dev, c_dev)
                dev_loss += criterion(output_dev.reshape(-1, input_size), Yb_dev.reshape(-1)).item()
                dev_chunks += 1
            # Average over the chunks actually evaluated; NaN if there were none.
            mean_dev_loss = dev_loss / dev_chunks if dev_chunks else float('nan')
            print(f'Epoch [{epoch + 1}/{num_epochs}], Dev Loss: {mean_dev_loss:.4f}')


In [None]:
import torch
import torch.nn as nn

# `stoi` (character -> integer id) and `words` are assumed to be defined in an earlier cell,
# e.g. stoi = {'a': 0, 'b': 1, ..., 'z': 25, '.': 26}

# Helper functions for encoding and batching
def encode_words(words, mapping=None):
    """Encode words as one flat list of integer ids.

    Each word is wrapped in a leading and trailing '.' before encoding, and the
    ids of all words are concatenated in order.

    Args:
        words: Iterable of strings to encode.
        mapping: Optional character -> id mapping. When omitted, the
            notebook-level `stoi` mapping is used, as before.

    Returns:
        A list of ids, one per character including the '.' markers.

    Raises:
        KeyError: If a character (or '.') is missing from the mapping.
    """
    # Resolve the global lazily so an explicit mapping works even when `stoi`
    # has not been defined in the kernel.
    table = stoi if mapping is None else mapping
    return [table[ch] for w in words for ch in '.' + w + '.']

# Flatten the whole word list into a single list of token ids; encode_words
# wraps every word in '.' markers before encoding it.
# NOTE(review): `words` is not defined in this cell - presumably loaded in an
# earlier one; confirm.
encoded = encode_words(words)

def create_pairs(seq, block_size):
    """Build next-token (input, target) pairs from overlapping windows of `seq`.

    Window ``s`` covers ``seq[s : s + block_size]``; its target is the same
    window shifted one position to the right. Windows start at every index
    from 0 up to, but not including, ``len(seq) - block_size``.

    Args:
        seq: Sequence of integer ids.
        block_size: Length of every window.

    Returns:
        ``(X, Y)`` as ``torch.long`` tensors with one row per window.
    """
    starts = range(len(seq) - block_size)
    inputs = [seq[s:s + block_size] for s in starts]
    targets = [seq[s + 1:s + 1 + block_size] for s in starts]
    return (
        torch.tensor(inputs, dtype=torch.long),
        torch.tensor(targets, dtype=torch.long),
    )

# Window length handed to create_pairs for both splits.
block_size = 8

# The first 80% of the encoded token list is used for training, the rest for dev.
n = len(encoded)
n1 = int(0.8 * n)
train_seq, dev_seq = encoded[:n1], encoded[n1:]

# Input/target window tensors for each split.
Xtr, Ytr = create_pairs(train_seq, block_size)
Xdev, Ydev = create_pairs(dev_seq, block_size)

def split_into_batches(X, Y, batch_size):
    """Fold the leading dimension of X and Y into `batch_size` contiguous groups.

    Trailing rows that do not fill a complete group are dropped. Group ``b`` of
    the result holds original rows ``b * rows_per_group`` up to (but excluding)
    ``(b + 1) * rows_per_group``, in order.

    Args:
        X: Tensor whose first dimension indexes rows.
        Y: Tensor with the same number of rows as X.
        batch_size: Number of groups to produce.

    Returns:
        ``(X, Y)`` reshaped to ``(batch_size, rows_per_group, last_dim)``. When
        there are fewer rows than `batch_size`, ``rows_per_group`` is 0 and the
        tensors are empty rather than raising.

    Raises:
        ValueError: If X and Y do not have the same number of rows.
    """
    if X.size(0) != Y.size(0):
        raise ValueError(
            f"X and Y must have the same number of rows, got {X.size(0)} and {Y.size(0)}"
        )

    rows_per_group = X.size(0) // batch_size
    keep = rows_per_group * batch_size

    # Explicit dimensions instead of -1: with zero rows kept, -1 is ambiguous
    # and raises. reshape is used so non-contiguous inputs are handled as well.
    X = X[:keep].reshape(batch_size, rows_per_group, X.size(-1))
    Y = Y[:keep].reshape(batch_size, rows_per_group, Y.size(-1))
    return X, Y

# `batch_size` is assigned with the other hyperparameters further down this
# cell, i.e. after this point, so on a fresh kernel the calls below would raise
# NameError. Define it before first use; the value matches the later assignment
# and the batch dimension of the initial LSTM state.
batch_size = 32

# Fold the window tensors into (batch_size, steps, block_size) groups.
Xtr_batched, Ytr_batched = split_into_batches(Xtr, Ytr, batch_size)
Xdev_batched, Ydev_batched = split_into_batches(Xdev, Ydev, batch_size)

class SimpleLSTM(nn.Module):
    """Single-layer LSTM followed by a linear projection applied at every step.

    Args:
        input_size: Number of features in each input time step.
        hidden_size: Width of the LSTM hidden and cell state.
        output_size: Number of values the linear head emits per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: sequence tensors are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, h0, c0):
        """Run the LSTM over `x` starting from state (h0, c0).

        Returns:
            A pair ``(projected, (hn, cn))``: the linear head applied to every
            LSTM output step, and the final hidden/cell state from the LSTM.
        """
        hidden_seq, final_state = self.lstm(x, (h0, c0))
        return self.fc(hidden_seq), final_state

# Hyperparameters.
batch_size = 32    # number of parallel sequences; batch dimension of the LSTM state
hidden_size = 30   # width of the LSTM hidden/cell state
lr = 0.01          # learning rate passed to Adam
time_steps = 8     # length of each chunk sliced along the time axis during training
input_size = 27    # width of the one-hot input vectors
output_size = 27   # number of classes scored per step

# Seed the global RNG before the model is built so the weight initialisation,
# and therefore the whole run, is reproducible across kernel restarts.
seed = 42
torch.manual_seed(seed)

model = SimpleLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

iterations = 0     # incremented once per completed epoch by the training loop
num_epochs = 2000

# Convert input indices to one-hot vectors
def to_one_hot(indices, num_classes):
    """One-hot encode an integer index tensor along a new trailing dimension.

    Works for index tensors of any rank; a 2-D ``(batch, steps)`` input yields
    ``(batch, steps, num_classes)`` exactly as before.

    Args:
        indices: int64 tensor of class ids with values in ``[0, num_classes)``.
        num_classes: Size of the one-hot dimension.

    Returns:
        Tensor of shape ``indices.shape + (num_classes,)`` in the default float
        dtype, on the same device as `indices`, with 1.0 at each index position
        and 0.0 elsewhere.
    """
    one_hot = torch.zeros(*indices.shape, num_classes, device=indices.device)
    # scatter_ needs the index tensor to have the same rank as the target, so
    # add a trailing singleton dimension and scatter along the last axis.
    one_hot.scatter_(-1, indices.unsqueeze(-1), 1.0)
    return one_hot

# Xtr_batched / Ytr_batched (and the dev tensors) come out of
# split_into_batches() as (batch, steps, block_size) stacks of the stride-1
# windows built by create_pairs(): element 0 of window k is the token at
# position k, and element 0 of the matching Y window is the token that follows
# it. Taking [..., 0] therefore recovers `batch` parallel contiguous token
# streams of shape (batch, steps), which is the layout needed to chunk along
# time and carry LSTM state across chunks. A tensor that is already 2-D is used
# as-is.
Xtr_stream = Xtr_batched[..., 0] if Xtr_batched.dim() == 3 else Xtr_batched
Ytr_stream = Ytr_batched[..., 0] if Ytr_batched.dim() == 3 else Ytr_batched
Xdev_stream = Xdev_batched[..., 0] if Xdev_batched.dim() == 3 else Xdev_batched
Ydev_stream = Ydev_batched[..., 0] if Ydev_batched.dim() == 3 else Ydev_batched

# (batch, steps) indices -> (batch, steps, input_size) float one-hot inputs.
one_hot_Xtr = to_one_hot(Xtr_stream, input_size)
one_hot_Xdev = to_one_hot(Xdev_stream, input_size)

for epoch in range(num_epochs):
    model.train()

    # Every epoch replays the streams from position 0, so start from a zero
    # state. The batch dimension is read from the data so the state always
    # matches the tensors actually fed to the model.
    h0 = torch.zeros(1, one_hot_Xtr.size(0), hidden_size)
    c0 = torch.zeros(1, one_hot_Xtr.size(0), hidden_size)

    # "+ 1" keeps the final full chunk when the stream length is an exact
    # multiple of time_steps.
    for i in range(0, one_hot_Xtr.size(1) - time_steps + 1, time_steps):
        Xb = one_hot_Xtr[:, i:i + time_steps, :]
        Yb = Ytr_stream[:, i:i + time_steps]

        optimizer.zero_grad()

        # Truncated BPTT: carry the state values into the next chunk but cut
        # the autograd graph so gradients do not flow into earlier chunks.
        output, (h0, c0) = model(Xb, h0.detach(), c0.detach())

        # reshape (not view): Yb is a non-contiguous slice.
        loss = criterion(output.reshape(-1, output_size), Yb.reshape(-1))
        loss.backward()

        optimizer.step()

        if (i // time_steps) % 100 == 0:
            print(f'Epoch [{epoch}/{num_epochs}], Step [{i // time_steps}], Loss: {loss.item():.4f}')

    # Evaluate on development set
    if (epoch + 1) % 100 == 0:
        model.eval()
        with torch.no_grad():
            dev_loss = 0.0
            dev_chunks = 0
            # Dev evaluation uses its own state: start from zeros and carry it
            # across dev chunks instead of reusing the training state.
            h_dev = torch.zeros(1, one_hot_Xdev.size(0), hidden_size)
            c_dev = torch.zeros(1, one_hot_Xdev.size(0), hidden_size)
            for j in range(0, one_hot_Xdev.size(1) - time_steps + 1, time_steps):
                Xb_dev = one_hot_Xdev[:, j:j + time_steps, :]
                Yb_dev = Ydev_stream[:, j:j + time_steps]

                output_dev, (h_dev, c_dev) = model(Xb_dev, h_dev, c_dev)
                dev_loss += criterion(output_dev.reshape(-1, output_size), Yb_dev.reshape(-1)).item()
                dev_chunks += 1
            # Average over the chunks actually evaluated; NaN if there were none.
            mean_dev_loss = dev_loss / dev_chunks if dev_chunks else float('nan')
            print(f'Epoch [{epoch + 1}/{num_epochs}], Dev Loss: {mean_dev_loss:.4f}')

    # Counts completed epochs.
    iterations += 1
