In [4]:
import torch
from torch.utils.data import Dataset, DataLoader

class ChatDataset(Dataset):
    def __init__(self, filepath):
        with open(filepath, 'r', encoding='utf-8') as file:
            self.lines = [line.strip() for line in file if line.strip()]

    def __len__(self):
        return len(self.lines)

    def __getitem__(self, idx):
        return self.lines[idx]

# Path to your data file
filepath = '/content/data.txt'
dataset = ChatDataset(filepath)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)


In [5]:
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hidden_dim, n_layers)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.rnn(embedded)
        return hidden, cell

class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hidden_dim, n_layers)
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, input, hidden, cell):
        input = input.unsqueeze(0)
        embedded = self.embedding(input)
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.fc_out(output.squeeze(0))
        return prediction, hidden, cell

# Hyperparameters
input_dim = output_dim = 10000  # Vocabulary size
emb_dim = 256  # Embedding dimensions
hidden_dim = 512  # LSTM hidden dimensions
n_layers = 2  # Number of LSTM layers

encoder = Encoder(input_dim, emb_dim, hidden_dim, n_layers)
decoder = Decoder(output_dim, emb_dim, hidden_dim, n_layers)


In [6]:
optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()

def train(data_loader, encoder, decoder, optimizer, criterion, device):
    encoder.train()
    decoder.train()
    for src, trg in data_loader:
        optimizer.zero_grad()
        hidden, cell = encoder(src)
        output, hidden, cell = decoder(trg, hidden, cell)
        loss = criterion(output, trg)
        loss.backward()
        optimizer.step()
        print(f"Training Loss: {loss.item()}")

# Example usage
# Assuming `src` and `trg` are preprocessed batches of input and target sequences
train(dataloader, encoder, decoder, optimizer, criterion, device='cuda')


ValueError: too many values to unpack (expected 2)