In [45]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

BATCH_SIZE = 4

x_train = np.loadtxt("akshar_sequences//x_train.csv", delimiter=",", dtype=int)
y_train = np.loadtxt("akshar_sequences//y_train.csv", delimiter=",", dtype=int)
x_test = np.loadtxt("akshar_sequences//x_test.csv", delimiter=",", dtype=int)
y_test = np.loadtxt("akshar_sequences//y_test.csv", delimiter=",", dtype=int)
x_val = np.loadtxt("akshar_sequences//x_val.csv", delimiter=",", dtype=int)
y_val = np.loadtxt("akshar_sequences//y_val.csv", delimiter=",", dtype=int)


class SequenceDataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        x = torch.from_numpy(self.x[index]).long() 
        y = torch.from_numpy(self.y[index]).long() 
        return x, y
    
    def __len__(self):
        return len(self.x)

train_dataset = SequenceDataset(x_train, y_train)
val_dataset = SequenceDataset(x_val, y_val)
test_dataset = SequenceDataset(x_test, y_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [46]:
for x,y in train_loader:
    print(x.shape)
    print(y.shape)
    for seq in x:
        print(seq)
    for seq in y:
        print(seq)
    break

torch.Size([4, 28])
torch.Size([4, 28])
tensor([128,  19,   0,  17,  20,  13,   0,  18,  14,   1,   0,  19, 129, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,  18,   0,  12,   4,   6,   0,  13,  19, 129, 130, 130, 130, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,  13,   0,  12,   0,  10,   0, 129, 130, 130, 130, 130, 130, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,   1,   8,  13,   3,  17,   0,  10,   7,   8,   0, 129, 130, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,  62,  74,  91,  61,  88,  82, 101,  70,  62, 129, 130, 130, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,  82,  97,  72,  49,  97,  28,  57, 129, 130, 130, 130, 130, 130,
        130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130, 130])
tensor([128,  66, 

In [50]:
import torch.nn as nn
import torch
import random

START_IDX = 128
END_IDX = 129
VOCAB_SIZE = 131
EMBEDDING_DIM = 128
HIDDEN_DIM = 256
EPOCHS = 10
NUM_LAYERS = 3
DROPOUT = 0
BIDIRECTIONAL = 0
CELL_TYPE = "LSTM"
BEAM_SIZE = 5

class Encoder(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers, dropout, cell_type, bidirectional):
        super(Encoder, self).__init__()

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout_prob = dropout
        self.cell_type = cell_type.upper()
        self.bidirectional = bidirectional

        self.dropout = nn.Dropout(self.dropout_prob)
        self.embedding = nn.Embedding(num_embeddings = self.vocab_size, embedding_dim = self.embedding_dim)

        if self.cell_type == 'RNN':
            self.rnn = nn.RNN(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        elif self.cell_type == 'LSTM':
            self.rnn = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        elif self.cell_type == 'GRU':
            self.rnn = nn.GRU(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        else:
            raise ValueError(f"Unsupported cell_type '{self.cell_type}'. Supported types: 'RNN', 'LSTM', 'GRU'.")

    def forward(self, x):
        # x has shape (batch_size, seq_len)

        # Calculate embedding
        embedding = self.embedding(x)
        if(self.dropout_prob != 0) : embedding = self.dropout(embedding)

        # Pass embedding through RNN
        output, hidden = self.rnn(embedding)

        # Apply dropout to hidden state

        if(self.dropout_prob != 0):

            if self.cell_type == 'LSTM':
                hidden = tuple([self.dropout(h) for h in hidden])
            else:
                hidden = self.dropout(hidden)

        return hidden


class Decoder(nn.Module):

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers, dropout, cell_type, bidirectional):
        super(Decoder, self).__init__()

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout_prob = dropout
        self.cell_type = cell_type.upper()
        self.bidirectional = bidirectional

        self.dropout = nn.Dropout(self.dropout_prob)
        self.embedding = nn.Embedding(num_embeddings = self.vocab_size, embedding_dim = self.embedding_dim)

        if self.cell_type == 'RNN':
            self.rnn = nn.RNN(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        elif self.cell_type == 'LSTM':
            self.rnn = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        elif self.cell_type == 'GRU':
            self.rnn = nn.GRU(input_size=self.embedding_dim, hidden_size=self.hidden_dim, num_layers=self.num_layers, batch_first=True, dropout=self.dropout_prob, bidirectional=bool(self.bidirectional))
        else:
            raise ValueError(f"Unsupported cell_type '{self.cell_type}'. Supported types: 'RNN', 'LSTM', 'GRU'.")

        self.fc = nn.Linear(self.hidden_dim, self.vocab_size)

    def forward(self, x, hidden):
        # x has shape (batch_size, seq_len)

        # Calculate embedding
        embedding = self.embedding(x)

        if(self.dropout_prob != 0) : embedding = self.dropout(embedding)

        # Pass embedding and hidden state through RNN
        output, hidden = self.rnn(embedding, hidden)

        # Apply dropout to output
        if(self.dropout_prob != 0) : output = self.dropout(output)

        # Convert hidden state tuple to tensor for linear layer
        if self.cell_type == 'LSTM':
            hidden = torch.cat([h for h in hidden], dim=1)
        else:
            hidden = hidden.squeeze()

        # Pass output through linear layer
        output = self.fc(output)

        return output, hidden


class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, source, target):
        # Encode source sequence
        encoder_hidden = self.encoder(source)

        # Initialize decoder hidden state with encoder final hidden state
        if self.decoder.cell_type == 'LSTM':
            decoder_hidden = (encoder_hidden[0][-self.decoder.num_layers:], encoder_hidden[1][-self.decoder.num_layers:])
        else:
            decoder_hidden = encoder_hidden[-self.decoder.num_layers:]

        # Initialize output tensor
        target_len = target.shape[1]
        batch_size = target.shape[0]

        vocab_size = self.decoder.vocab_size
        outputs = torch.zeros(batch_size, target_len, vocab_size).to(target.device)

        # Use teacher forcing
        input_token = target[:, 0] 
        for t in range(1, target_len):

            if(len(decoder_hidden.shape)==2 ) : decoder_hidden = decoder_hidden.unsqueeze(0)

            output, decoder_hidden = self.decoder(input_token.unsqueeze(1), decoder_hidden)
            outputs[:, t] = output.squeeze(1)

            # Determine next input token using teacher forcing
            input_token = target[:, t]

        return outputs


encoder = Encoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS, DROPOUT, CELL_TYPE, BIDIRECTIONAL)
decoder = Decoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS, DROPOUT, CELL_TYPE, BIDIRECTIONAL)

source = torch.randint(low=0, high=VOCAB_SIZE, size=(BATCH_SIZE, 10))
target = torch.randint(low=0, high=VOCAB_SIZE, size=(BATCH_SIZE, 10))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Seq2Seq(encoder, decoder, device)

output = model.forward(source,target)

AttributeError: 'tuple' object has no attribute 'shape'

In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Seq2Seq(encoder, decoder, device=device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    # Train
    model.train()
    train_loss = 0
    for src, tgt in train_loader:
        src = src.to(device)
        tgt = tgt.to(device)

        optimizer.zero_grad()

        output = model.forward(src, tgt)
        output_dim = output.shape[-1]

        # Flatten output and target tensors to calculate loss
        loss = criterion(output.view(-1, output_dim), tgt.view(-1))

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()
        train_loss += loss.item()

    # Evaluate on validation set
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for src, tgt in val_loader:
            src = src.to(device)
            tgt = tgt.to(device)

            output= model.forward(src, tgt)
            output_dim = output.shape[-1]

            # Flatten output and target tensors to calculate loss
            loss = criterion(output.view(-1, output_dim), tgt.view(-1))

            val_loss += loss.item()

    # Print statistics for the epoch
    print("Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}"
          .format(epoch+1, EPOCHS, train_loss/len(train_loader), val_loss/len(val_loader)))


KeyboardInterrupt: 

In [44]:
import wandb

wandb.login()

sweep_config = {
    "method": "random",
    'parameters': {
        'input_embedding_size': {'values': [32, 64, 128]},
        'num_encoder_layers': {'values': [1, 2, 3]},
        'num_decoder_layers': {'values': [1, 2, 3]},
        'hidden_layer_size' : {'values' : [32,64,128,256]},
        'cell_type' : {'values' : ['LSTM','GRU','RNN']},
        'bidirectional' : {'values' : [0]},
        'dropout' : {'values' : [0,0.2,0.3]},
        'teacher_forcing' : {'values' : [0.5, 0.75, 1]}
    }
}

sweep_id = wandb.sweep(sweep_config, project="rnn_runs")

def train(config=None):
   
    wandb.init(config=config)
    run_name = "ies_"+str(wandb.config.input_embedding_size)+"_nel_"+str(wandb.config.num_encoder_layers)+"_ndl_"+str(wandb.config.num_decoder_layers)+"_hls_"+str(wandb.config.hidden_layer_size)+"_hls_"+str(wandb.config.hidden_layer_size)+"_cell_"+str(wandb.config.cell_type)
    wandb.run.name = run_name
    config = wandb.config

    VOCAB_SIZE = 131
    EMBEDDING_DIM = config.input_embedding_size
    HIDDEN_DIM = config.hidden_layer_size
    EPOCHS = 5
    NUM_LAYERS_ENCODER = config.num_encoder_layers
    NUM_LAYERS_DECODER =config.num_decoder_layers
    DROPOUT = 0 if ((NUM_LAYERS_ENCODER == 1) or (NUM_LAYERS_DECODER == 1)) else config.dropout
    BIDIRECTIONAL = config.bidirectional
    CELL_TYPE = config.cell_type
    TEACHER_FORCING = config.teacher_forcing

    encoder = Encoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_ENCODER, DROPOUT, CELL_TYPE, BIDIRECTIONAL)
    decoder = Decoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_DECODER, DROPOUT, CELL_TYPE, BIDIRECTIONAL)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Seq2Seq(encoder, decoder, device)

    model = Seq2Seq(encoder, decoder, device=device).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(EPOCHS):
        
        model.train()
        train_loss = 0
        for src, tgt in train_loader:
            src = src.to(device)
            tgt = tgt.to(device)

            optimizer.zero_grad()

            output = model.forward(src, tgt)
            output_dim = output.shape[-1]

    
            loss = criterion(output.view(-1, output_dim), tgt.view(-1))

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

            optimizer.step()
            train_loss += loss.item()

        
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for src, tgt in val_loader:
                src = src.to(device)
                tgt = tgt.to(device)

                output= model.forward(src, tgt)
                output_dim = output.shape[-1]

                
                loss = criterion(output.view(-1, output_dim), tgt.view(-1))

                val_loss += loss.item()

        
        wandb.log({"train_loss": train_loss/len(train_loader), "val_loss": val_loss/len(val_loader), "epochs" : epoch})       

wandb.agent(sweep_id, train, count = 20) 

Create sweep with ID: u1n7ugt4
Sweep URL: https://wandb.ai/nihil666/rnn_runs/sweeps/u1n7ugt4


[34m[1mwandb[0m: Agent Starting Run: rhy8pgk2 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185334…

0,1
epochs,▁▃▅▆█
train_loss,█▇▂▅▁
val_loss,█▃▂▁▂

0,1
epochs,4.0
train_loss,0.83468
val_loss,897.68121


[34m[1mwandb[0m: Agent Starting Run: z2n27ulj with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run z2n27ulj errored: AttributeError("'tuple' object has no attribute 'shape'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run z2n27ulj errored: AttributeError("'tuple' object has no attribute 'shape'")
[34m[1mwandb[0m: Agent Starting Run: 59k7zjk2 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 59k7zjk2 errored: RuntimeError('Expected hidden size (3, 4, 64), got [1, 4, 64]')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 59k7zjk2 errored: RuntimeError('Expected hidden size (3, 4, 64), got [1, 4, 64]')
[34m[1mwandb[0m: Agent Starting Run: b8dbo259 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185319…

0,1
epochs,▁▃▅▆█
train_loss,█▁▂▂▄
val_loss,█▄▂▂▁

0,1
epochs,4.0
train_loss,0.49989
val_loss,392.87652


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qo1rxuhc with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185344…

0,1
epochs,▁▃▅▆█
train_loss,▁▄▃█▃
val_loss,█▄▄▁▁

0,1
epochs,4.0
train_loss,0.83704
val_loss,885.80134


[34m[1mwandb[0m: Agent Starting Run: ibijjtk1 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 64
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185294…

0,1
epochs,▁▃▅▆█
train_loss,▇█▃▁▆
val_loss,█▄▃▂▁

0,1
epochs,4.0
train_loss,0.46087
val_loss,404.26231


[34m[1mwandb[0m: Agent Starting Run: cmw4chzl with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.189864…

Run cmw4chzl errored: AttributeError("'tuple' object has no attribute 'shape'")
[34m[1mwandb[0m: [32m[41mERROR[0m Run cmw4chzl errored: AttributeError("'tuple' object has no attribute 'shape'")
[34m[1mwandb[0m: Agent Starting Run: qvju4oqq with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epochs,▁▃▅▆█
train_loss,▆█▅▁▆
val_loss,█▂▁▄▄

0,1
epochs,4.0
train_loss,0.51878
val_loss,394.36446


[34m[1mwandb[0m: Agent Starting Run: 8e1mmzun with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1
[34m[1mwandb[0m: 	teacher_forcing: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 8e1mmzun errored: RuntimeError('Expected hidden size (2, 4, 256), got [1, 4, 256]')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 8e1mmzun errored: RuntimeError('Expected hidden size (2, 4, 256), got [1, 4, 256]')
[34m[1mwandb[0m: Agent Starting Run: f3mfvkc2 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2
[34m[1mwandb[0m: 	teacher_forcing: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185245…

0,1
epochs,▁▃▅▆█
train_loss,▇▆▇█▁
val_loss,█▄▂▁▁

0,1
epochs,4.0
train_loss,0.23459
val_loss,406.71735


[34m[1mwandb[0m: Agent Starting Run: b9u2z4d2 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 32
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3
[34m[1mwandb[0m: 	teacher_forcing: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185245…

0,1
epochs,▁▃▅▆█
train_loss,█▅▁▅▆
val_loss,█▅▁▄▂

0,1
epochs,4.0
train_loss,1.00803
val_loss,930.31149


[34m[1mwandb[0m: Agent Starting Run: hdzu6g42 with config:
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	input_embedding_size: 128
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2
[34m[1mwandb[0m: 	teacher_forcing: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.185344…

0,1
epochs,▁▃▅▆█
train_loss,▂▄▁██
val_loss,█▁▃▂▃

0,1
epochs,4.0
train_loss,0.92917
val_loss,916.11738


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
