In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from model import seq2seq, test_model_instance

BATCH_SIZE = 4

# Every split is stored as a comma-separated file of integers. The files are
# read in the listed order and bound to the six module-level arrays below.
x_train, y_train, x_test, y_test, x_val, y_val = (
    np.loadtxt(csv_path, delimiter=",", dtype=int)
    for csv_path in (
        "akshar_sequences//x_train.csv",
        "akshar_sequences//y_train.csv",
        "akshar_sequences//x_test.csv",
        "akshar_sequences//y_test.csv",
        "akshar_sequences//x_val.csv",
        "akshar_sequences//y_val.csv",
    )
)


class SequenceDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs item ``i`` of ``x`` with item ``i`` of ``y``.

    Parameters
    ----------
    x, y : sized, indexable collections (e.g. NumPy integer arrays or lists)
        Must contain the same number of items.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths.
    """

    def __init__(self, x, y):
        # Fail at construction time rather than with an IndexError deep
        # inside a DataLoader worker when the shorter side runs out.
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same number of items, got {0} and {1}".format(len(x), len(y))
            )
        self.x = x
        self.y = y

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index`` as int64 tensors."""
        # as_tensor accepts ndarray rows, NumPy scalars (from 1-D inputs) and
        # plain Python sequences alike; from_numpy only accepted ndarrays.
        x = torch.as_tensor(self.x[index], dtype=torch.long)
        y = torch.as_tensor(self.y[index], dtype=torch.long)
        return x, y

    def __len__(self):
        """Number of (x, y) pairs in the dataset."""
        return len(self.x)

# Wrap each (x, y) split in a SequenceDataset so DataLoader can batch it.
train_dataset = SequenceDataset(x_train, y_train)
val_dataset = SequenceDataset(x_val, y_val)
test_dataset = SequenceDataset(x_test, y_test)

# Only the training split is reshuffled every epoch. The validation and test
# loaders keep a fixed order so evaluation passes are deterministic and do not
# consume random state for no benefit.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Candidate values for each hyper-parameter, handed to test_model_instance
# (imported from model). The grid holds 1*3*3*1*3*3*2*2*3 = 972 combinations,
# which matches the 972-step progress bar in the recorded output, so the helper
# presumably tries every combination -- confirm against model.py.
config = {}
config['input_embedding_size'] = [32]
config['num_encoder_layers'] = [1, 2, 3]
config['num_decoder_layers'] = [1, 2, 3]
config['hidden_layer_size'] = [128]
config['cell_type_encoder'] = ['LSTM', 'GRU', 'RNN']
config['cell_type_decoder'] = ['LSTM', 'GRU', 'RNN']
config['bidirectional'] = [0, 1]
config['dropout'] = [0, 0.2]
config['teacher_forcing'] = [0, 0.5, 1]

test_model_instance(config)

  0%|          | 0/972 [00:00<?, ?it/s]


RuntimeError: expand(torch.FloatTensor{[4, 1, 131]}, size=[4, 131]): the number of sizes provided (2) must be greater or equal to the number of dimensions in the tensor (3)

In [None]:
import wandb

wandb.login()

# Random-search sweep definition. Every swept hyper-parameter is declared as a
# discrete list of candidates wrapped in the {'values': [...]} form.
sweep_config = {
    "method": "random",
    'parameters': {
        param_name: {'values': candidates}
        for param_name, candidates in (
            ('input_embedding_size', [32, 64, 128]),
            ('num_encoder_layers', [1, 2, 3]),
            ('num_decoder_layers', [1, 2, 3]),
            ('hidden_layer_size', [32, 64, 128, 256]),
            ('cell_type', ['LSTM', 'GRU', 'RNN']),
            ('bidirectional', [0]),
            ('dropout', [0, 0.2, 0.3]),
            ('teacher_forcing', [0.5, 0.75, 1]),
        )
    },
}

# Register the sweep under the "rnn_runs" project; the returned id is what the
# agent call further down uses to pull configurations.
sweep_id = wandb.sweep(sweep_config, project="rnn_runs")

def train(config=None):
    """Train and validate one seq2seq model for a single W&B run.

    Reads the hyper-parameters from ``wandb.config`` after ``wandb.init``,
    builds an encoder/decoder pair, trains for a fixed number of epochs on the
    module-level ``train_loader`` and logs the mean per-batch train and
    validation loss to W&B after every epoch.

    Parameters
    ----------
    config : optional
        Forwarded unchanged to ``wandb.init(config=...)``.

    Notes
    -----
    Relies on ``Encoder``, ``Decoder``, ``Seq2Seq``, ``train_loader`` and
    ``val_loader`` being defined at module level when the function runs.
    """
    wandb.init(config=config)
    config = wandb.config

    # Each hyper-parameter appears exactly once in the run name (the previous
    # name repeated the "_hls_<size>" segment twice).
    run_name = "_".join(
        "{0}_{1}".format(tag, value)
        for tag, value in (
            ("ies", config.input_embedding_size),
            ("nel", config.num_encoder_layers),
            ("ndl", config.num_decoder_layers),
            ("hls", config.hidden_layer_size),
            ("cell", config.cell_type),
        )
    )
    wandb.run.name = run_name

    VOCAB_SIZE = 131
    EMBEDDING_DIM = config.input_embedding_size
    HIDDEN_DIM = config.hidden_layer_size
    EPOCHS = 5
    NUM_LAYERS_ENCODER = config.num_encoder_layers
    NUM_LAYERS_DECODER = config.num_decoder_layers
    # Dropout is switched off for BOTH stacks as soon as either one has a
    # single layer; otherwise the swept value is used for both.
    DROPOUT = 0 if ((NUM_LAYERS_ENCODER == 1) or (NUM_LAYERS_DECODER == 1)) else config.dropout
    BIDIRECTIONAL = config.bidirectional
    CELL_TYPE = config.cell_type
    # NOTE(review): the swept teacher-forcing ratio is read here but never
    # handed to the model in this function -- confirm how Seq2Seq expects it.
    TEACHER_FORCING = config.teacher_forcing

    encoder = Encoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_ENCODER, DROPOUT, CELL_TYPE, BIDIRECTIONAL)
    decoder = Decoder(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_DECODER, DROPOUT, CELL_TYPE, BIDIRECTIONAL)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Build the model once (it used to be constructed twice, with the first
    # instance thrown away) and move it to the selected device.
    model = Seq2Seq(encoder, decoder, device=device).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    def batch_loss(src, tgt):
        """Run one batch through the model and return its cross-entropy loss."""
        src = src.to(device)
        tgt = tgt.to(device)
        # Call the module itself rather than .forward() so that
        # nn.Module.__call__ (and any registered hooks) is honoured.
        output = model(src, tgt)
        output_dim = output.shape[-1]
        # reshape() also handles non-contiguous outputs, where view() raises
        # "view size is not compatible with input tensor's size and stride".
        return criterion(output.reshape(-1, output_dim), tgt.reshape(-1))

    for epoch in range(EPOCHS):

        model.train()
        train_loss = 0
        for src, tgt in train_loader:
            optimizer.zero_grad()
            loss = batch_loss(src, tgt)
            loss.backward()
            # Clip the global gradient norm to 1 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for src, tgt in val_loader:
                val_loss += batch_loss(src, tgt).item()

        # Losses are reported as the mean over batches of the epoch.
        wandb.log({"train_loss": train_loss/len(train_loader), "val_loss": val_loss/len(val_loader), "epochs" : epoch})

# Launch an agent for the sweep created above; it invokes train() for at most
# 20 sampled configurations.
wandb.agent(sweep_id, function=train, count=20)

In [7]:
from model import compare_sequences
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim

# Hyper-parameters of the single hand-configured run.
VOCAB_SIZE = 131
EMBEDDING_DIM = 128
HIDDEN_DIM = 256
EPOCHS = 5
NUM_LAYERS_ENCODER = 3
NUM_LAYERS_DECODER = 3
DROPOUT = 0.2
BIDIRECTIONAL = 1
CELL_TYPE_ENCODER = "LSTM"
CELL_TYPE_DECODER = "LSTM"
TEACHER_FORCING = 0.75
# The model is told the batch size explicitly, so take it from the loader that
# actually produces the batches instead of repeating a second literal that
# could drift out of sync with it.
BATCH_SIZE = train_loader.batch_size
MAX_SEQ_SIZE = 28

device = "cpu"

model = seq2seq(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_ENCODER, NUM_LAYERS_DECODER,
                 DROPOUT, BIDIRECTIONAL, CELL_TYPE_ENCODER, CELL_TYPE_DECODER, TEACHER_FORCING,
                 BATCH_SIZE, MAX_SEQ_SIZE, debugging = False)

model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())


for epoch in range(EPOCHS):

    model.train()
    running_loss = 0.0
    train_accuracy = 0
    val_accuracy = 0

    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length
    # and draw a real progress bar with an ETA instead of a bare counter.
    for inputs, targets in tqdm(train_loader, desc=f"train {epoch+1}/{EPOCHS}"):

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs, targets)

        # Accumulate whatever score compare_sequences returns for this batch;
        # it is averaged over the number of batches when printed below.
        train_accuracy += compare_sequences(targets, outputs)

        loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()


    model.eval()
    val_loss = 0.0

    # NOTE(review): the recorded output shows a "view size is not compatible"
    # RuntimeError on the first validation batch. This cell only uses
    # reshape(), so the failing view() is presumably inside the model or
    # compare_sequences -- confirm in model.py.
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader, desc=f"val {epoch+1}/{EPOCHS}"):

            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs, targets)

            loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
            val_accuracy += compare_sequences(targets, outputs)

            val_loss += loss.item()

    # Report the training loss as well; it was accumulated but never shown.
    print(f"Epoch [{epoch+1}/{EPOCHS}], Training Loss: {running_loss / len(train_loader)}, Validation Loss: {val_loss / len(val_loader)}")
    print("Training Accuracy {0}, Validation Accuracy {1}".format(train_accuracy/(len(train_loader)), val_accuracy/(len(val_loader))))
    torch.cuda.empty_cache()

12800it [1:39:52,  2.14it/s]
  0%|          | 0/1024 [00:00<?, ?it/s]


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.

In [9]:
# train_accuracy is the sum of compare_sequences(...) over the training batches
# of the most recently started epoch; divide by the batch count for the mean.
num_train_batches = len(train_loader)
train_accuracy / num_train_batches

0.021953125