In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from model import test_model_instance, seq2seq_attn

BATCH_SIZE = 64

x_train = np.loadtxt("akshar_sequences//x_train.csv", delimiter=",", dtype=int)
y_train = np.loadtxt("akshar_sequences//y_train.csv", delimiter=",", dtype=int)
x_test = np.loadtxt("akshar_sequences//x_test.csv", delimiter=",", dtype=int)
y_test = np.loadtxt("akshar_sequences//y_test.csv", delimiter=",", dtype=int)
x_val = np.loadtxt("akshar_sequences//x_val.csv", delimiter=",", dtype=int)
y_val = np.loadtxt("akshar_sequences//y_val.csv", delimiter=",", dtype=int)


class SequenceDataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        x = torch.from_numpy(self.x[index]).long() 
        y = torch.from_numpy(self.y[index]).long() 
        return x, y
    
    def __len__(self):
        return len(self.x)

train_dataset = SequenceDataset(x_train, y_train)
val_dataset = SequenceDataset(x_val, y_val)
test_dataset = SequenceDataset(x_test, y_test)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from model import seq2seq_attn, test_model_instance

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

VOCAB_SIZE = 131
MAX_SEQ_SIZE = 28
BATCH_SIZE = 8

config = {
    'input_embedding_size': [512, 1024],
    'num_encoder_layers': [6],
    'num_decoder_layers': [6],
    'hidden_layer_size': [32,64],
    'cell_type_encoder': ['LSTM', 'GRU', 'RNN'],
    'cell_type_decoder': ['LSTM', 'GRU', 'RNN'],
    'bidirectional': [0, 1],
    'dropout': [0, 0.2],
    'teacher_forcing': [0, 0.5, 1]
}

test_model_instance(config, 'attn', 128)

100%|██████████| 432/432 [00:37<00:00, 11.45it/s]

PASSED 432 CONFIGS.





In [1]:
import wandb
from model import compare_sequences, seq2seq_attn

wandb.login()

sweep_config = {
    "method": "bayes",
    "metric":{
    "name": "val_accuracy",
    "goal": "maximize"
    },
    'parameters': {
        'input_embedding_size': {'values': [256, 512, 1024]},
        'num_encoder_layers': {'values': [4, 5, 6]},
        'num_decoder_layers': {'values': [4, 5, 6]},
        'hidden_layer_size' : {'values' : [32, 64]},
        'cell_type_encoder' : {'values' : ['LSTM','GRU','RNN']},
        'cell_type_decoder' : {'values' : ['LSTM','GRU','RNN']},
        'bidirectional' : {'values' : [0, 1]},
        'dropout' : {'values' : [0,0.2,0.3]},
        'teacher_forcing' : {'values' : [0, 0.5, 0.75, 1]},
        'batch_size' : {'values' : [8,16,64,128,512]}
    }
}

sweep_id = wandb.sweep(sweep_config, project="attn_runs")

def train(config=None):
   
    wandb.init(config=config)
    run_name = "ies_"+str(wandb.config.input_embedding_size)+"_nel_"+str(wandb.config.num_encoder_layers)+"_ndl_"+str(wandb.config.num_decoder_layers)+"_hls_"+str(wandb.config.hidden_layer_size)+"_cte_"+str(wandb.config.cell_type_encoder)+"_ctd_"+str(wandb.config.cell_type_decoder)+"_tf_"+str(wandb.config.teacher_forcing)+"_bs_"+str(wandb.config.batch_size)
    wandb.run.name = run_name
    config = wandb.config

    VOCAB_SIZE = 131
    EMBEDDING_DIM = config.input_embedding_size
    HIDDEN_DIM = config.hidden_layer_size
    EPOCHS = 25
    NUM_LAYERS_ENCODER = config.num_encoder_layers
    NUM_LAYERS_DECODER =config.num_decoder_layers
    DROPOUT = config.dropout
    BIDIRECTIONAL = config.bidirectional
    CELL_TYPE_ENCODER = config.cell_type_encoder
    CELL_TYPE_DECODER = config.cell_type_decoder
    TEACHER_FORCING = config.teacher_forcing
    MAX_SEQ_SIZE = 28
    BATCH_SIZE = config.batch_size

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = seq2seq_attn(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_ENCODER, NUM_LAYERS_DECODER, 
                    DROPOUT, BIDIRECTIONAL, CELL_TYPE_ENCODER, CELL_TYPE_DECODER, TEACHER_FORCING, 
                    BATCH_SIZE, MAX_SEQ_SIZE, debugging = False)

    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())

    for epoch in range(EPOCHS):
        
        model.train()
        running_loss = 0.0
        train_accuracy = 0
        val_accuracy = 0
        
        for batch_idx, (inputs, targets) in (enumerate(train_loader)):
            
            inputs = inputs.to(device)
            targets = targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs, targets)
            
            train_accuracy += compare_sequences(targets, outputs)

            loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        

        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            for inputs, targets in (val_loader):
            
                inputs = inputs.to(device)
                targets = targets.to(device)
                outputs = model(inputs, targets)

                loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
                val_accuracy += compare_sequences(targets, outputs)
                
                val_loss += loss.item()
        
        wandb.log({"train_loss": running_loss/len(train_loader), "val_loss": val_loss/len(val_loader), "epochs" : epoch, 
                   "training_accuracy" : train_accuracy/len(train_dataset), "val_accuracy" : val_accuracy/len(val_dataset)})       
        
        torch.cuda.empty_cache()

wandb.agent(sweep_id, train, count = 50) 

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcs22m061[0m ([33mnihil666[0m). Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: g230zh9k
Sweep URL: https://wandb.ai/nihil666/attn_runs/sweeps/g230zh9k


[34m[1mwandb[0m: Agent Starting Run: thcxw1nr with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type_decoder: LSTM
[34m[1mwandb[0m: 	cell_type_encoder: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	input_embedding_size: 256
[34m[1mwandb[0m: 	num_decoder_layers: 5
[34m[1mwandb[0m: 	num_encoder_layers: 6
[34m[1mwandb[0m: 	teacher_forcing: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run thcxw1nr errored: NameError("name 'torch' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run thcxw1nr errored: NameError("name 'torch' is not defined")
[34m[1mwandb[0m: Agent Starting Run: pja2s8fn with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	bidirectional: 0
[34m[1mwandb[0m: 	cell_type_decoder: RNN
[34m[1mwandb[0m: 	cell_type_encoder: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	input_embedding_size: 512
[34m[1mwandb[0m: 	num_decoder_layers: 6
[34m[1mwandb[0m: 	num_encoder_layers: 6
[34m[1mwandb[0m: 	teacher_forcing: 0
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [None]:
from model import compare_sequences
from tqdm import tqdm

VOCAB_SIZE = 131
EMBEDDING_DIM = 1024
HIDDEN_DIM = 64
EPOCHS = 20
NUM_LAYERS_ENCODER = 6
NUM_LAYERS_DECODER = 6
DROPOUT = 0.2
BIDIRECTIONAL = 1
CELL_TYPE_ENCODER = "GRU"
CELL_TYPE_DECODER = "LSTM"
TEACHER_FORCING = 0.75
MAX_SEQ_SIZE = 28

import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = seq2seq_attn(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, NUM_LAYERS_ENCODER, NUM_LAYERS_DECODER, 
                 DROPOUT, BIDIRECTIONAL, CELL_TYPE_ENCODER, CELL_TYPE_DECODER, TEACHER_FORCING, 
                 BATCH_SIZE, MAX_SEQ_SIZE, debugging = False)

model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

for epoch in range(EPOCHS):
    
    model.train()
    running_loss = 0.0
    train_accuracy = 0
    val_accuracy = 0
    
    for batch_idx, (inputs, targets) in tqdm(enumerate(train_loader)):
        
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs, targets)
        
        train_accuracy += compare_sequences(targets, outputs)

        loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    

    model.eval()
    val_loss = 0.0
    
    with torch.no_grad():
        for inputs, targets in tqdm(val_loader):
           
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs, targets)

            loss = criterion(outputs.reshape(-1, model.output_size), targets.reshape(-1))
            val_accuracy += compare_sequences(targets, outputs)
            
            val_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{EPOCHS}], Validation Loss: {val_loss / len(val_loader)}")
    print("Training Accuracy {0}, Validation Accuracy {1}".format(train_accuracy/(len(train_dataset)), val_accuracy/(len(val_dataset))))
    torch.cuda.empty_cache()