Setting hyperparameters (hidden layer size, dropout probability, learning rate) using scorer_val dataset. Partly adapted from https://github.com/chiyuzhang94/hyperpartisan-ubc/blob/master/pytorch/bilstm_optim.py

In [0]:
! pip install sacremoses
import torch
import torch.nn.functional as F
import torch.autograd as autograd
import torch.nn as nn
from torch.utils.data import Dataset
#from utils.sensation_config import *
import numpy as np
import argparse
import logging
import pickle
import pandas as pd
import sacremoses
from torch.utils.data import dataloader, Dataset
from google.colab import drive
from tqdm import tqdm
drive.mount('/content/drive')
#from utils.global_variables import *

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
dataset = '/content/drive/My Drive/dataset/dataset'

# Set hyperparameters

In [0]:
# Hyper Parameters
batch_size = 50
sequence_length = 61
embedding_dim = 300
num_epoch = 5
num_layers = 1
num_classes = 2
bidirectional = True

In [0]:
# Parameters we want to fine-tune
op_parameters = []
var_dic = {}
var_dic['var_name'] = 'lr' # learning rate
var_dic['var_ini'] = 0.01
var_dic['var_range'] = [0.1,0.01,0.001,0.0001]
op_parameters.append(var_dic)

var_dic = {}
var_dic['var_name'] = "hidden_dim"
var_dic['var_ini'] = 100
var_dic['var_range'] = range(50,200,50) # range(100,501,100)
op_parameters.append(var_dic)

var_dic = {}
var_dic['var_name'] = "dropout"
var_dic['var_ini'] = 0.1
var_dic['var_range'] = [0.0,0.1,0.3,0.5,0.7,0.9]
op_parameters.append(var_dic)

In [0]:
# Load pickle files
scorer_val = pd.read_pickle(dataset+'/scorer_val.pickle')

In [0]:
data_size = scorer_val.shape[0]

In [0]:
# Split into train and test 
def split_dataset(dataset):
    train_size = int(dataset.shape[0]*0.8)
    train_df = dataset[:train_size]
    test_df = dataset[train_size:]
    return train_df, test_df
scorer_val_train, scorer_val_test = split_dataset(scorer_val)
train_texts, train_labels = list(scorer_val_train.title), list(scorer_val_train.hyperpartisan)
test_texts, test_labels = list(scorer_val_test.title), list(scorer_val_test.hyperpartisan)

In [0]:
train_size = len(train_texts)
test_size = len(test_texts)

# Load GloVe

In [0]:
# Load GloVe
!wget https://docs.google.com/uc?id=1KMJTagaVD9hFHXFTPtNk0u2JjvNlyCAu -O glove_split.aa
!wget https://docs.google.com/uc?id=1LF2yD2jToXriyD-lsYA5hj03f7J3ZKaY -O glove_split.ab
!wget https://docs.google.com/uc?id=1N1xnxkRyM5Gar7sv4d41alyTL92Iip3f -O glove_split.ac
!cat glove_split.?? > 'glove.6B.300d__50k.txt'
def load_glove(glove_path, embedding_dim):
    with open(glove_path) as f:
        token_ls = [PAD_TOKEN, UNK_TOKEN]
        embedding_ls = [np.zeros(embedding_dim), np.random.rand(embedding_dim)]
        for line in f:
            token, raw_embedding = line.split(maxsplit=1)
            token_ls.append(token)
            embedding = np.array([float(x) for x in raw_embedding.split()])
            embedding_ls.append(embedding)
        embeddings = np.array(embedding_ls)
    return token_ls, embeddings

PAD_TOKEN = '<PAD>'
UNK_TOKEN = '<UNK>'
glove_path = "glove.6B.300d__50k.txt"

vocab, embeddings = load_glove(glove_path, embedding_dim)

--2020-05-11 21:04:40--  https://docs.google.com/uc?id=1KMJTagaVD9hFHXFTPtNk0u2JjvNlyCAu
Resolving docs.google.com (docs.google.com)... 74.125.195.138, 74.125.195.100, 74.125.195.139, ...
Connecting to docs.google.com (docs.google.com)|74.125.195.138|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0k-0g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/313hbk9iut40n40f84qvjae35iihlc79/1589231025000/14514704803973256873/*/1KMJTagaVD9hFHXFTPtNk0u2JjvNlyCAu [following]
--2020-05-11 21:04:47--  https://doc-0k-0g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/313hbk9iut40n40f84qvjae35iihlc79/1589231025000/14514704803973256873/*/1KMJTagaVD9hFHXFTPtNk0u2JjvNlyCAu
Resolving doc-0k-0g-docs.googleusercontent.com (doc-0k-0g-docs.googleusercontent.com)... 173.194.202.132, 2607:f8b0:400e:c00::84
Connecting to doc-0k-0g-docs.googleusercontent.com (doc-0k-0g-docs.googleusercontent.com)|173.194.20

# Featurize data

In [0]:
# Featurzie data
def featurize(data, labels, tokenizer, vocab, max_seq_length=128):
    vocab_to_idx = {word: i for i, word in enumerate(vocab)}
    text_data = []
    label_data = []
    for ex in tqdm(data):
        tokenized = tokenizer.tokenize(ex)
        ids = [vocab_to_idx.get(token, 1) for token in tokenized]
        text_data.append(ids)
    return text_data, labels

tokenizer = sacremoses.MosesTokenizer()
train_data_indices, train_labels = featurize(train_texts, train_labels, tokenizer, vocab)
test_data_indices, test_labels = featurize(test_texts, test_labels, tokenizer, vocab)

100%|██████████| 15000/15000 [00:02<00:00, 5695.08it/s]
100%|██████████| 3750/3750 [00:00<00:00, 5848.10it/s]


# DataLoaders

In [0]:
# Create DataLoaders
class HyperpartisanDataset(Dataset):
    """
    Class that represents a train/validation/test dataset that's readable for PyTorch
    Note that this class inherits torch.utils.data.Dataset
    """

    def __init__(self, data_list, target_list, sequence_length):
        """
        @param data_list: list of data tokens 
        @param target_list: list of data targets 

        """
        self.data_list = data_list
        self.target_list = target_list
        self.sequence_length = sequence_length
        assert (len(self.data_list) == len(self.target_list))

    def __len__(self):
        return len(self.data_list)
        
    def __getitem__(self, key, sequence_length=None):
        """
        Triggered when you call dataset[i]
        """
        if sequence_length is None:
            sequence_length = self.sequence_length
        token_idx = self.data_list[key][:sequence_length]
        label = self.target_list[key]
        return [token_idx, label]

    def collate_func(self,batch):
        """
        Customized function for DataLoader that dynamically pads the batch so that all 
        data have the same length
        """ 
        data_list = [] # store padded sequences
        label_list = []

        if len(max(self.data_list,key=len)) < self.sequence_length:
          max_batch_seq_len = len(max(self.data_list,key=len))
        else:
           max_batch_seq_len = self.sequence_length # the length of longest sequence in batch
                                                      # if it is less than self.sequence_length.
                                                      # else max_batch_seq_len = self.sequence_length 
        for row in batch:
          if len(row[0]) < max_batch_seq_len:
            row[0] += [0] * (max_batch_seq_len - len(row[0])) # pad the sequence with 0 if it's shorter than max_batch_seq_len
          row[0] = row[0][:self.sequence_length] # only trim the sequence longer than self.sequence_length
          data_list.append(row[0])
          label_list.append(row[1]) 
        data_list = torch.LongTensor(data_list)
        label_list = torch.LongTensor(label_list)

        return [data_list, label_list]

In [0]:
train_dataset = HyperpartisanDataset(train_data_indices, train_labels, sequence_length)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size,
                                           collate_fn=train_dataset.collate_func,
                                           shuffle=False)

test_dataset = HyperpartisanDataset(test_data_indices, test_labels, sequence_length)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                           batch_size=batch_size,
                                           collate_fn=test_dataset.collate_func,
                                           shuffle=False)

In [0]:
data_batch, labels = next(iter(train_loader))
print("data batch dimension: ", data_batch.size())
print("data_batch: ", data_batch)
print("labels: ", labels)

data batch dimension:  torch.Size([50, 61])
data_batch:  tensor([[3755,   47,   16,  ...,    0,    0,    0],
        [9124, 3073, 1536,  ...,    0,    0,    0],
        [ 523,  975, 1122,  ...,    0,    0,    0],
        ...,
        [2925,   47, 8707,  ...,    0,    0,    0],
        [ 142, 4390,  467,  ...,    0,    0,    0],
        [8322,    3,  733,  ...,    0,    0,    0]])
labels:  tensor([0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1,
        1, 1])


In [0]:
# Initialize values
for i, item in enumerate(op_parameters):
    exec("%s = %f" % (item['var_name'],item['var_ini']))

# LSTMClassifier

In [0]:
# Define BiLSTM Model
class LSTMClassifier(nn.Module):
    """
    LSTMClassifier classification model
    """
    def __init__(self, embeddings, hidden_dim, num_layers, num_classes, bidirectional, dropout_prob):
        super().__init__()
        self.hidden_dim = hidden_dim
        # self.embedding_layer = self.load_pretrained_embeddings(embeddings)
        # self.lstm = nn.LSTM( # LSTM layer
        #                     self.embedding_layer.embedding_dim,\
        #                     hidden_size = hidden_dim, \
        #                     num_layers = num_layers, \
        #                     bidirectional = bidirectional, \
        #                     batch_first = True,\
        #                     dropout=dropout_prob)
        # self.softmax = nn.LogSoftmax() 
        # self.dropout_layer = nn.Dropout(p=dropout_prob) # dropout some units of the hidden layer
        # self.hidden2out = nn.Linear(hidden_dim*(int(bidirectional)+1), num_classes) # classifier layer
        
        self.embedding_layer = self.load_pretrained_embeddings(embeddings)
        
        self.dropout = dropout_prob
        self.lstm = nn.LSTM(self.embedding_layer.embedding_dim, hidden_size= self.hidden_dim, \
                            num_layers = num_layers, bidirectional = bidirectional, \
                            batch_first = True, dropout = self.dropout)
        self.non_linearity = nn.ReLU() # For example, ReLU
        self.clf = nn.Linear(self.hidden_dim*(int(bidirectional)+1), num_classes) # classifier layer

    def load_pretrained_embeddings(self, embeddings):
        """
           The code for loading embeddings from Lab 2
           Unlike lab, we are not setting `embedding_layer.weight.requires_grad = False`
           because we want to finetune the embeddings on our data
        """
        embedding_layer = nn.Embedding(embeddings.shape[0], embeddings.shape[1], padding_idx=0)
        embedding_layer.weight.data = torch.Tensor(embeddings).float()
        return embedding_layer
    
    def forward(self, inputs):

        # Embedding 
        # Output dim: [batch_size, seq_length, embedding_dim]
        embedded = self.embedding_layer(inputs)

        # BiLSTM
        # Output dim: [batch_size, seq_length, hidden_dim * num_directions]
        h_0 = torch.zeros(num_layers*(int(bidirectional)+1), inputs.size(0), int(self.hidden_dim))
        c_0 = torch.zeros(num_layers*(int(bidirectional)+1), inputs.size(0), int(self.hidden_dim))
        bilstm_out, _ = self.lstm(embedded, (h_0, c_0))
        
        # # Dropout layer
        # # Output dim: [batch_size, hidden_dim * num_directions]
        # dropout_out = self.dropout_layer(bilstm_out[:, -1, :])

        # # Get output from hidden states
        # # Output dim: [batch_size, num_classes]
        # linear_out = self.hidden2out(dropout_out)   
       
        # # Outputting using softmax layer
        # # Output dim: [batch_size, num_classes]
        # prob = self.softmax(linear_out)

        # return prob
               
        # Average pooling
        # Output dim: [batch_size, num_directions * hidden_size]
        num_valid = (inputs == (0 or 1)) \
            .float().sum(dim=1).clamp(1) # non-padding, non-unknown
        averaged_out = bilstm_out.sum(dim=1) / num_valid.view(-1, 1)
        
        # Non-linearity (ReLU)
        # Output dim: [num_directions * hidden_size , num_classes]
        non_linearity_out = self.non_linearity(averaged_out)

        # Linear, returns logits 
        # Output dim: [batch_size, num_classes]
        logits = self.clf(non_linearity_out)

        return logits


# Training 

In [0]:
# Training
from sklearn.metrics import accuracy_score

def evaluate(model, dataloader, device):
    accuracy = None
    model.eval()
    with torch.no_grad():
      all_preds = []
      all_labels = []
      for batch_text, batch_labels in dataloader:
          preds = model(batch_text.to(device))
          all_preds.append(preds.detach().cpu().numpy())
          all_labels.append(batch_labels.detach().cpu().numpy())
      all_preds = np.concatenate(all_preds, axis=0)
      pred_labels = all_preds.argmax(-1)
      all_labels = np.concatenate(all_labels, axis=0)
      accuracy = accuracy_score(all_labels,pred_labels)

    return accuracy 

def train(num_epoch,\
          optimizer,\
          train_loader,\
          test_loader,
          model,\
          criterion,\
          device,\
          outpath):
   
    acc = 0
    train_loss = []
    train_acc = []
    test_acc = []
    print("training")
    for epoch in tqdm(range(num_epoch)):
        outfile = open(outpath+"/bilstm_output_!.txt",'a')

        # train the model and back-propagate by batch process
        for i, (data_batch, batch_labels) in enumerate(train_loader):
            preds = model(data_batch.to(device))
            loss = criterion(preds, batch_labels.to(device))
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            train_loss.append(loss.item())

        # get train accuracy for each epoch
        acc = evaluate(model, train_loader, device)
        print('Training Accuracy of the network on epoch %i: %f %%' % (epoch, acc*100))
        train_acc.append(acc)
        training_acc = acc

        # get test accuracy for each epoch
        acc = evaluate(model, test_loader, device)
        print('Testing Accuracy of the network on epoch %i: %f %%' % (epoch, acc*100))
        test_acc.append(acc)
        testing_acc = acc
        
        # # save model afer each epoch
        # if torch.cuda.device_count() <= 1:
        #     state_dict_model = model.state_dict()
        # else:
        #     state_dict_model = model.module.state_dict()

        # state = {
        # 'epoch': epoch,
        # 'state_dict': state_dict_model,
        # 'optimizer': optimizer.state_dict(),
        # }
        
        # torch.save(state, outpath+"/"+str(epoch)+".pt")

        outfile.write("Epoch: {}, Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}\n".format(epoch,training_acc,testing_acc))
        outfile.close()
    return train_loss, train_acc, test_acc

In [0]:
results = {}

In [0]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
outpath = '/content/drive/My Drive/dataset/hyperpartisan_scorer/results'
criterion = nn.CrossEntropyLoss()
torch.manual_seed(1234)

<torch._C.Generator at 0x7fc659098150>

In [0]:
torch.cuda.is_available()

False

In [20]:
for item in op_parameters:
    best_acc = 0.0
    best_value = 0.0
    process = {}
    for value in item['var_range']:
        exec("%s = %f" % (item['var_name'],value))
        print("Testing %f of %s" % (value, item['var_name']))
        if torch.cuda.is_available():
            bilstm = LSTMClassifier(embeddings,hidden_dim = int(hidden_dim),num_layers = num_layers,num_classes = num_classes,dropout_prob=dropout,bidirectional = bidirectional).cuda()
        else:
            bilstm = LSTMClassifier(embeddings,hidden_dim = int(hidden_dim),num_layers = num_layers,num_classes = num_classes,dropout_prob=dropout, bidirectional = bidirectional)
        bilstm.train()
        optimizer = torch.optim.Adam(bilstm.parameters(), lr=lr)   # define a optimizer for backpropagation

        train_loss, train_acc, test_acc = train(num_epoch,optimizer,train_loader,test_loader,bilstm,criterion,device,outpath)

        process[value] = [sum(test_acc)/len(test_acc),sum(train_acc)/len(train_acc)]
        if best_acc < sum(test_acc)/len(test_acc):
            best_acc = sum(test_acc)/len(test_acc)
            best_value = value
            print("New best: ",best_acc)

    print("The best %s dimension is %f. Accuracy is %f" % (item['var_name'],best_value,best_acc))
    
    results[item['var_name']+'_best'] = best_value
    results[item['var_name']+'_process'] = process
    exec("%s = %f" % (item['var_name'],best_value))


Testing 0.100000 of lr


  "num_layers={}".format(dropout, num_layers))
  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 51.466667 %


 20%|██        | 1/5 [03:36<14:24, 216.02s/it]

Testing Accuracy of the network on epoch 0: 50.800000 %
Training Accuracy of the network on epoch 1: 74.126667 %


 40%|████      | 2/5 [06:49<10:27, 209.22s/it]

Testing Accuracy of the network on epoch 1: 63.386667 %
Training Accuracy of the network on epoch 2: 58.606667 %


 60%|██████    | 3/5 [09:57<06:45, 202.99s/it]

Testing Accuracy of the network on epoch 2: 55.280000 %
Training Accuracy of the network on epoch 3: 60.966667 %


 80%|████████  | 4/5 [12:51<03:14, 194.11s/it]

Testing Accuracy of the network on epoch 3: 55.573333 %
Training Accuracy of the network on epoch 4: 59.460000 %


100%|██████████| 5/5 [16:10<00:00, 194.18s/it]

Testing Accuracy of the network on epoch 4: 55.893333 %
New best:  0.5618666666666666
Testing 0.010000 of lr



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 82.780000 %


 20%|██        | 1/5 [02:03<08:14, 123.71s/it]

Testing Accuracy of the network on epoch 0: 69.973333 %
Training Accuracy of the network on epoch 1: 90.760000 %


 40%|████      | 2/5 [04:06<06:09, 123.32s/it]

Testing Accuracy of the network on epoch 1: 70.080000 %
Training Accuracy of the network on epoch 2: 91.786667 %


 60%|██████    | 3/5 [06:08<04:06, 123.02s/it]

Testing Accuracy of the network on epoch 2: 67.840000 %
Training Accuracy of the network on epoch 3: 91.206667 %


 80%|████████  | 4/5 [08:10<02:02, 122.73s/it]

Testing Accuracy of the network on epoch 3: 66.320000 %
Training Accuracy of the network on epoch 4: 90.893333 %


100%|██████████| 5/5 [10:13<00:00, 122.77s/it]

Testing Accuracy of the network on epoch 4: 65.733333 %
New best:  0.6798933333333333
Testing 0.001000 of lr



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.680000 %


 20%|██        | 1/5 [02:00<08:02, 120.71s/it]

Testing Accuracy of the network on epoch 0: 71.920000 %
Training Accuracy of the network on epoch 1: 87.533333 %


 40%|████      | 2/5 [04:00<06:00, 120.32s/it]

Testing Accuracy of the network on epoch 1: 72.053333 %
Training Accuracy of the network on epoch 2: 93.133333 %


 60%|██████    | 3/5 [05:58<03:59, 119.88s/it]

Testing Accuracy of the network on epoch 2: 70.986667 %
Training Accuracy of the network on epoch 3: 92.086667 %


 80%|████████  | 4/5 [07:58<01:59, 119.92s/it]

Testing Accuracy of the network on epoch 3: 68.000000 %
Training Accuracy of the network on epoch 4: 96.166667 %


100%|██████████| 5/5 [09:59<00:00, 119.90s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

Testing Accuracy of the network on epoch 4: 68.586667 %
New best:  0.7030933333333333
Testing 0.000100 of lr
training
Training Accuracy of the network on epoch 0: 65.466667 %


 20%|██        | 1/5 [02:01<08:06, 121.68s/it]

Testing Accuracy of the network on epoch 0: 64.880000 %
Training Accuracy of the network on epoch 1: 69.300000 %


 40%|████      | 2/5 [04:01<06:03, 121.13s/it]

Testing Accuracy of the network on epoch 1: 66.773333 %
Training Accuracy of the network on epoch 2: 71.926667 %


 60%|██████    | 3/5 [06:03<04:02, 121.26s/it]

Testing Accuracy of the network on epoch 2: 68.453333 %
Training Accuracy of the network on epoch 3: 75.653333 %


 80%|████████  | 4/5 [08:04<02:01, 121.39s/it]

Testing Accuracy of the network on epoch 3: 70.480000 %
Training Accuracy of the network on epoch 4: 78.406667 %


100%|██████████| 5/5 [10:06<00:00, 121.28s/it]

Testing Accuracy of the network on epoch 4: 70.533333 %
The best lr dimension is 0.001000. Accuracy is 0.703093
Testing 50.000000 of hidden_dim



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.806667 %


 20%|██        | 1/5 [01:32<06:11, 92.93s/it]

Testing Accuracy of the network on epoch 0: 70.960000 %
Training Accuracy of the network on epoch 1: 87.946667 %


 40%|████      | 2/5 [03:04<04:37, 92.48s/it]

Testing Accuracy of the network on epoch 1: 71.066667 %
Training Accuracy of the network on epoch 2: 92.780000 %


 60%|██████    | 3/5 [04:36<03:04, 92.30s/it]

Testing Accuracy of the network on epoch 2: 69.173333 %
Training Accuracy of the network on epoch 3: 94.273333 %


 80%|████████  | 4/5 [06:07<01:31, 91.99s/it]

Testing Accuracy of the network on epoch 3: 67.840000 %
Training Accuracy of the network on epoch 4: 95.153333 %


100%|██████████| 5/5 [07:39<00:00, 91.88s/it]

Testing Accuracy of the network on epoch 4: 67.306667 %
New best:  0.6926933333333334
Testing 100.000000 of hidden_dim



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.706667 %


 20%|██        | 1/5 [02:04<08:16, 124.11s/it]

Testing Accuracy of the network on epoch 0: 71.653333 %
Training Accuracy of the network on epoch 1: 87.326667 %


 40%|████      | 2/5 [04:06<06:10, 123.53s/it]

Testing Accuracy of the network on epoch 1: 71.760000 %
Training Accuracy of the network on epoch 2: 92.873333 %


 60%|██████    | 3/5 [06:08<04:06, 123.12s/it]

Testing Accuracy of the network on epoch 2: 69.440000 %
Training Accuracy of the network on epoch 3: 93.806667 %


 80%|████████  | 4/5 [08:11<02:03, 123.16s/it]

Testing Accuracy of the network on epoch 3: 67.466667 %
Training Accuracy of the network on epoch 4: 96.360000 %


100%|██████████| 5/5 [10:25<00:00, 125.01s/it]

Testing Accuracy of the network on epoch 4: 68.000000 %
New best:  0.69664
Testing 150.000000 of hidden_dim



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.613333 %


 20%|██        | 1/5 [02:44<10:58, 164.60s/it]

Testing Accuracy of the network on epoch 0: 71.680000 %
Training Accuracy of the network on epoch 1: 87.193333 %


 40%|████      | 2/5 [05:27<08:12, 164.11s/it]

Testing Accuracy of the network on epoch 1: 71.973333 %
Training Accuracy of the network on epoch 2: 92.666667 %


 60%|██████    | 3/5 [08:12<05:28, 164.22s/it]

Testing Accuracy of the network on epoch 2: 69.973333 %
Training Accuracy of the network on epoch 3: 94.593333 %


 80%|████████  | 4/5 [10:54<02:43, 163.80s/it]

Testing Accuracy of the network on epoch 3: 68.746667 %
Training Accuracy of the network on epoch 4: 96.693333 %


100%|██████████| 5/5 [13:38<00:00, 163.66s/it]

Testing Accuracy of the network on epoch 4: 68.133333 %
New best:  0.7010133333333333
The best hidden_dim dimension is 150.000000. Accuracy is 0.701013
Testing 0.000000 of dropout



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.606667 %


 20%|██        | 1/5 [02:44<10:56, 164.14s/it]

Testing Accuracy of the network on epoch 0: 72.053333 %
Training Accuracy of the network on epoch 1: 87.280000 %


 40%|████      | 2/5 [05:26<08:10, 163.64s/it]

Testing Accuracy of the network on epoch 1: 72.000000 %
Training Accuracy of the network on epoch 2: 92.793333 %


 60%|██████    | 3/5 [08:09<05:26, 163.36s/it]

Testing Accuracy of the network on epoch 2: 70.453333 %
Training Accuracy of the network on epoch 3: 94.953333 %


 80%|████████  | 4/5 [11:06<02:47, 167.51s/it]

Testing Accuracy of the network on epoch 3: 69.440000 %
Training Accuracy of the network on epoch 4: 96.786667 %


100%|██████████| 5/5 [13:55<00:00, 167.18s/it]

Testing Accuracy of the network on epoch 4: 68.853333 %
New best:  0.7056
Testing 0.100000 of dropout



  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 79.393333 %


 20%|██        | 1/5 [02:43<10:53, 163.32s/it]

Testing Accuracy of the network on epoch 0: 72.133333 %
Training Accuracy of the network on epoch 1: 87.560000 %


 40%|████      | 2/5 [05:24<08:08, 162.76s/it]

Testing Accuracy of the network on epoch 1: 72.266667 %
Training Accuracy of the network on epoch 2: 92.480000 %


 60%|██████    | 3/5 [08:06<05:24, 162.34s/it]

Testing Accuracy of the network on epoch 2: 70.080000 %
Training Accuracy of the network on epoch 3: 91.593333 %


 80%|████████  | 4/5 [10:47<02:41, 161.97s/it]

Testing Accuracy of the network on epoch 3: 66.746667 %
Training Accuracy of the network on epoch 4: 97.160000 %


100%|██████████| 5/5 [13:30<00:00, 162.05s/it]

Testing Accuracy of the network on epoch 4: 69.120000 %
Testing 0.300000 of dropout



  "num_layers={}".format(dropout, num_layers))
  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.653333 %


 20%|██        | 1/5 [02:41<10:45, 161.33s/it]

Testing Accuracy of the network on epoch 0: 71.653333 %
Training Accuracy of the network on epoch 1: 87.506667 %


 40%|████      | 2/5 [05:20<08:02, 160.76s/it]

Testing Accuracy of the network on epoch 1: 71.920000 %
Training Accuracy of the network on epoch 2: 92.380000 %


 60%|██████    | 3/5 [08:01<05:21, 160.73s/it]

Testing Accuracy of the network on epoch 2: 70.213333 %
Training Accuracy of the network on epoch 3: 92.493333 %


 80%|████████  | 4/5 [10:43<02:41, 161.07s/it]

Testing Accuracy of the network on epoch 3: 66.186667 %
Training Accuracy of the network on epoch 4: 93.253333 %


100%|██████████| 5/5 [13:28<00:00, 161.66s/it]

Testing Accuracy of the network on epoch 4: 65.973333 %
Testing 0.500000 of dropout



  "num_layers={}".format(dropout, num_layers))
  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.740000 %


 20%|██        | 1/5 [02:44<10:59, 164.82s/it]

Testing Accuracy of the network on epoch 0: 72.186667 %
Training Accuracy of the network on epoch 1: 87.120000 %


 40%|████      | 2/5 [05:26<08:11, 163.84s/it]

Testing Accuracy of the network on epoch 1: 71.706667 %
Training Accuracy of the network on epoch 2: 92.673333 %


 60%|██████    | 3/5 [08:08<05:26, 163.45s/it]

Testing Accuracy of the network on epoch 2: 70.240000 %
Training Accuracy of the network on epoch 3: 93.040000 %


 80%|████████  | 4/5 [10:55<02:44, 164.24s/it]

Testing Accuracy of the network on epoch 3: 67.733333 %
Training Accuracy of the network on epoch 4: 96.666667 %


100%|██████████| 5/5 [13:36<00:00, 163.32s/it]

Testing Accuracy of the network on epoch 4: 68.426667 %
Testing 0.700000 of dropout



  "num_layers={}".format(dropout, num_layers))
  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 78.666667 %


 20%|██        | 1/5 [02:42<10:51, 162.86s/it]

Testing Accuracy of the network on epoch 0: 71.866667 %
Training Accuracy of the network on epoch 1: 86.753333 %


 40%|████      | 2/5 [05:24<08:07, 162.50s/it]

Testing Accuracy of the network on epoch 1: 72.373333 %
Training Accuracy of the network on epoch 2: 92.286667 %


 60%|██████    | 3/5 [08:07<05:24, 162.50s/it]

Testing Accuracy of the network on epoch 2: 69.946667 %
Training Accuracy of the network on epoch 3: 92.633333 %


 80%|████████  | 4/5 [10:51<02:42, 162.99s/it]

Testing Accuracy of the network on epoch 3: 67.386667 %
Training Accuracy of the network on epoch 4: 97.280000 %


100%|██████████| 5/5 [13:33<00:00, 162.65s/it]

Testing Accuracy of the network on epoch 4: 67.893333 %
Testing 0.900000 of dropout



  "num_layers={}".format(dropout, num_layers))
  0%|          | 0/5 [00:00<?, ?it/s]

training
Training Accuracy of the network on epoch 0: 79.046667 %


 20%|██        | 1/5 [02:43<10:53, 163.26s/it]

Testing Accuracy of the network on epoch 0: 71.680000 %
Training Accuracy of the network on epoch 1: 87.840000 %


 40%|████      | 2/5 [05:26<08:09, 163.19s/it]

Testing Accuracy of the network on epoch 1: 72.480000 %
Training Accuracy of the network on epoch 2: 93.280000 %


 60%|██████    | 3/5 [08:08<05:25, 162.77s/it]

Testing Accuracy of the network on epoch 2: 70.586667 %
Training Accuracy of the network on epoch 3: 93.313333 %


 80%|████████  | 4/5 [10:56<02:44, 164.38s/it]

Testing Accuracy of the network on epoch 3: 68.240000 %
Training Accuracy of the network on epoch 4: 97.000000 %


100%|██████████| 5/5 [13:40<00:00, 164.11s/it]

Testing Accuracy of the network on epoch 4: 68.986667 %
The best dropout dimension is 0.000000. Accuracy is 0.705600





In [0]:
import json
with open(outpath+'/optimize_bilstm.json', 'w') as outfile2:
    json.dump(results, outfile2)