In [1]:
from data import CustomSNLIDataset
from torch.utils.data import DataLoader
import torch
from torchtext.vocab import GloVe

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [21]:
train = CustomSNLIDataset(split='validation')
test = CustomSNLIDataset(split='test')

Found cached dataset snli (/Users/tijmenvanetten/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)
Loading cached processed dataset at /Users/tijmenvanetten/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b/cache-e956d94b29bc5096.arrow
Found cached dataset snli (/Users/tijmenvanetten/.cache/huggingface/datasets/snli/plain_text/1.0.0/1f60b67533b65ae0275561ff7828aad5ee4282d0e6f844fd148d05d3c6ea251b)
                                                                  

In [26]:
glove = GloVe(name='840B', dim=300)
text_pipeline = lambda x: glove.get_vecs_by_tokens(x, lower_case_backup=True)

In [27]:
def collate_batch(batch):
    premises, hypotheses, labels = [], [], []
    for premise, hypothesis, label in batch:
        premise = text_pipeline(premise)
        hypothesis = text_pipeline(hypothesis)
        hypotheses.append(hypothesis)
        premises.append(premise)
        labels.append(label)
    premises_len = [len(premise) for premise in premises]
    hypotheses_len = [len(hypothesis) for hypothesis in hypotheses]
    premises_padded = torch.nn.utils.rnn.pad_sequence(premises, batch_first=True)
    hypotheses_padded = torch.nn.utils.rnn.pad_sequence(hypotheses, batch_first=True)
    labels = torch.tensor(labels, dtype=torch.int64)
    return premises_padded.to(device), hypotheses_padded.to(device), labels.to(device), premises_len, hypotheses_len

In [28]:
train_loader = DataLoader(train, collate_fn=collate_batch, batch_size=5)
test_loader = DataLoader(test, collate_fn=collate_batch, batch_size=5)

(tensor([[[ 0.1938, -0.3427, -0.3728,  ..., -0.5123,  0.2869, -0.3872],
         [ 0.0266,  0.3142,  0.1537,  ..., -0.2472, -0.3425, -0.6229],
         [-0.1986, -0.0628, -0.3661,  ..., -0.5845,  0.2788, -0.2621],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.1938, -0.3427, -0.3728,  ..., -0.5123,  0.2869, -0.3872],
         [ 0.0266,  0.3142,  0.1537,  ..., -0.2472, -0.3425, -0.6229],
         [-0.1986, -0.0628, -0.3661,  ..., -0.5845,  0.2788, -0.2621],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.1938, -0.3427, -0.3728,  ..., -0.5123,  0.2869, -0.3872],
         [ 0.0266,  0.3142,  0.1537,  ..., -

In [4]:
import argparse

parser = argparse.ArgumentParser(description='NLI training')
# paths
parser.add_argument("--nlipath", type=str, default='dataset/SNLI/', help="NLI data path (SNLI or MultiNLI)")
parser.add_argument("--outputdir", type=str, default='savedir/', help="Output directory")
parser.add_argument("--outputmodelname", type=str, default='model.pickle')


# training
parser.add_argument("--n_epochs", type=int, default=20)
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--dpout_model", type=float, default=0., help="encoder dropout")
parser.add_argument("--dpout_fc", type=float, default=0., help="classifier dropout")
parser.add_argument("--nonlinear_fc", type=float, default=0, help="use nonlinearity in fc")
parser.add_argument("--optimizer", type=str, default="sgd,lr=0.1", help="adam or sgd,lr=0.1")
parser.add_argument("--lrshrink", type=float, default=5, help="shrink factor for sgd")
parser.add_argument("--decay", type=float, default=0.99, help="lr decay")
parser.add_argument("--minlr", type=float, default=1e-5, help="minimum lr")
parser.add_argument("--max_norm", type=float, default=5., help="max norm (grad clipping)")

# model
parser.add_argument("--encoder_type", type=str, default='BLSTMEncoder', help="see list of encoders")
parser.add_argument("--enc_lstm_dim", type=int, default=2048, help="encoder nhid dimension")
parser.add_argument("--n_enc_layers", type=int, default=1, help="encoder num layers")
parser.add_argument("--fc_dim", type=int, default=512, help="nhid of fc layers")
parser.add_argument("--n_classes", type=int, default=3, help="entailment/neutral/contradiction")
parser.add_argument("--pool_type", type=str, default='max', help="max or mean")

# gpu
parser.add_argument("--gpu_id", type=int, default=3, help="GPU ID")
parser.add_argument("--seed", type=int, default=1234, help="seed")


params, _ = parser.parse_known_args()

usage: ipykernel_launcher.py [-h] [--nlipath NLIPATH] [--outputdir OUTPUTDIR]
                             [--outputmodelname OUTPUTMODELNAME]
                             [--n_epochs N_EPOCHS] [--batch_size BATCH_SIZE]
                             [--dpout_model DPOUT_MODEL] [--dpout_fc DPOUT_FC]
                             [--nonlinear_fc NONLINEAR_FC]
                             [--optimizer OPTIMIZER] [--lrshrink LRSHRINK]
                             [--decay DECAY] [--minlr MINLR]
                             [--max_norm MAX_NORM]
                             [--encoder_type ENCODER_TYPE]
                             [--enc_lstm_dim ENC_LSTM_DIM]
                             [--n_enc_layers N_ENC_LAYERS] [--fc_dim FC_DIM]
                             [--n_classes N_CLASSES] [--pool_type POOL_TYPE]
                             [--gpu_id GPU_ID] [--seed SEED]
ipykernel_launcher.py: error: argument --fc_dim: invalid int value: '/Users/tijmenvanetten/Library/Jupyter/runtime/kernel-

AttributeError: 'tuple' object has no attribute 'tb_frame'