In [41]:
import torch
from torchtext import data
from torchtext import datasets
import spacy

In [42]:
# Seed PyTorch's global RNG from a single named constant.
SEED = 42
torch.manual_seed(SEED)

# Text field: tokenized with spaCy. include_lengths=True is requested here;
# per the torchtext Field docs this makes batches carry sequence lengths
# alongside the padded tokens -- TODO confirm against the installed version.
text_field = data.Field(
    include_lengths=True,
    tokenize='spacy',
)

# Label field: labels are produced as float tensors.
label_field = data.LabelField(dtype=torch.float)

In [43]:
# Load the IMDB train/test splits, processed through the two fields above.
train_data, test_data = datasets.IMDB.splits(
    text_field=text_field,
    label_field=label_field,
)

In [44]:
import random

# Carve a validation set out of the training data (default split ratio).
#
# random.seed() returns None, so passing its return value as `random_state`
# hands split() no state at all and the split is not seeded. Seed the
# generator first, then pass the captured state from random.getstate().
#
# NOTE: this cell rebinds `train_data`, so re-running it without re-running
# the IMDB loading cell splits the already-reduced training set again.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [45]:
MAX_VOCAB_SIZE = 25_000
# Identifier of the pretrained vectors handed to build_vocab below.
EMBEDDINGS_FILE = 'glove.6B.100d'

# Both vocabularies are built from the training split only (after the
# train/valid split), so validation and test text do not contribute tokens.
text_field.build_vocab(
    train_data,
    vectors=EMBEDDINGS_FILE,
    max_size=MAX_VOCAB_SIZE,
    # Initializer passed for tokens missing from the pretrained vectors
    # (presumably a normal-distribution draw -- see torchtext Vocab docs).
    unk_init=torch.Tensor.normal_,
)
label_field.build_vocab(train_data)

In [54]:
from argparse import Namespace

# Single bag of settings passed to the model constructor and to run().
args = Namespace(
    # --- Data and path hyper parameters ---
    embeddings_file='imdb_embeddings.pkl',
    model_state_file='imdb_model.torch',
    log_file='imdb.log',
    train_state_file='train_state.json',
    save_dir='.save/imdb/',
    # Vocabulary indices of the padding and unknown tokens.
    PAD_IDX=text_field.vocab.stoi[text_field.pad_token],
    UNK_IDX=text_field.vocab.stoi[text_field.unk_token],
    # --- Model hyper parameters ---
    input_dim=len(text_field.vocab),
    # Presumably must match the width of the 'glove.6B.100d' vectors -- verify.
    embedding_dim=100,
    hidden_dim=256,
    output_dim=1,
    num_layers=2,
    # --- Training hyper parameters ---
    # Reuse the notebook-wide SEED constant instead of repeating the literal,
    # so the seed recorded in args cannot drift from the one actually applied.
    seed=SEED,
    learning_rate=0.001,
    dropout_p=0.5,
    batch_size=64,
    num_epochs=5,
    # NOTE(review): equal to num_epochs; if this counts non-improving epochs,
    # early stopping can never trigger before training ends -- confirm in run().
    early_stopping_criteria=5,
    # --- Runtime options ---
    reload_from_files=True,
    expand_filepaths_to_save_dir=True,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)

# One bucketed iterator per split, all sharing batch size and target device.
# sort_within_batch=True is requested for every split.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    device=args.device,
    sort_within_batch=True,
    batch_size=args.batch_size,
)


# `pretrained_embeddings` is the same object as text_field.vocab.vectors (no
# copy is made), so the row assignments below also modify the vocab's vectors.
pretrained_embeddings = text_field.vocab.vectors

# Zero the rows of the unknown and padding tokens, in that order.
for special_idx in (args.UNK_IDX, args.PAD_IDX):
    pretrained_embeddings[special_idx] = torch.zeros(args.embedding_dim)

In [55]:
from models import BiLSTM

# Instantiate the project-local BiLSTM. Arguments stay positional and in the
# original order because the constructor signature is not visible here --
# verify the order against models.BiLSTM.__init__.
model = BiLSTM(
    args.input_dim, args.embedding_dim, args.hidden_dim, args.output_dim,
    args.num_layers, pretrained_embeddings, args.dropout_p, args.PAD_IDX,
)

In [57]:
import torch.nn as nn
import torch.optim as optim

# Binary classification on raw logits (the model emits output_dim = 1).
loss_func = nn.BCEWithLogitsLoss()

# Adam over every model parameter at the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=args.learning_rate)

# Halve the learning rate when the monitored quantity (minimized) stops
# improving, with a patience of 1.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=1,
    factor=0.5,
)

In [67]:
# Bundle the three iterators under the split names passed on to run().
iterator = {
    'train': train_iterator,
    'test': test_iterator,
    'valid': valid_iterator,
}

# NOTE(review): `run` is not defined or imported in the visible cells, and the
# recorded output of this cell is
# "AttributeError: 'BiLSTM' object has no attribute 'embedding'" -- the code
# reached from here presumably reads `model.embedding`; check that against the
# attribute names used in models.BiLSTM.
run(args, model, loss_func, optimizer, scheduler, iterator)

AttributeError: 'BiLSTM' object has no attribute 'embedding'