In [1]:
from ncn.model import *
from ncn.training import *

In [2]:
# Seed every random number generator this notebook touches so that a
# Restart & Run All reproduces the same run. SEED comes from the ncn star imports.
random.seed(SEED)
torch.manual_seed(SEED)
# Explicitly seed the CUDA generators too; this is silently ignored without CUDA.
torch.cuda.manual_seed_all(SEED)
# Ask cuDNN for deterministic kernels and switch off its autotuner, which may
# otherwise select different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Training setup: load the arXiv CSV into bucketized iterators.
data = generate_bucketized_iterators(
    "/home/jupyter/tutorials/seminar_kd/arxiv_data.csv"
)

# Index of the '<pad>' token, looked up in the title vocabulary.
PAD_IDX = data.ttl.vocab.stoi['<pad>']

# Vocabulary sizes of the context, author and title fields (in that order).
cntxt_vocab_len, aut_vocab_len, ttl_vocab_len = (
    len(field.vocab) for field in (data.cntxt, data.aut, data.ttl)
)

INFO:neural_citation.data:Getting fields...
INFO:neural_citation.data:Loading dataset...
INFO:neural_citation.data:Building vocab...


In [4]:
# Instantiate the citation network with author information enabled.
# Vocabulary sizes and the padding index are taken from the loaded data fields.
net = NeuralCitationNetwork(
    context_filters=[4, 4, 5],
    context_vocab_size=cntxt_vocab_len,
    authors=True,
    author_filters=[1, 2],
    author_vocab_size=aut_vocab_len,
    title_vocab_size=ttl_vocab_len,
    pad_idx=PAD_IDX,
    num_layers=2,
)

# Move the model to the configured device, then run init_weights over it.
# The apply call is left as the final expression so the cell displays the model.
net.to(DEVICE)
net.apply(init_weights)

INFO:neural_citation.ncn:INITIALIZING NEURAL CITATION NETWORK WITH AUTHORS = True
Running on: cuda
Number of model parameters: 23,535,028
Encoders: # Filters = 128, Context filter length = [4, 4, 5],  Context filter length = [1, 2]
Embeddings: Dimension = 128, Pad index = 1, Context vocab = 30002, Author vocab = 30002, Title vocab = 30004
Decoder: # GRU cells = 2, Hidden size = 128
Parameters: Dropout = 0.2
--------------------------


NeuralCitationNetwork(
  (encoder): NCNEncoder(
    (dropout): Dropout(p=0.2)
    (context_embedding): Embedding(30002, 128, padding_idx=1)
    (context_encoder): TDNNEncoder(
      (fc): Linear(in_features=384, out_features=384, bias=True)
      (bn): BatchNorm1d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (author_embedding): Embedding(30002, 128, padding_idx=1)
    (citing_author_encoder): TDNNEncoder(
      (fc): Linear(in_features=256, out_features=256, bias=True)
      (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (cited_author_encoder): TDNNEncoder(
      (fc): Linear(in_features=256, out_features=256, bias=True)
      (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (attention): Attention(
    (attn): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): Decoder(
    (attention): Attention(
      (attn): Linear(in_features=256, o

In [5]:
# Train the network on the training iterator and evaluate it on the validation
# iterator; per-epoch train/validation losses are reported through logging.
train_ncn(
    net,
    data.train_iter,
    data.valid_iter,
    PAD_IDX,
)

INFO:neural_citation.train:Epoch: 01 | Time: 10m 16s
INFO:neural_citation.train:	Train Loss: 1385.732
INFO:neural_citation.train:	 Val. Loss: 1278.524
INFO:neural_citation.train:Epoch: 02 | Time: 10m 15s
INFO:neural_citation.train:	Train Loss: 1245.059
INFO:neural_citation.train:	 Val. Loss: 1276.715
INFO:neural_citation.train:Epoch: 03 | Time: 10m 17s
INFO:neural_citation.train:	Train Loss: 1217.724
INFO:neural_citation.train:	 Val. Loss: 1257.811
INFO:neural_citation.train:Epoch: 04 | Time: 10m 19s
INFO:neural_citation.train:	Train Loss: 1045.098
INFO:neural_citation.train:	 Val. Loss: 1347.428
INFO:neural_citation.train:Epoch: 05 | Time: 10m 17s
INFO:neural_citation.train:	Train Loss: 985.361
INFO:neural_citation.train:	 Val. Loss: 1230.886
INFO:neural_citation.train:Epoch: 06 | Time: 10m 18s
INFO:neural_citation.train:	Train Loss: 981.736
INFO:neural_citation.train:	 Val. Loss: 1131.072
INFO:neural_citation.train:Epoch: 07 | Time: 10m 18s
INFO:neural_citation.train:	Train Loss: 980