In [1]:
import os

from ncn.model import *
from ncn.training import *

In [2]:
# Seed Python's and PyTorch's random number generators with the shared SEED.
random.seed(SEED)
torch.manual_seed(SEED)
# Request deterministic cuDNN kernels and switch off the cuDNN autotuner:
# with benchmark mode enabled cuDNN may select a different convolution
# algorithm from one run to the next, which defeats the deterministic flag.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# set up training
# Location of the arXiv CSV. The default is the path used for the recorded
# run; set the NCN_DATA_CSV environment variable to load the file from
# somewhere else without editing this cell.
ARXIV_CSV_PATH = os.environ.get(
    "NCN_DATA_CSV",
    "/home/jupyter/tutorials/seminar_kd/arxiv_data.csv",
)
data = get_bucketized_iterators(ARXIV_CSV_PATH)
# Index of the '<pad>' token, looked up in the title vocabulary.
PAD_IDX = data.ttl.vocab.stoi['<pad>']
# Vocabulary sizes for the context (cntxt), author (aut) and title (ttl)
# fields; these are handed to the network constructor as its vocab sizes.
cntxt_vocab_len = len(data.cntxt.vocab)
aut_vocab_len = len(data.aut.vocab)
ttl_vocab_len = len(data.ttl.vocab)

INFO:neural_citation.data:Getting fields...
INFO:neural_citation.data:Loading dataset...
INFO:neural_citation.data:Building vocab...


In [4]:
# Hyperparameters of the Neural Citation Network, gathered in one mapping so
# the whole configuration can be read (and logged) at a glance.
ncn_hparams = dict(
    # encoder filter settings
    context_filters=[4, 4, 5],
    author_filters=[1, 2],
    num_filters=256,
    authors=True,
    # vocabulary sizes and padding index computed from the loaded data
    context_vocab_size=cntxt_vocab_len,
    title_vocab_size=ttl_vocab_len,
    author_vocab_size=aut_vocab_len,
    pad_idx=PAD_IDX,
    # embedding / decoder dimensions and regularisation
    embed_size=256,
    num_layers=2,
    hidden_size=256,
    dropout_p=0.2,
    show_attention=False,
)
net = NeuralCitationNetwork(**ncn_hparams)
# Kept as the cell's last expression so the module structure is displayed.
net.to(DEVICE)
# Custom weight initialisation is left disabled for this run:
# net.apply(init_weights)

NeuralCitationNetwork(
  (encoder): NCNEncoder(
    (dropout): Dropout(p=0.2)
    (context_embedding): Embedding(30002, 256, padding_idx=1)
    (context_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(256, 4), stride=(1, 1), bias=False)
        )
        (1): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(256, 4), stride=(1, 1), bias=False)
        )
        (2): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(256, 5), stride=(1, 1), bias=False)
        )
      )
      (fc): Linear(in_features=768, out_features=768, bias=True)
    )
    (author_embedding): Embedding(30002, 256, padding_idx=1)
    (citing_author_encoder): TDNNEncoder(
      (encoder): ModuleList(
        (0): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(256, 1), stride=(1, 1), bias=False)
        )
        (1): TDNN(
          (conv): Conv2d(1, 256, kernel_size=(256, 2), stride=(1, 1), bias=False)
        )
      )
      (fc): Linear(in

In [5]:
# Train the network on the training iterator and evaluate on the validation
# iterator; the two returned values are kept as the train / validation loss
# records (presumably one entry per epoch -- confirm against train_model).
# NOTE(review): in the recorded run the validation loss stays within roughly
# 877-881 from epoch 22 onward (minimum 876.713 at epoch 33) while the train
# loss keeps falling, so early stopping or best-checkpoint selection may be
# worth considering if train_model does not already do it.
train_iter, valid_iter = data.train_iter, data.valid_iter
train_losses, valid_losses = train_model(net, train_iter, valid_iter, PAD_IDX)

INFO:neural_citation.train:INITIALIZING NEURAL CITATION NETWORK WITH AUTHORS = True
Running on: cuda
Number of model parameters: 49,199,156
Encoders: # Filters = 256, Context filter length = [4, 4, 5],  Context filter length = [1, 2]
Embeddings: Dimension = 256, Pad index = 1, Context vocab = 30002, Author vocab = 30002, Title vocab = 30004
Decoder: # GRU cells = 2, Hidden size = 256
Parameters: Dropout = 0.2, Show attention = False
-------------------------------------------------
TRAINING SETTINGS
Seed = 34, # Epochs = 40, Batch size = 32, Initial lr = 0.001


HBox(children=(IntProgress(value=0, description='Epochs', max=40, style=ProgressStyle(description_width='initi…

HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 01 | Time: 15m 26s
INFO:neural_citation.train:	Train Loss: 1284.053
INFO:neural_citation.train:	 Val. Loss: 1113.765


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 02 | Time: 15m 26s
INFO:neural_citation.train:	Train Loss: 1071.099
INFO:neural_citation.train:	 Val. Loss: 1009.832


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 03 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 980.556
INFO:neural_citation.train:	 Val. Loss: 967.240


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 04 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 937.932
INFO:neural_citation.train:	 Val. Loss: 945.741


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 05 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 909.653
INFO:neural_citation.train:	 Val. Loss: 930.920


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 06 | Time: 15m 28s
INFO:neural_citation.train:	Train Loss: 889.516
INFO:neural_citation.train:	 Val. Loss: 921.334


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 07 | Time: 15m 27s
INFO:neural_citation.train:	Train Loss: 873.497
INFO:neural_citation.train:	 Val. Loss: 912.472


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 08 | Time: 15m 28s
INFO:neural_citation.train:	Train Loss: 859.881
INFO:neural_citation.train:	 Val. Loss: 904.804


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 09 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 848.904
INFO:neural_citation.train:	 Val. Loss: 902.907


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 10 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 839.159
INFO:neural_citation.train:	 Val. Loss: 896.140


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 11 | Time: 15m 27s
INFO:neural_citation.train:	Train Loss: 830.651
INFO:neural_citation.train:	 Val. Loss: 893.745


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 12 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 823.101
INFO:neural_citation.train:	 Val. Loss: 890.117


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 13 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 816.649
INFO:neural_citation.train:	 Val. Loss: 888.303


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 14 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 810.521
INFO:neural_citation.train:	 Val. Loss: 885.998


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 15 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 805.446
INFO:neural_citation.train:	 Val. Loss: 883.658


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 16 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 800.984
INFO:neural_citation.train:	 Val. Loss: 884.808


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 17 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 797.174
INFO:neural_citation.train:	 Val. Loss: 883.951


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 18 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 793.071
INFO:neural_citation.train:	 Val. Loss: 880.532


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 19 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 790.031
INFO:neural_citation.train:	 Val. Loss: 883.306


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 20 | Time: 15m 29s
INFO:neural_citation.train:	Train Loss: 786.336
INFO:neural_citation.train:	 Val. Loss: 880.822


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 21 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 783.034
INFO:neural_citation.train:	 Val. Loss: 883.267


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 22 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 780.802
INFO:neural_citation.train:	 Val. Loss: 879.175


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 23 | Time: 15m 33s
INFO:neural_citation.train:	Train Loss: 777.931
INFO:neural_citation.train:	 Val. Loss: 878.796


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 24 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 775.496
INFO:neural_citation.train:	 Val. Loss: 877.949


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 25 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 773.254
INFO:neural_citation.train:	 Val. Loss: 878.446


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 26 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 772.194
INFO:neural_citation.train:	 Val. Loss: 879.030


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 27 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 770.574
INFO:neural_citation.train:	 Val. Loss: 878.022


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 28 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 768.908
INFO:neural_citation.train:	 Val. Loss: 878.870


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 29 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 767.263
INFO:neural_citation.train:	 Val. Loss: 880.783


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 30 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 765.352
INFO:neural_citation.train:	 Val. Loss: 878.327


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 31 | Time: 15m 35s
INFO:neural_citation.train:	Train Loss: 763.968
INFO:neural_citation.train:	 Val. Loss: 880.112


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 32 | Time: 15m 33s
INFO:neural_citation.train:	Train Loss: 762.418
INFO:neural_citation.train:	 Val. Loss: 877.122


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 33 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 760.590
INFO:neural_citation.train:	 Val. Loss: 876.713


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 34 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 758.841
INFO:neural_citation.train:	 Val. Loss: 879.265


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 35 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 757.692
INFO:neural_citation.train:	 Val. Loss: 879.201


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 36 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 757.163
INFO:neural_citation.train:	 Val. Loss: 878.770


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 37 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 756.031
INFO:neural_citation.train:	 Val. Loss: 880.685


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 38 | Time: 15m 32s
INFO:neural_citation.train:	Train Loss: 754.954
INFO:neural_citation.train:	 Val. Loss: 880.390


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 39 | Time: 15m 30s
INFO:neural_citation.train:	Train Loss: 753.522
INFO:neural_citation.train:	 Val. Loss: 881.147


HBox(children=(IntProgress(value=0, description='Training batches', max=12559, style=ProgressStyle(description…

HBox(children=(IntProgress(value=0, description='Evaluating batches', max=1570, style=ProgressStyle(descriptio…

INFO:neural_citation.train:Epoch: 40 | Time: 15m 31s
INFO:neural_citation.train:	Train Loss: 752.447
INFO:neural_citation.train:	 Val. Loss: 877.921



