# Sample Code for Neural Model


In [8]:
import sys
import argparse
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from random import shuffle


# Word Embedding
Each input word is represented as a one-hot vector: its length equals the size of the vocabulary, and the only entry set to 1 is the one at the word's index in the vocabulary. This vector is fed through a single LSTM cell step, and the hidden vector output by the cell is used as the word embedding.

In [9]:
class Embedding(nn.Module):
    """Word embedding obtained by one LSTM-cell step over a one-hot word vector.

    The word is looked up in ``vocab``, encoded as a one-hot row vector of
    length ``len(vocab)`` and passed through a single ``nn.LSTMCell`` step.
    The initial cell state is the learned parameter ``lstm_init`` and the
    initial hidden state is ``tanh(lstm_init)``.

    Args:
        vocab: dictionary mapping each word to its integer index.
        hidden_size: size of the LSTM hidden state, i.e. of the embedding.
    """

    def __init__(self, vocab, hidden_size): # vocab is a dictionary, key: words, value: integer index
        super(Embedding, self).__init__()
        self.vocab = vocab
        self.vocab_size = len(vocab)
        self.hidden_size = hidden_size
        self.lstm = nn.LSTMCell(self.vocab_size, self.hidden_size)
        # Learned initial cell state, shaped as a batch of one.
        self.lstm_init = nn.Parameter(torch.rand(1, self.hidden_size))

    def forward(self, x):
        """Return the embedding of word ``x`` as a tensor of shape (1, hidden_size).

        Raises:
            KeyError: if ``x`` is not a key of ``vocab``.
        """
        cx = self.lstm_init
        hx = torch.tanh(cx)
        # Build the one-hot input as a (1, vocab_size) batch so that its rank
        # matches the (1, hidden_size) states, and allocate it with the same
        # dtype/device as the module's parameters.
        ix = self.lstm_init.new_zeros(1, self.vocab_size)
        ix[0, self.vocab[x]] = 1 # one-hot vector input
        hx, cx = self.lstm(ix, (hx, cx))
        return hx

# Neural Model for P(A | N, C)
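We build the following neural probability model, where $[c_{m}; n_{i}]$ is the concatenation of the classifier and noun embeddings, $a_{j}$ is the output embedding of adjective $j$, and the softmax normalizes over all adjectives:
$$P(A=a_{j} \mid C=c_{m}, N=n_{i}) = \mathrm{softmax}_{j}\left(a_{j}^{T}\tanh\left(W[c_{m}; n_{i}] + b\right)\right)$$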

In [10]:
class Model(nn.Module):
    """Log-probability model of an adjective given a noun and a classifier.

    The classifier and noun embeddings are concatenated, passed through an
    affine layer followed by tanh, then projected by a bias-free linear layer
    whose output size is the adjective vocabulary size. A log-softmax over
    that last dimension yields the log probabilities.

    Args:
        noun_embedding: callable mapping a noun to a (1, noun_dim) tensor.
        classifier_embedding: callable mapping a classifier to a
            (1, classifier_dim) tensor.
        input_size: size of the concatenated embedding vector.
        hidden_size: size of the inner hidden layer.
        output_size: number of adjectives scored by the model.
    """

    def __init__(self, noun_embedding, classifier_embedding, input_size, hidden_size, output_size):
        super().__init__()
        # Sub-modules producing the two input representations.
        self.noun_embedding = noun_embedding
        self.classifier_embedding = classifier_embedding
        # W and b of the formula, followed by the tanh non-linearity.
        self.inner_linear = nn.Linear(input_size, hidden_size)
        self.tanh = nn.Tanh()
        # Each row of this weight matrix plays the role of one adjective embedding.
        self.outer_linear = nn.Linear(hidden_size, output_size, bias=False)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Score every adjective for one (noun, adj, classifier) tuple.

        Only the noun and the classifier are read; the adjective of the tuple
        is ignored here. Returns the log probabilities with one row and one
        column per adjective.
        """
        noun, _, classifier = x
        noun_repr = self.noun_embedding(noun)
        classifier_repr = self.classifier_embedding(classifier)
        # Classifier first, then noun, matching [c; n] in the model definition.
        joined = torch.cat([classifier_repr, noun_repr], dim=1)
        hidden = self.tanh(self.inner_linear(joined))
        return self.log_softmax(self.outer_linear(hidden))
        

# Training Function

In [17]:
def train(model, train_dataset, target_vocab, optimizer, epoch, log_interval):
    """Train ``model`` for one epoch, one instance per optimizer step.

    Args:
        model: module called on each word tuple; must return log
            probabilities of shape (1, number of target words).
        train_dataset: list of word tuples; it is shuffled IN PLACE.
        target_vocab: dictionary mapping the target word (element 1 of each
            tuple) to its integer class index.
        optimizer: optimizer over the model parameters.
        epoch: zero-based epoch number, used only in the printed messages.
        log_interval: print the running average loss every this many
            instances; 0 or None disables the intermediate messages.

    Returns:
        The average loss over the epoch as a float (NaN if the dataset is
        empty).
    """
    model.train()
    shuffle(train_dataset)
    accumulate_loss = 0.0
    print("start training epoch %d" % (epoch+1))
    for (idx, word_tuple) in enumerate(train_dataset):
        target_word = word_tuple[1] # target word is the word whose probability we are interested
        optimizer.zero_grad()
        output = model(word_tuple)
        # Build the class index on the same device as the model output.
        target = torch.tensor([target_vocab[target_word]], dtype=torch.long, device=output.device)
        loss = F.nll_loss(output, target) # loss calculated
        # The loss is a 0-dim tensor: indexing it with [0] raises on current
        # PyTorch, so extract the Python float with .item().
        accumulate_loss += loss.item()
        loss.backward()
        optimizer.step()
        if log_interval and idx % log_interval == 0:
            print("trained %d instances, average loss is %.4f" % (idx+1, accumulate_loss / (idx+1)))
    num_instances = len(train_dataset)
    # Avoid a ZeroDivisionError when there is nothing to train on.
    average_loss = accumulate_loss / num_instances if num_instances else float("nan")
    print("finish training epoch %d, average loss of this epoch is %.4f" % (epoch+1, average_loss))
    return average_loss

# Helper Methods

In [12]:
def make_vocabs(data):
    """Build the noun, adjective and classifier vocabularies from word tuples.

    Elements 0, 1 and 2 of every tuple are the noun, the adjective and the
    classifier. Each distinct word receives the next free integer index of
    its vocabulary, in order of first appearance.

    Returns:
        A tuple (noun_vocab, adj_vocab, classifier_vocab) of dictionaries
        mapping words to integer indices.
    """
    vocabs = ({}, {}, {})
    for word_tuple in data:
        for position, vocab in enumerate(vocabs):
            # setdefault leaves already-known words untouched; new words get
            # the current vocabulary size as their index.
            vocab.setdefault(word_tuple[position], len(vocab))
    return vocabs

def make_data_list(data_counter):
    """Expand a mapping of data point -> count into a flat list.

    Every key of ``data_counter`` is repeated as many times as its count, in
    the mapping's iteration order. Keys whose count is zero or negative
    contribute nothing.
    """
    return [
        data_point
        for data_point in data_counter
        for _ in range(data_counter[data_point])
    ]


        

In [16]:
def main(data_path="train_noun_adj_classifier.pkl",
         noun_emb_size=100,
         classifier_emb_size=50,
         neural_model_hidden_size=16,
         learning_rate=0.1,
         max_epoch=3,
         log_interval=100):
    """Load the training data, build the model and train it.

    Args:
        data_path: path of the pickled mapping from (noun, adj, classifier)
            tuples to their counts.
        noun_emb_size: size of the noun embedding.
        classifier_emb_size: size of the classifier embedding.
        neural_model_hidden_size: size of the model's inner hidden layer.
        learning_rate: learning rate of the SGD optimizer.
        max_epoch: number of training epochs.
        log_interval: number of instances between two progress messages.

    Returns:
        The trained model.
    """
    # load data
    # NOTE: pickle can execute arbitrary code while loading; only use data
    # files from a trusted source.
    with open(data_path, "rb") as data_file: # closes the file handle after loading
        train_data_counter = pickle.load(data_file)
    train_data = make_data_list(train_data_counter)
    noun_vocab, adj_vocab, classifier_vocab = make_vocabs(train_data)

    # build models
    noun_embedding = Embedding(noun_vocab, noun_emb_size)
    classifier_embedding = Embedding(classifier_vocab, classifier_emb_size)
    model = Model(noun_embedding, classifier_embedding, noun_emb_size + classifier_emb_size, neural_model_hidden_size, len(adj_vocab))
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # train
    for epoch in range(max_epoch):
        train(model, train_data, adj_vocab, optimizer, epoch, log_interval)

    return model


if __name__ == "__main__":
    main()
    
    
    

40989
start training epoch 1
trained 1 instances, average loss is 8.8291
trained 101 instances, average loss is 8.7429
trained 201 instances, average loss is 8.5803
trained 301 instances, average loss is 8.4998
trained 401 instances, average loss is 8.3801
trained 501 instances, average loss is 8.3186
trained 601 instances, average loss is 8.2058
trained 701 instances, average loss is 8.1830
trained 801 instances, average loss is 8.1560
trained 901 instances, average loss is 8.1257
trained 1001 instances, average loss is 8.0712
trained 1101 instances, average loss is 8.0467
trained 1201 instances, average loss is 8.0119
trained 1301 instances, average loss is 7.9827
trained 1401 instances, average loss is 7.9443
trained 1501 instances, average loss is 7.9162
trained 1601 instances, average loss is 7.8980
trained 1701 instances, average loss is 7.8623
trained 1801 instances, average loss is 7.8451
trained 1901 instances, average loss is 7.8242
trained 2001 instances, average loss is 7.8

trained 17301 instances, average loss is 7.2657
trained 17401 instances, average loss is 7.2671
trained 17501 instances, average loss is 7.2654
trained 17601 instances, average loss is 7.2670
trained 17701 instances, average loss is 7.2649
trained 17801 instances, average loss is 7.2647
trained 17901 instances, average loss is 7.2623
trained 18001 instances, average loss is 7.2621
trained 18101 instances, average loss is 7.2627
trained 18201 instances, average loss is 7.2627
trained 18301 instances, average loss is 7.2633
trained 18401 instances, average loss is 7.2611
trained 18501 instances, average loss is 7.2601
trained 18601 instances, average loss is 7.2589
trained 18701 instances, average loss is 7.2580
trained 18801 instances, average loss is 7.2554
trained 18901 instances, average loss is 7.2548
trained 19001 instances, average loss is 7.2546
trained 19101 instances, average loss is 7.2525
trained 19201 instances, average loss is 7.2486
trained 19301 instances, average loss is

trained 34401 instances, average loss is 7.1504
trained 34501 instances, average loss is 7.1495
trained 34601 instances, average loss is 7.1498
trained 34701 instances, average loss is 7.1496
trained 34801 instances, average loss is 7.1491
trained 34901 instances, average loss is 7.1481
trained 35001 instances, average loss is 7.1475
trained 35101 instances, average loss is 7.1458
trained 35201 instances, average loss is 7.1463
trained 35301 instances, average loss is 7.1455
trained 35401 instances, average loss is 7.1446
trained 35501 instances, average loss is 7.1445
trained 35601 instances, average loss is 7.1440
trained 35701 instances, average loss is 7.1436
trained 35801 instances, average loss is 7.1428
trained 35901 instances, average loss is 7.1417
trained 36001 instances, average loss is 7.1421
trained 36101 instances, average loss is 7.1415
trained 36201 instances, average loss is 7.1407
trained 36301 instances, average loss is 7.1389
trained 36401 instances, average loss is

trained 10601 instances, average loss is 6.7413
trained 10701 instances, average loss is 6.7403
trained 10801 instances, average loss is 6.7398
trained 10901 instances, average loss is 6.7395
trained 11001 instances, average loss is 6.7364
trained 11101 instances, average loss is 6.7388
trained 11201 instances, average loss is 6.7395
trained 11301 instances, average loss is 6.7413
trained 11401 instances, average loss is 6.7438
trained 11501 instances, average loss is 6.7487
trained 11601 instances, average loss is 6.7481
trained 11701 instances, average loss is 6.7449
trained 11801 instances, average loss is 6.7460
trained 11901 instances, average loss is 6.7450
trained 12001 instances, average loss is 6.7477
trained 12101 instances, average loss is 6.7514
trained 12201 instances, average loss is 6.7524
trained 12301 instances, average loss is 6.7489
trained 12401 instances, average loss is 6.7480
trained 12501 instances, average loss is 6.7502
trained 12601 instances, average loss is

trained 27701 instances, average loss is 6.7786
trained 27801 instances, average loss is 6.7784
trained 27901 instances, average loss is 6.7778
trained 28001 instances, average loss is 6.7765
trained 28101 instances, average loss is 6.7763
trained 28201 instances, average loss is 6.7758
trained 28301 instances, average loss is 6.7752
trained 28401 instances, average loss is 6.7755
trained 28501 instances, average loss is 6.7757
trained 28601 instances, average loss is 6.7742
trained 28701 instances, average loss is 6.7754
trained 28801 instances, average loss is 6.7744
trained 28901 instances, average loss is 6.7743
trained 29001 instances, average loss is 6.7752
trained 29101 instances, average loss is 6.7759
trained 29201 instances, average loss is 6.7768
trained 29301 instances, average loss is 6.7750
trained 29401 instances, average loss is 6.7755
trained 29501 instances, average loss is 6.7755
trained 29601 instances, average loss is 6.7755
trained 29701 instances, average loss is

trained 3701 instances, average loss is 6.4437
trained 3801 instances, average loss is 6.4469
trained 3901 instances, average loss is 6.4550
trained 4001 instances, average loss is 6.4582
trained 4101 instances, average loss is 6.4744
trained 4201 instances, average loss is 6.4773
trained 4301 instances, average loss is 6.4777
trained 4401 instances, average loss is 6.4808
trained 4501 instances, average loss is 6.4808
trained 4601 instances, average loss is 6.4816
trained 4701 instances, average loss is 6.4850
trained 4801 instances, average loss is 6.4876
trained 4901 instances, average loss is 6.4876
trained 5001 instances, average loss is 6.4823
trained 5101 instances, average loss is 6.4933
trained 5201 instances, average loss is 6.4965
trained 5301 instances, average loss is 6.4998
trained 5401 instances, average loss is 6.5030
trained 5501 instances, average loss is 6.5104
trained 5601 instances, average loss is 6.5192
trained 5701 instances, average loss is 6.5200
trained 5801 

trained 20901 instances, average loss is 6.5141
trained 21001 instances, average loss is 6.5133
trained 21101 instances, average loss is 6.5115
trained 21201 instances, average loss is 6.5123
trained 21301 instances, average loss is 6.5132
trained 21401 instances, average loss is 6.5144
trained 21501 instances, average loss is 6.5152
trained 21601 instances, average loss is 6.5160
trained 21701 instances, average loss is 6.5161
trained 21801 instances, average loss is 6.5176
trained 21901 instances, average loss is 6.5147
trained 22001 instances, average loss is 6.5174
trained 22101 instances, average loss is 6.5170
trained 22201 instances, average loss is 6.5149
trained 22301 instances, average loss is 6.5146
trained 22401 instances, average loss is 6.5142
trained 22501 instances, average loss is 6.5139
trained 22601 instances, average loss is 6.5141
trained 22701 instances, average loss is 6.5126
trained 22801 instances, average loss is 6.5129
trained 22901 instances, average loss is

trained 38001 instances, average loss is 6.5177
trained 38101 instances, average loss is 6.5169
trained 38201 instances, average loss is 6.5167
trained 38301 instances, average loss is 6.5163
trained 38401 instances, average loss is 6.5174
trained 38501 instances, average loss is 6.5190
trained 38601 instances, average loss is 6.5186
trained 38701 instances, average loss is 6.5172
trained 38801 instances, average loss is 6.5163
trained 38901 instances, average loss is 6.5157
trained 39001 instances, average loss is 6.5154
trained 39101 instances, average loss is 6.5153
trained 39201 instances, average loss is 6.5166
trained 39301 instances, average loss is 6.5164
trained 39401 instances, average loss is 6.5166
trained 39501 instances, average loss is 6.5178
trained 39601 instances, average loss is 6.5178
trained 39701 instances, average loss is 6.5168
trained 39801 instances, average loss is 6.5159
trained 39901 instances, average loss is 6.5166
trained 40001 instances, average loss is