In [1]:
#! /usr/bin/env python
import os
import argparse
import datetime
import re
import numpy as np
import random


In [2]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim

In [3]:
from torchtext import data
from visdom import Visdom
viz = Visdom()

In [4]:
import matplotlib.pyplot as plt

In [5]:
from data import tokenizer, data_split, preprocess_dataset, create_batches
from data import SemEval10_task8

In [6]:
def l2_loss(parameters):
    norm = 0
    for param in parameters:
        norm += torch.sum((param**2))
    return norm

In [7]:
class Baseline_Model(nn.Module):
    def __init__(self, word_vocab, label_vocab, word_emb_dim, pos_emb_dim, hidden_dim, output_dim, MAX_POS = 15, use_gpu = True):
        super(Baseline_Model, self).__init__()
        
        # Set hyper parameters
        self.word_emb_dim = word_emb_dim
        self.pos_emb_dim = pos_emb_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.input_dim = word_emb_dim
        
        self.MAX_POS = MAX_POS
        
        
        # Set options and other parameters
        self.use_gpu = use_gpu
        self.word_vocab = word_vocab
        self.label_vocab = label_vocab
        #self.pos_vocab = pos_vocab
        
        
        # Free parameters for the model
        # Initialize embeddings (Word and Position embeddings) 
        self.word_emb = nn.Embedding(len(self.word_vocab), self.word_emb_dim).cuda()
        
        self.pos1_emb = nn.Embedding(self.MAX_POS*2+1, self.pos_emb_dim).cuda()
        self.pos1_emb.weight.data.uniform_(-0.00, 0.00)
        self.pos2_emb = nn.Embedding(self.MAX_POS*2+1, self.pos_emb_dim).cuda()
        self.pos2_emb.weight.data.uniform_(-0.00, 0.00)
        
        # Initialize LSTM parameters ()
        self.rnn = nn.GRU(self.input_dim +  self.pos_emb_dim * 2, hidden_dim, bidirectional=True, batch_first = True).cuda()
        
        
        # Initialize Attention parameters ()
        self.attention_hidden = nn.Linear(hidden_dim * 2, hidden_dim,bias=False).cuda()
        self.attention = nn.Linear(hidden_dim, 1, bias=False).cuda()
        
        # Initialize Classifier parameters ()
        #self.classifier_hidden = nn.Linear(hidden_dim * 2, hidden_dim).cuda()
        self.classifier = nn.Linear(hidden_dim * 2, output_dim).cuda()
        
        
        self.word_emb.weight.data.copy_(word_vocab.vectors)
    def prepare_inout(self, X, y=None):
        sents, pos1, pos2 = list(zip(*X))
        #sents = list(zip(*X))
        #pos1 = datas['position_indices_1']
        #pos2 = datas['position_indices_2']
        
        labels = y
        
        words = [ [ self.word_vocab.stoi[word] for word in sent] for sent in sents]
        #print(words)
        words_var = Variable(torch.LongTensor(words).cuda())
        #print(words_var)
        word_embeddings = self.word_emb(words_var)
        #print(word_embedings)
        
        pos1 = np.array(pos1).astype('int')
        #print(pos1)
        pos1_var = Variable(torch.LongTensor(pos1).cuda())
        pos1_embeddings = self.pos1_emb(pos1_var)
        #print(pos1_var)
        #print(pos1_embeddings)

        pos2 = np.array(pos2).astype('int')
        #print(pos2)
        pos2_var = Variable(torch.LongTensor(pos2).cuda())
        pos2_embeddings = self.pos2_emb(pos2_var)
        
        
        inputs = {'word_embeddings': word_embeddings,
                  'pos1_embeddings': pos1_embeddings,
                  'pos2_embeddings': pos2_embeddings}
        #inputs = torch.cat((word_embeddings, pos1_embeddings, pos2_embeddings),-1)
        #print(inputs)
        
        """print(labels)
        if labels:
            labels = [ self.label_vocab.stoi[label] - 1 for label in labels]
            labels_var = Variable(torch.LongTensor(labels).cuda())
            outputs = labels_var
        else:
            outputs = None
        """
        labels = [ self.label_vocab.stoi[label] - 1 for label in labels]
        labels_var = Variable(torch.LongTensor(labels).cuda())
        outputs = labels_var
        return inputs, outputs
        
        
        
        
    def forward(self, inputs, is_train = True):
        word_embeddings = inputs['word_embeddings']
        pos1_embeddings = inputs['pos1_embeddings']
        pos2_embeddings = inputs['pos2_embeddings']
        
        # LSTM layer
        X = torch.cat((word_embeddings, pos1_embeddings, pos2_embeddings),-1)
        X = F.dropout(X, p=0.5, training=is_train)
        hiddens, for_output = self.rnn(X)
        #rev_hiddens, rev_output = self.rev_lstm(X)
        hiddens = F.dropout(hiddens, p=0.5, training=is_train)
        
        # Self Attentive layer
        att_hidden = F.tanh(self.attention_hidden(hiddens))
        att_hidden = F.dropout(att_hidden, p=0.5, training=is_train)
        att_scores = self.attention(att_hidden)
        
        attention_distrib = F.softmax(att_scores, dim = 1)
        context_vector = F.tanh(torch.sum(hiddens * attention_distrib, dim = 1))

        # Classifier
        #context_hidden = self.classifier_hidden(context_vector)
        #context_hidden = F.dropout(context_hidden, p=0.5, training=is_train)
        
        finals = F.softmax(self.classifier(context_vector), dim = 1)
        if is_train:
            return finals
        else:
            return finals, context_vector
    
    def evaluatation(self, input, output, demonstrate_result = True, analyze = False, header=""):
        batch_Xs, batch_ys = create_batches(input, output, 128, shuffle=False)
        
        loss_func = nn.CrossEntropyLoss()
        epoch_loss = 0
        tp = 0
        gold_answer = []
        pred_answer = []
        for batch_X, batch_Y in zip(batch_Xs, batch_ys):
            X, Y = self.prepare_inout(batch_X, batch_Y)
            preds, attention_distribs = relation_extr(X, is_train = False)
            _, preds_Y = torch.max(preds, -1)
            tp += (preds_Y == Y).float().sum().data.cpu().numpy()[0]
            loss = loss_func(preds, Y)
            epoch_loss += loss.data.cpu().numpy()[0] * len(batch_X)
            
            for y, preds_y in zip(Y, preds_Y):
                y = y.data.cpu().numpy()[0] + 1
                preds_y = preds_y.data.cpu().numpy()[0] + 1
                
                gold_answer.append(label_vocab.itos[y])
                pred_answer.append(label_vocab.itos[preds_y])
            if analyze:
                for x, y, preds_y, pred_distirb, attention_distrib in zip(batch_X, batch_Y, preds_Y, preds, attention_distribs):
                    input_str = " ".join(x[0])
                    preds_y = preds_y.data.cpu().numpy()[0] + 1
                    preds = pred_distirb.data.cpu().numpy()
                    
                    print()
                    
                        
                    if y != self.label_vocab.itos[preds_y]:
                        print("input sentence: " + input_str)
                        print('answer label: ', " ",y)
                        print('wrong label: ', " ",self.label_vocab.itos[preds_y])
                        for i in range(len(preds)):
                            print(self.label_vocab.itos[i+1],"\t:\t",preds[i])
                        print()
                        #print(x[0])
                        #print(attention_distrib.data.cpu().numpy())
                        plt.figure(figsize=(20,5))
                        plt.xticks(range(len(x[0])), x[0],  rotation=30)
                        plt.plot(range(len(x[0])), attention_distrib.data.cpu().numpy(), 'ro')
                        plt.ylim(0, 1.0)
                        plt.show()
            del X,Y, loss, preds, attention_distribs
            
        if demonstrate_result:
            #print('Avg loss: ')
            print(header + " accuracy:\t%f"%(tp/float(len(output))))
            print(header + " loss:    \t%f"%(epoch_loss/float(len(output))))
            
            # code for official evaluataion_code
            fgold = open('./dataset/SemEval2010_task8_all_data/SemEval2010_task8_scorer-v1.2/gold_answer.txt','w')
            fpred = open('./dataset/SemEval2010_task8_all_data/SemEval2010_task8_scorer-v1.2/pred_answer.txt','w')
            i = 0
            for gold, pred in zip(gold_answer, pred_answer):
                fgold.write("%i\t%s\n"%(i,gold))
                fpred.write("%i\t%s\n"%(i,pred))
                
                i += 1
            #print('Macro F1-score')
            #print('Micro F1-score')
        
        return {'loss': epoch_loss/float(len(output))}

In [8]:
MAX_POS = 15
train, dev = data_split(SemEval10_task8(sub_path='SemEval2010_task8_training/TRAIN_FILE.TXT'), test_rate = 0.1)
test = SemEval10_task8(sub_path='SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT')

train_input, train_output = preprocess_dataset(train, MAX_POS, entity_normalize = False, directional_consideration = True)
train_words = list(zip(*train_input))[0]

dev_input, dev_output = preprocess_dataset(dev, MAX_POS, entity_normalize = False, directional_consideration = True)
dev_words = list(zip(*dev_input))[0]

test_input, test_output = preprocess_dataset(test, MAX_POS, entity_normalize = False, directional_consideration = True)
test_words = list(zip(*test_input))[0]

In [9]:
TEXT = data.Field(sequential=True,  lower=False)
TEXT.build_vocab(train_words+test_words+dev_words, vectors="glove.840B.300d")
word_vocab = TEXT.vocab

LABEL = data.Field(sequential=False, use_vocab=False)
LABEL.build_vocab(train_output+test_output+dev_output)
label_vocab = LABEL.vocab

In [10]:
relation_extr = torch.load("./model/nn_model")

In [11]:
import GPy

plt.rcParams["figure.figsize"] = [40.0, 20.0]

rel_list = list(relation_extr.label_vocab.stoi.keys())[1:]

In [12]:
rel_list

['Other',
 'Entity-Destination(e1,e2)',
 'Cause-Effect(e2,e1)',
 'Member-Collection(e2,e1)',
 'Entity-Origin(e1,e2)',
 'Message-Topic(e1,e2)',
 'Component-Whole(e1,e2)',
 'Component-Whole(e2,e1)',
 'Instrument-Agency(e2,e1)',
 'Content-Container(e1,e2)',
 'Product-Producer(e2,e1)',
 'Cause-Effect(e1,e2)',
 'Product-Producer(e1,e2)',
 'Content-Container(e2,e1)',
 'Entity-Origin(e2,e1)',
 'Message-Topic(e2,e1)',
 'Instrument-Agency(e1,e2)',
 'Member-Collection(e1,e2)',
 'Entity-Destination(e2,e1)']

In [23]:

for rel in rel_list:
    #if rel != 'Other':
    #    continue
        
    max_batch_size = 16

    h_list = []
    o_list = []

    batch_Xs, batch_ys = create_batches(train_input, train_output, max_batch_size, shuffle=False)
    for batch_X, batch_y in zip(batch_Xs, batch_ys):
        i, o = relation_extr.prepare_inout(batch_X, batch_y)
        o_list += o.cpu().data.numpy().tolist()
        o, h = relation_extr(i, is_train = False)
        h_list += h.cpu().data.numpy().tolist()
    
    new_o_list = []
    for i in o_list:
        if i == 19:
            print(i)
        tmp = [0] * (len(relation_extr.label_vocab.stoi) - 1)
        tmp[i] = 1
        new_o_list.append(tmp)
    
    m = GPy.models.GPRegression(np.array(h_list),np.array(new_o_list)[:, relation_extr.label_vocab.stoi[rel] - 1].reshape([-1, 1]))
    m.update_model(False)
    m[:] = np.load("./model/GP_model_" + rel + ".npy")   
    m.update_model(True)
    
    i_list = []
    h_list = []
    o_list = []

    batch_Xs, batch_ys = create_batches(test_input, test_output, max_batch_size, shuffle=False)
    for batch_X, batch_y in zip(batch_Xs, batch_ys):
        i, o = relation_extr.prepare_inout(batch_X, batch_y)
        i_list += batch_X[:, 0].tolist()
        o_list += o.cpu().data.numpy().tolist()
        o, h = relation_extr(i, is_train = False)
        h_list += h.cpu().data.numpy().tolist()

    new_o_list = []
    for i in o_list:
        tmp = [0] * (len(relation_extr.label_vocab.stoi) - 1)
        tmp[i] = 1
        new_o_list.append(tmp)

    mean, var = m.predict(np.array(h_list)[:])
    
    var
    print(rel)
    print(" ".join(i_list[np.argmax(var[np.array(new_o_list)[:, relation_extr.label_vocab.stoi[rel] - 1] == 1])]))
    print(var[np.argmax(var[np.array(new_o_list)[:, relation_extr.label_vocab.stoi[rel] - 1] == 1])])
    print(" ".join(i_list[np.argmin(var[np.array(new_o_list)[:, relation_extr.label_vocab.stoi[rel] - 1] == 1])]))
    print(var[np.argmin(var[np.array(new_o_list)[:, relation_extr.label_vocab.stoi[rel] - 1] == 1])])
    
    #break



Other
The class <e1> teacher </e1> is part of the school <e2> team </e2> , ensuring the unit is well integrated into the school , whilst still preserving the Montessori character and method of education .
[0.0220716]
Speculations on the causes behind the below capacity production of <e1> ethanol </e1> by the sugar <e2> factories </e2> in the state figured in the Legislative Council on Tuesday .
[0.02140842]
Entity-Destination(e1,e2)
Flanking or backing <e1> rudders </e1> are used by <e2> towboats </e2> and other vessels that require a high degree of manoeuvrability .
[0.0333387]
I changed the length of the <e1> catapult </e1> <e2> arm </e2> and I changed the rubber band .
[0.03571132]
Cause-Effect(e2,e1)
Mud-plastered walls , traditional display of art , handicrafts from all parts of the nation and the exclusivity of each theme state every year makes the Surajkund Mela stand apart in the <e1> league </e1> of various <e2> exhibitions </e2> of arts and crafts .
[0.01828024]
In New Englan

In [None]:
var[379]

In [None]:
var[12]

In [None]:
" ".join(test_input[379][0])