In [1]:
#! /usr/bin/env python
import os
import argparse
import datetime
import re
import numpy as np
import random


In [2]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim

In [3]:
from torchtext import data
from visdom import Visdom
viz = Visdom()

In [4]:
from data import tokenizer, data_split, preprocess_dataset, create_batches
from data import SemEval10_task8

In [5]:
def l2_loss(parameters):
    norm = 0
    for param in parameters:
        norm += torch.sum((param**2))
    return norm

In [6]:
class LSTM_Baseline_Model(nn.Module):
    def __init__(self, word_vocab, label_vocab, word_emb_dim, pos_emb_dim, hidden_dim, output_dim, MAX_POS = 15, use_gpu = True):
        super(LSTM_Baseline_Model, self).__init__()
        
        # Set hyper parameters
        self.word_emb_dim = word_emb_dim
        self.pos_emb_dim = pos_emb_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.input_dim = word_emb_dim + pos_emb_dim * 2
        
        self.MAX_POS = MAX_POS
        
        
        # Set options and other parameters
        self.use_gpu = use_gpu
        self.word_vocab = word_vocab
        self.label_vocab = label_vocab
        #self.pos_vocab = pos_vocab
        
        
        # Free parameters for the model
        # Initialize embeddings (Word and Position embeddings) 
        self.word_emb = nn.Embedding(len(self.word_vocab), self.word_emb_dim).cuda()
        
        self.pos1_emb = nn.Embedding(self.MAX_POS*2+1, self.pos_emb_dim).cuda()
        self.pos1_emb.weight.data.uniform_(-0.05, 0.05)
        self.pos2_emb = nn.Embedding(self.MAX_POS*2+1, self.pos_emb_dim).cuda()
        self.pos2_emb.weight.data.uniform_(-0.05, 0.05)
        
        # Initialize LSTM parameters ()
        self.lstm = nn.LSTM(self.input_dim, hidden_dim, bidirectional=True, batch_first = True).cuda()
        
        
        # Initialize Attention parameters ()
        self.attention_hidden = nn.Linear(hidden_dim * 2, hidden_dim,bias=False).cuda()
        self.attention = nn.Linear(hidden_dim, 1, bias=False).cuda()
        
        # Initialize Classifier parameters ()
        self.classifier_hidden = nn.Linear(hidden_dim * 2, hidden_dim).cuda()
        self.classifier = nn.Linear(hidden_dim, output_dim).cuda()
        
        
        self.word_emb.weight.data.copy_(word_vocab.vectors)
    def prepare_inout(self, X, y):
        sents, pos1, pos2 = list(zip(*X))
        #sents = list(zip(*X))
        #pos1 = datas['position_indices_1']
        #pos2 = datas['position_indices_2']
        
        labels = y
        
        words = [ [ self.word_vocab.stoi[word] for word in sent] for sent in sents]
        #print(words)
        words_var = Variable(torch.LongTensor(words).cuda())
        #print(words_var)
        word_embeddings = self.word_emb(words_var)
        #print(word_embedings)
        
        pos1 = np.array(pos1).astype('int')
        #print(pos1)
        pos1_var = Variable(torch.LongTensor(pos1).cuda())
        pos1_embeddings = self.pos1_emb(pos1_var)
        #print(pos1_var)
        #print(pos1_embeddings)

        pos2 = np.array(pos2).astype('int')
        #print(pos2)
        pos2_var = Variable(torch.LongTensor(pos2).cuda())
        pos2_embeddings = self.pos2_emb(pos2_var)
        
        inputs = torch.cat((word_embeddings, pos1_embeddings, pos2_embeddings),-1)
        #print(inputs)
        
        labels = [ self.label_vocab.stoi[label] - 1 for label in labels]
        labels_var = Variable(torch.LongTensor(labels).cuda())
        outputs = labels_var
        
        return inputs, outputs
        
        
        
        
    def forward(self, X, is_train = True):
        # LSTM layer
        X = F.dropout(X, p=0.5, training=is_train)
        hiddens, for_output = self.lstm(X)
        #rev_hiddens, rev_output = self.rev_lstm(X)
        hiddens = F.dropout(hiddens, p=0.5, training=is_train)
        
        # Self Attentive layer
        att_hidden = F.tanh(self.attention_hidden(hiddens))
        
        att_scores = self.attention(att_hidden)
        
        attention_distrib = F.softmax(att_scores, dim = 1)
        context_vector = torch.sum(hiddens * attention_distrib, dim = 1)

        # Classifier
        context_hidden = F.tanh(self.classifier_hidden(context_vector))
        context_hidden = F.dropout(context_hidden, p=0.5, training=is_train)
        finals = F.softmax(self.classifier(context_hidden), dim = 1)

        return finals
    
    def evaluatation(self, input, output, demonstrate_result = True, analyze = False, header=""):
        batch_Xs, batch_ys = create_batches(input, output, 128, shuffle=False)
        #loss = 0
        tp = 0
        for batch_X, batch_Y in zip(batch_Xs, batch_ys):
            X, Y = self.prepare_inout(batch_X, batch_Y)
            preds = relation_extr(X, is_train = False)
            _, preds_Y = torch.max(preds, -1)
            tp += (preds_Y == Y).float().sum().data.cpu().numpy()[0]
            
            if analyze:
                for x, y, preds_y in zip(batch_X, batch_Y, preds_Y):
                    input_str = " ".join(x[0])
                    preds_y = preds_y.data.cpu().numpy()[0] + 1
                    if y != self.label_vocab.itos[preds_y]:
                        print("input sentence: " + input_str)
                        print('answer label: ', " ",y)
                        print('wrong label: ', " ",self.label_vocab.itos[preds_y])
                        print()
            del X,Y
            
        if demonstrate_result:
            #print('Avg loss: ')
            print(header + " accuracy: ", tp/float(len(output)))
            
            #print('Macro F1-score')
            #print('Micro F1-score')
        

In [7]:
train, dev = data_split(SemEval10_task8(sub_path='SemEval2010_task8_training/TRAIN_FILE.TXT'), test_rate = 0.1)
test = SemEval10_task8(sub_path='SemEval2010_task8_testing_keys/TEST_FILE_FULL.TXT')

train_input, train_output = preprocess_dataset(train, entity_normalize = False, directional_consideration = False)
train_words = list(zip(*train_input))[0]

dev_input, dev_output = preprocess_dataset(dev, entity_normalize = False, directional_consideration = False)
dev_words = list(zip(*dev_input))[0]

test_input, test_output = preprocess_dataset(test, entity_normalize = False, directional_consideration = False)
test_words = list(zip(*test_input))[0]

In [8]:
TEXT = data.Field(sequential=True,  lower=False)
TEXT.build_vocab(train_words+test_words+dev_words, vectors="glove.840B.300d")
word_vocab = TEXT.vocab

LABEL = data.Field(sequential=False, use_vocab=False)
LABEL.build_vocab(train_output+test_output+dev_output)
label_vocab = LABEL.vocab

In [9]:
epoch_num = 30

learning_rate = 0.001
l2_rate = 10e-5

max_batch_size = 16
#max_num_of_sent = 50
word_emb_dim = 300
pos_emb_dim = 10
hidden_dim = 320

print(len(LABEL.vocab.stoi))
relation_extr = LSTM_Baseline_Model(word_vocab, 
                                    label_vocab, 
                                    word_emb_dim = word_emb_dim, 
                                    pos_emb_dim = pos_emb_dim, 
                                    hidden_dim = hidden_dim, 
                                    output_dim = len(LABEL.vocab.stoi)-1)

11


In [10]:
params = list(relation_extr.parameters())
#print(list(relation_extr.named_parameters()))

# gradient clip
torch.nn.utils.clip_grad_norm(params, 5.0)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(params, lr = learning_rate)

In [11]:

for i in range(epoch_num):
    epoch_loss = 0
    tp = 0
    batch_Xs, batch_ys = create_batches(train_input, train_output, max_batch_size)
    if (i + 1)%10:
        optimizer
    for batch_X, batch_y in zip(batch_Xs, batch_ys):
        X, Y = relation_extr.prepare_inout(batch_X, batch_y)
        
        optimizer.zero_grad()
        preds = relation_extr(X)
        #print(preds)
        _, preds_Y = torch.max(preds, -1)
        #print(preds_Y)
        batch_size = Y.size()[0]
        loss = loss_func(preds, Y)
        batch_loss = loss * batch_size #/ max_batch_size
        batch_loss += l2_loss(params[1:]) * l2_rate
        #batch_loss = loss
        
        batch_loss.backward()
        optimizer.step()
        
        epoch_loss += loss.data.cpu().numpy()[0]
        tp += (preds_Y == Y).float().sum().data.cpu().numpy()[0]
        del X, Y, loss, batch_loss, preds, preds_Y
    print("Train epoch",i,epoch_loss/len(train_output))
    print("Train accuracy",i,tp/float(len(train_output)))
    relation_extr.evaluatation(dev_input, dev_output,header = 'Dev')
    relation_extr.evaluatation(test_input, test_output,header = 'Test', analyze = False)
    #print(params[0])
    
    del epoch_loss, tp
    
    
    
    print()

Train epoch 0 0.1445853884021441
Train accuracy 0 0.3401388888888889
Dev accuracy:  0.54875
Test accuracy:  0.5804195804195804

Train epoch 1 0.12791594521866903
Train accuracy 1 0.5873611111111111
Dev accuracy:  0.62875
Test accuracy:  0.6610231873389768

Train epoch 2 0.12230069764786296
Train accuracy 2 0.6652777777777777
Dev accuracy:  0.6725
Test accuracy:  0.7073978652926022

Train epoch 3 0.11921763136982917
Train accuracy 3 0.7094444444444444
Dev accuracy:  0.6875
Test accuracy:  0.7169672432830327

Train epoch 4 0.11704749554395676
Train accuracy 4 0.7443055555555556
Dev accuracy:  0.715
Test accuracy:  0.7269046742730954

Train epoch 5 0.11482526843746503
Train accuracy 5 0.7719444444444444
Dev accuracy:  0.69875
Test accuracy:  0.7467795362532205

Train epoch 6 0.11260885475410355
Train accuracy 6 0.8073611111111111
Dev accuracy:  0.7275
Test accuracy:  0.7486198012513802

Train epoch 7 0.11047689384884304
Train accuracy 7 0.8369444444444445
Dev accuracy:  0.7225
Test accura

In [12]:
relation_extr.evaluatation(train_input, train_output, header = 'Train')

Train accuracy:  0.9743055555555555


In [13]:
relation_extr.evaluatation(test_input, test_output, header = 'Test', analyze  = True)

input sentence: In South Africa , which has one of the best police to public ratios on the continent , the share of <e1> murders </e1> that result in a <e2> conviction </e2> is about 18 % , compared to 56 % in the US and 61 % in the UK .
answer label:    Cause-Effect
wrong label:    Other
input sentence: The <e1> entrance </e1> to the temple was originally from the <e2> east </e2> , near the southeastern corner and decorated with twin monolithic limestone pillars colored red and bearing on the exterior side vertical hieroglyphic inscriptions in sunk relief with the queen 's titulary , name and depiction .
answer label:    Other
wrong label:    Entity-Origin
input sentence: Toward the end of the novel , Chike 's father , a former slave whose wealth was based , in part , on a large <e1> plantation </e1> of cocoa <e2> beans </e2> and coconuts , wakes up one morning to find that all his plants have been cut down .
answer label:    Component-Whole
wrong label:    Member-Collection
input sen

wrong label:    Member-Collection
input sentence: The tribunal has jurisdiction over any <e1> dispute </e1> concerning the <e2> interpretation </e2> or application of other agreements .
answer label:    Other
wrong label:    Message-Topic
input sentence: The <e1> decedent </e1> had left the nursery supply company 's main <e2> yard </e2> earlier in the day .
answer label:    Entity-Origin
wrong label:    Other
input sentence: Recall that in the Blink application , the <e1> timers </e1> were started from the booted <e2> event </e2> .
answer label:    Other
wrong label:    Entity-Origin
input sentence: This adorable <e1> necklace </e1> is made with fine silver plated <e2> chain </e2> and is 20 '' long .
answer label:    Other
wrong label:    Component-Whole
input sentence: Some part of the <e1> train </e1> was inside a <e2> tunnel </e2> and the remaining on the ramp .
answer label:    Other
wrong label:    Content-Container
input sentence: The two <e1> countries </e1> are related through 

input sentence: The painting shows a historical view of the <e1> damage </e1> caused by the 1693 Catania earthquake and the <e2> reconstruction </e2> activities .
answer label:    Other
wrong label:    Cause-Effect
input sentence: The <e1> bank </e1> has drawn up <e2> plans </e2> for the same and may start scouting for partners over next few months .
answer label:    Product-Producer
wrong label:    Other
input sentence: Both <e1> cocaine </e1> and crack cocaine cause constricted blood vessels , increased heart rate , body temperature and blood <e2> pressure </e2> .
answer label:    Other
wrong label:    Cause-Effect
input sentence: At full establishment , a <e1> brigade </e1> of 18-lbr field <e2> guns </e2> consisted of 795 men of whom 23 were officers .
answer label:    Other
wrong label:    Member-Collection
input sentence: Mental health recovery and economic <e1> recovery </e1> after the <e2> storm </e2> : high-frequency longitudinal evidence from Sri Lankan small business owners .

input sentence: Construction <e1> worker </e1> carrying a wooden <e2> plank </e2> on his shoulder .
answer label:    Other
wrong label:    Instrument-Agency
input sentence: A boy gets his <e1> tongue </e1> stuck in a <e2> canteen </e2> .
answer label:    Other
wrong label:    Content-Container
input sentence: The <e1> astronauts </e1> rode to the station in a <e2> capsule </e2> .
answer label:    Other
wrong label:    Entity-Destination
input sentence: Gordon Brown <e1> speech </e1> addresses the <e2> critics </e2> on UK immigration .
answer label:    Other
wrong label:    Message-Topic
input sentence: The <e1> nest </e1> of <e2> hornets </e2> was long and uncovered externally .
answer label:    Other
wrong label:    Member-Collection
input sentence: The bottle carrier converts your <e1> bottle </e1> into a <e2> canteen </e2> .
answer label:    Other
wrong label:    Entity-Destination
input sentence: The <e1> player </e1> starts playing at the next nearest <e2> keyframe </e2> .
answer 

input sentence: The flux <e1> sensor </e1> moves a half <e2> circle </e2> around the filament in order to measure the spatial directivity of each heater .
answer label:    Other
wrong label:    Component-Whole
input sentence: The Galileoscope is named after the Italian <e1> astronomer </e1> , who first observed the heavens through a <e2> telescope </e2> 400 years ago .
answer label:    Instrument-Agency
wrong label:    Product-Producer
input sentence: Accidentally , one <e1> worker </e1> was locked in a refrigerator <e2> boxcar </e2> , and the rest of the workmen left the site .
answer label:    Other
wrong label:    Content-Container
input sentence: Godfrey and I , caught up in the <e1> whirl </e1> of <e2> insects </e2> , had become part of the forest , too .
answer label:    Other
wrong label:    Member-Collection
input sentence: The engine revved a bit in the lower gears , but was adjusted immediately by the <e1> guy </e1> with a <e2> knob </e2> .
answer label:    Instrument-Agency


input sentence: The same <e1> effect </e1> is achieved the traditional <e2> way </e2> , with a team of workers like Keebler elves .
answer label:    Cause-Effect
wrong label:    Other
input sentence: Then , the target PET <e1> bottle </e1> was put inside of a metal <e2> container </e2> , which was grounded .
answer label:    Entity-Destination
wrong label:    Content-Container
input sentence: We dissolved the <e1> contents </e1> of one packet in a <e2> carafe </e2> of water and ran the brew cycle .
answer label:    Entity-Destination
wrong label:    Content-Container
input sentence: A <e1> tableau </e1> like this one is placed inside the king 's <e2> room </e2> and refers to court ceremonies .
answer label:    Content-Container
wrong label:    Entity-Destination
input sentence: <e1> Bone </e1> is built up of many <e2> cell </e2> types and so far we have only investigated human osteoblasts .
answer label:    Component-Whole
wrong label:    Other
input sentence: There is a curb between t

wrong label:    Cause-Effect
input sentence: The <e1> man </e1> was carried into a waiting police <e2> car </e2> from Coleraine Times .
answer label:    Entity-Destination
wrong label:    Other
input sentence: <e1> Analysts </e1> assess distribution and changes in distribution over time by using <e2> frequency </e2> .
answer label:    Instrument-Agency
wrong label:    Other
input sentence: The bee carries <e1> pollen </e1> in a pollen <e2> basket </e2> back to the hive .
answer label:    Content-Container
wrong label:    Other
input sentence: The <e1> mushroom </e1> was found in a <e2> mass </e2> of wet twigs and leaves .
answer label:    Other
wrong label:    Content-Container
input sentence: The boy 's <e1> coffin </e1> is carried to the <e2> funeral </e2> by his uncles .
answer label:    Other
wrong label:    Entity-Destination
input sentence: Active <e1> transport </e1> is a <e2> mechanism </e2> evolved to mediate traffic across cellular membranes .
answer label:    Other
wrong lab

input sentence: By dividing the <e1> space </e1> in a kitchen <e2> drawer </e2> where you keep all your cooking utensils , you grouped items by size or purpose .
answer label:    Other
wrong label:    Entity-Destination
input sentence: In accordance with the provisions of the Interim Agreement both parties agreed on this <e1> protocol </e1> for the <e2> implementation </e2> of the redeployment in Hebron .
answer label:    Other
wrong label:    Message-Topic
input sentence: For example , the photo at the top of the page shows a <e1> pilot </e1> 's emergency <e2> kit </e2> with a watch type compass included .
answer label:    Instrument-Agency
wrong label:    Component-Whole
input sentence: The <e1> company </e1> has mocked up a <e2> version </e2> of YouTube built around the HTML5 video tag , playing mini-movies inside a browser sans plug-ins .
answer label:    Other
wrong label:    Product-Producer
input sentence: A major product of the <e1> factory </e1> was decorative quarry <e2> glas

input sentence: The <e1> shell </e1> encloses a <e2> rod </e2> having a nickel layer with a phosphorous content .
answer label:    Content-Container
wrong label:    Component-Whole
input sentence: Desperados is a CBBC children 's <e1> drama series </e1> following a wheelchair basketball <e2> team </e2> .
answer label:    Message-Topic
wrong label:    Member-Collection
input sentence: The kids get great <e1> joy </e1> from <e2> eating </e2> the tomatoes right off the vine .
answer label:    Cause-Effect
wrong label:    Other
input sentence: We learned so much about <e1> lighting </e1> and rendering from these <e2> works </e2> of art .
answer label:    Message-Topic
wrong label:    Other
input sentence: The two central <e1> areas </e1> of controversy have to do with <e2> composition </e2> and titles .
answer label:    Other
wrong label:    Message-Topic
input sentence: A <e1> data </e1> <e2> warehouse </e2> is a place where data is stored for archival purposes .
answer label:    Content-

input sentence: And destiny lies for us in many places , way up in the High Lonesome with a <e1> cowardice </e1> of <e2> curs </e2> or our passions for the loves we know .
answer label:    Other
wrong label:    Member-Collection
input sentence: I was very lucky to have my friend from ballet knit a baby blanket for my daughter and my friend 's <e1> mother </e1> from Austria knit a baby <e2> sweater </e2> .
answer label:    Product-Producer
wrong label:    Entity-Origin
input sentence: The <e1> convention </e1> of splitting tax <e2> bills </e2> is based on the construction of the first paragraph of s 55 that is rejected by the High Court in Permanent Trustee .
answer label:    Other
wrong label:    Member-Collection
input sentence: A <e1> mother </e1> was seen to have built a small <e2> raft </e2> out of birch bark for her and her children , but it promptly sank among the ice floes .
answer label:    Product-Producer
wrong label:    Other
input sentence: This group 0+ baby <e1> car </e1>

wrong label:    Other
input sentence: All <e1> staff </e1> , from the <e2> chairman </e2> , Charlie Mayfield , to part-time shelf stackers in Waitrose , get the same percentage payout .
answer label:    Member-Collection
wrong label:    Product-Producer
input sentence: They were added to the <e1> photograph </e1> by an unknown <e2> hand </e2> after the picture had been circulating on the Internet for several years .
answer label:    Other
wrong label:    Product-Producer
input sentence: The <e1> film </e1> uses <e2> flashbacks </e2> as a device to tell the story , which was based on a 1947 novel by David Goodis .
answer label:    Component-Whole
wrong label:    Instrument-Agency
input sentence: High <e1> fees </e1> charged by senior lawyers became a point of <e2> discussion </e2> at the Apex Court hearing the Ambani brothers ' gas row .
answer label:    Message-Topic
wrong label:    Product-Producer
input sentence: The <e1> blisters </e1> are caused by <e2> antibodies </e2> against des

wrong label:    Content-Container
input sentence: The 34th <e1> battalion </e1> was a part of the <e2> cavalry </e2> not surrendered by Gen. Robert .
answer label:    Component-Whole
wrong label:    Member-Collection
input sentence: The <e1> box </e1> contained a complete British manual telegraph tape-making and high speed sending <e2> system </e2> .
answer label:    Other
wrong label:    Content-Container
input sentence: One <e1> soldier </e1> was brutally frank in a <e2> message </e2> posted on a website last week .
answer label:    Product-Producer
wrong label:    Member-Collection
input sentence: The <e1> definition </e1> of the vCard Specification made use of a number of existing <e2> standards </e2> .
answer label:    Instrument-Agency
wrong label:    Other
input sentence: The <e1> instrument </e1> was sealed in a <e2> polyethylene bag </e2> for 40 hours at room temperature .
answer label:    Content-Container
wrong label:    Entity-Destination
input sentence: A worker inserts th

input sentence: The plasma processing <e1> apparatus </e1> produces a plasma in a <e2> vessel </e2> and processes samples using the plasma .
answer label:    Component-Whole
wrong label:    Product-Producer
input sentence: Those models were chain driven and the <e1> mechanism </e1> was enclosed in a very visible oil <e2> bath </e2> .
answer label:    Other
wrong label:    Content-Container
input sentence: In 1993 the <e1> party </e1> won the governorship with a <e2> pledge </e2> to cut income taxes 30 % .
answer label:    Product-Producer
wrong label:    Other
input sentence: The <e1> song </e1> is featured in the 1990 film Riff-Raff , performed by the main female <e2> character </e2> .
answer label:    Other
wrong label:    Message-Topic
input sentence: The 400 V <e1> power supply </e1> ( unstabilized ) was derived from a conventional <e2> power-pack </e2> with n-filter .
answer label:    Other
wrong label:    Entity-Origin
input sentence: The coal <e1> industry </e1> has been recycli

In [14]:
LSTM_Baseline(X)

NameError: name 'LSTM_Baseline' is not defined

In [None]:
a = [[0]*10]*20
print(a)
a[0][0] = 1
print(a)

In [None]:
optimizer.