In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
os.environ["CUDA_VISIBLE_DEVICES"]= "0"
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import torch.backends.cudnn as cudnn
cudnn.benchmark = True
import time
start_time = time.time()

torch.manual_seed(1)

<torch._C.Generator at 0x7f8de3ee4a90>

In [2]:
import preprocessor
data = preprocessor.load_data()

training_data = []
for sent in data:
    word, pos = [],[]
    for token in sent:
        w = token[1]
        p = token[4]
        word.append(w)
        pos.append(p)
    pair = (word, pos)
    training_data.append(pair)
    
print((training_data[0]))
def prepare_index():
    word_to_ix = {}
    pos_to_ix = {}
    for word, pos in training_data:
        for w in word:
            if w not in word_to_ix:
                word_to_ix[w] = len(word_to_ix)
        for p in pos:
            if p not in pos_to_ix:
                pos_to_ix[p] = len(pos_to_ix)
    return word_to_ix, pos_to_ix
word_to_ix, pos_to_ix = prepare_index()
print('###  word vocab size:', len(word_to_ix))
print('### pos vocab size:', len(pos_to_ix), '\n')

### loading data now...
(['태풍', 'Hugo가', '남긴', '피해들과', '회사', '내', '몇몇', '주요', '부서들의', '저조한', '실적들을', '반영하여,', 'Aetna', 'Life', 'and', 'Casualty', 'Co.의', '3분기', '순이익이', '182.6', '백만', '달러', '또는', '주당', '1.63', '달러로', '22', '%', '하락하였다.'], ['NNG', 'SL+JKS', 'VV+ETM', 'NNG+JC', 'NNG', 'NP+JKG', 'MM', 'NNG', 'NNG+JKG', 'VA+ETM', 'NNG+JKO', 'VV+EC+SP', 'SL', 'SL', 'SL', 'SL', 'SL+SF+JKG', 'SN+NNG', 'NNG+JKS', 'SN', 'NR', 'NNB', 'MAJ', 'NNG', 'SN', 'NNB+JKB', 'SN', 'SW', 'VV+EP+EF+SF'])
###  word vocab size: 21171
### pos vocab size: 861 



In [3]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs).type(torch.cuda.LongTensor)
# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [4]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Before we've done anything, we dont have any hidden state.
        # Refer to the Pytorch documentation to see exactly
        # why they have this dimensionality.
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        return (torch.zeros(1, 1, self.hidden_dim).cuda(),
                torch.zeros(1, 1, self.hidden_dim).cuda())

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        embeds.cuda()
        lstm_out, self.hidden = self.lstm(
            embeds.view(len(sentence), 1, -1), self.hidden)
#         print('\nLSTM out')
#         print(lstm_out)
#         print('')
#         print('\nNEW LSTM out.view')
#         print(lstm_out.view(len(sentence), -1))
#         print('')
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
#         print('\tag_scores')
#         print(tag_scores)
        return tag_scores

In [5]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(pos_to_ix))
model.cuda()
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()

total_step = len(training_data)
for epoch in range(10):  # again, normally you would NOT do 300 epochs, it is toy data
    n = 0
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Also, we need to clear out the hidden state of the LSTM,
        # detaching it from its history on the last instance.
        model.hidden = model.init_hidden()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, pos_to_ix)
#         print('targets')
#         print(len(targets))
#         print(targets)
        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)
#         print('\ntag_scores')
#         print(len(tag_scores[0]))
#         print(tag_scores[0])
        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
        
        n = n+1
        if n % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, 50, n, total_step, loss.item()))
#         break

# See what the scores are after training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

Epoch [1/50], Step [100/12431], Loss: 6.2165
Epoch [1/50], Step [200/12431], Loss: 5.4325
Epoch [1/50], Step [300/12431], Loss: 5.7744
Epoch [1/50], Step [400/12431], Loss: 5.1566
Epoch [1/50], Step [500/12431], Loss: 4.9881
Epoch [1/50], Step [600/12431], Loss: 4.7192
Epoch [1/50], Step [700/12431], Loss: 3.7847
Epoch [1/50], Step [800/12431], Loss: 3.8105
Epoch [1/50], Step [900/12431], Loss: 4.2002
Epoch [1/50], Step [1000/12431], Loss: 4.4125
Epoch [1/50], Step [1100/12431], Loss: 3.8686
Epoch [1/50], Step [1200/12431], Loss: 4.2002


KeyboardInterrupt: 