In [5]:
# The imports below load the `pytorch_lstm_01intro` package by name, so the
# working directory has to be the folder that CONTAINS the package.  If the
# notebook was started from inside `pytorch_lstm_01intro`, move up one level;
# otherwise stay where we are.  Re-running this cell is harmless: after the
# first move the final path component no longer matches.
import os

PACKAGE_DIR_NAME = "pytorch_lstm_01intro"

cwd = os.getcwd()
# os.getcwd() normally has no trailing path separator (and uses "\\" on
# Windows), so searching for "pytorch_lstm_01intro/" never matches.  Compare
# the last path component instead and take its parent directory.
if os.path.basename(cwd) == PACKAGE_DIR_NAME:
    pwd = os.path.dirname(cwd)
else:
    pwd = cwd
os.chdir(pwd)

print(pwd)

/home/sishi/NLP/pytorch_in_ten_days


In [6]:
'''
This is essentially the same as main_v2.ipynb.
The main improvements are:
1. The input is now read from a customizable CSV file instead of being hard-coded in the code.
2. Training is done by a customizable training function.
'''
import torch
import torch.nn as nn

import torch.optim as optim
import pandas as pd

from pytorch_lstm_01intro.model_lstm_tagger import LSTMTagger
from pytorch_lstm_01intro.preprocess import seq_to_embedding, seqs_to_dictionary
from pytorch_lstm_01intro.train import train, test


In [7]:
# Seed PyTorch's random number generator so repeated runs start from the same
# random state.
torch.manual_seed(1)
# Model sizes; kept deliberately small for this toy example.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# Read in the raw data.  The CSV must provide a "text" column (one
# whitespace-separated sentence per row) and a "tag" column (one
# whitespace-separated tag per word of that sentence).
training_data_raw = pd.read_csv("./pytorch_lstm_01intro/train.csv")

# Split every sentence and every tag string into lists of tokens.
texts = [t.split() for t in training_data_raw["text"].tolist()]
tags_list = [t.split() for t in training_data_raw["tag"].tolist()]

# Every word needs exactly one tag.  zip() below pairs the two lists of a row
# without comparing their lengths, so check the alignment here and fail with
# the offending row numbers instead of a less obvious error during training.
misaligned_rows = [
    row_index
    for row_index, (words, word_tags) in enumerate(zip(texts, tags_list))
    if len(words) != len(word_tags)
]
if misaligned_rows:
    raise ValueError(
        "train.csv rows with a different number of words and tags: {}".format(
            misaligned_rows
        )
    )

# Final training set: a list of (words, tags) pairs, one pair per CSV row.
training_data = list(zip(texts, tags_list))

In [8]:
# Build the two lookup tables from the training pairs: one for the words and
# one for the tags (see the printed dictionaries for their contents).
word_to_ix, tag_to_ix = seqs_to_dictionary(training_data)

# Show both mappings, one per line.
print(word_to_ix, tag_to_ix, sep="\n")


{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'book': 7}
{'DET': 0, 'NN': 1, 'V': 2}


In [12]:
# Embedding / hidden sizes are usually 32 or 64; they are kept small here.
# Flip is_nll_loss below to switch between the two loss setups.
vocab_size = len(word_to_ix)
tagset_size = len(tag_to_ix)

model = LSTMTagger(
    EMBEDDING_DIM,
    HIDDEN_DIM,
    vocab_size,
    tagset_size,
    is_nll_loss=True,
)

# Pick the loss that matches the flag stored on the model.
if model.is_nll_loss:
    loss_function = nn.NLLLoss()
else:
    loss_function = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1)



In [13]:

# Sentence used to inspect the tagger's scores before and after training.
testing_data = "The dog ate the book"

# Baseline: score the sentence with the still-untrained model.
# Per the original notes, element (i, j) of the scores is the score of tag j
# for word i, and no gradients are needed for this step -- presumably test()
# handles torch.no_grad() internally (TODO confirm in pytorch_lstm_01intro.train).
print("tag_scores before training:")
test(testing_data, model, word_to_ix)

# Fit the tagger on the CSV-derived training pairs for 100 epochs.
train(
    model,
    loss_function,
    training_data,
    word_to_ix,
    tag_to_ix,
    optimizer,
    epoch=100,
)

tag_scores before training:
Using NLL Loss:
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  0 : tensor(1.1028, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  0 : tensor(1.2176, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  1 : tensor(1.0891, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  1 : tensor(1.1964, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  2 : tensor(1.0771, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  2 : tensor(1.1775, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  3 : tensor(1.0666, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 

loss for epoch  61 : tensor(0.8394, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  61 : tensor(0.8742, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  62 : tensor(0.8338, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  62 : tensor(0.8686, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  63 : tensor(0.8282, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  63 : tensor(0.8628, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  64 : tensor(0.8225, grad_fn=<NllLossBackward>)
['Everybody', 'read', 'the', 'book']
['NN', 'V', 'DET', 'NN']
loss for epoch  64 : tensor(0.8569, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']


In [14]:
# Re-score the same test sentence now that the model has been trained.
# We roughly expect the highest score of each row at indices 0, 1, 2, 0, 1,
# i.e. DET, NN, V, DET, NN under the tag_to_ix mapping printed earlier.
# NOTE(review): the tensor in this cell's output looks like the notebook's
# display of test()'s return value, so keep the call as the final bare
# expression of the cell -- confirm against pytorch_lstm_01intro.train.
print("tag_scores after training:")
test(testing_data, model, word_to_ix)

tag_scores after training:
Using NLL Loss:


tensor([[0.6192, 0.2967, 0.0841],
        [0.2137, 0.6324, 0.1538],
        [0.0739, 0.4424, 0.4837],
        [0.5729, 0.3131, 0.1140],
        [0.1383, 0.6766, 0.1851]])