In [15]:
# If your working dir is pytorch_lstm_01intro, you need to move up one level to use the library. 
import os
cwd = os.getcwd() 
pwd = cwd.replace("pytorch_lstm_01intro/", "")
os.chdir(pwd)

print(pwd)

/home/sishi/NLP/pytorch_in_ten_days


In [16]:
'''
This is essentially the same as main_v2.ipynb 
The main improvements are:
1. Now the input is a customizable csv, instead of hard coded in the text
2. Build a customizable training function.
'''
import torch
import torch.nn as nn

import torch.optim as optim
import pandas as pd

from pytorch_lstm_01intro.model_lstm_tagger import LSTMTagger
from pytorch_lstm_01intro.preprocess import seq_to_embedding, seqs_to_dictionary
from pytorch_lstm_01intro.train import train, test


In [22]:
torch.manual_seed(1)
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# read in raw data
training_data_raw = pd.read_csv("./pytorch_lstm_01intro/train.csv")
# create mappings

#split texts and tags into training data.
texts = [t.split() for t in training_data_raw["text"].tolist()]
tags_list = [t.split() for t in training_data_raw["tag"].tolist()]

training_data = list(zip(texts, tags_list))

print(training_data)

[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']), (['The', 'man', 'read', 'the', 'book'], ['DET', 'NN', 'V', 'DET', 'NN'])]


In [18]:
# Create mapping 
word_to_ix, tag_to_ix = seqs_to_dictionary(training_data)

print(word_to_ix)
print(tag_to_ix) 


{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'man': 5, 'read': 6, 'book': 7}
{'DET': 0, 'NN': 1, 'V': 2}


In [19]:
# Usually 32 or 64 dim. Keeping them small
# You can toggle is_nll_loss on or off. 

model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(
    word_to_ix), len(tag_to_ix), is_nll_loss=True)

loss_function = nn.NLLLoss() if model.is_nll_loss else nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)



In [20]:

# get embeddings
# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
testing_data = "The dog ate the book"

print("tag_scores before training:")
test(testing_data, model, word_to_ix)

train(model, loss_function, training_data, word_to_ix, tag_to_ix, optimizer, epoch=200)

tag_scores before training:
Using NLL Loss:
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  0 : tensor(1.1617, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  0 : tensor(1.1236, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  1 : tensor(1.1480, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  1 : tensor(1.1115, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  2 : tensor(1.1362, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  2 : tensor(1.1010, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  3 : tensor(1.1258, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'b

loss for epoch  59 : tensor(1.0016, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  59 : tensor(0.9681, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  60 : tensor(1.0001, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  60 : tensor(0.9661, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  61 : tensor(0.9985, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  61 : tensor(0.9639, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  62 : tensor(0.9970, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  62 : tensor(0.9618, grad_fn=<NllLossBackward>)
['The'

loss for epoch  117 : tensor(0.8264, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  117 : tensor(0.7333, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  118 : tensor(0.8210, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  118 : tensor(0.7270, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  119 : tensor(0.8154, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  119 : tensor(0.7206, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  120 : tensor(0.8098, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  120 : tensor(0.7142, grad_fn=<NllLossBackward>

loss for epoch  177 : tensor(0.3838, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  177 : tensor(0.3323, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  178 : tensor(0.3769, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  178 : tensor(0.3262, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  179 : tensor(0.3701, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  179 : tensor(0.3202, grad_fn=<NllLossBackward>)
['The', 'dog', 'ate', 'the', 'apple']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  180 : tensor(0.3634, grad_fn=<NllLossBackward>)
['The', 'man', 'read', 'the', 'book']
['DET', 'NN', 'V', 'DET', 'NN']
loss for epoch  180 : tensor(0.3143, grad_fn=<NllLossBackward>

In [21]:
# We roughly expect 0, 1, 2, 0, 1 
print("tag_scores after training:")
test(testing_data, model, word_to_ix)

tag_scores after training:
Using NLL Loss:


tensor([[0.8799, 0.0994, 0.0207],
        [0.2410, 0.6241, 0.1349],
        [0.0683, 0.1243, 0.8074],
        [0.7749, 0.0817, 0.1435],
        [0.0158, 0.9476, 0.0366]])