In [1]:
import json
from pprint import pprint
from utils.data_utils import *

## Setup

In [2]:
# # Clone the repository
# !git clone https://github.com/facebookresearch/colorlessgreenRNNs

# # Navigate to the src directory, check its contents
# %cd colorlessgreenRNNs/src
# !ls

# # Get the pretrained model:
# !wget https://dl.fbaipublicfiles.com/colorless-green-rnns/best-models/English/hidden650_batch128_dropout0.2_lr20.0.pt

# !mkdir ../data/lm/English
# !wget -P ../data/lm/English/ https://dl.fbaipublicfiles.com/colorless-green-rnns/training-data/English/train.txt
# !wget -P ../data/lm/English/ https://dl.fbaipublicfiles.com/colorless-green-rnns/training-data/English/test.txt
# !wget -P ../data/lm/English/ https://dl.fbaipublicfiles.com/colorless-green-rnns/training-data/English/valid.txt
# !wget -P ../data/lm/English/ https://dl.fbaipublicfiles.com/colorless-green-rnns/training-data/English/vocab.txt

# # %cd colorlessgreenRNNs/src/language_models


In [3]:
# Later cells use paths relative to this directory ("../hidden650_batch128_dropout0.2_lr20.0.pt",
# "../../data/lm/English") and import `model` / `dictionary_corpus` (presumably resolved from this
# working directory -- confirm), so this cell must run before them.
# NOTE: the path is relative, so run this cell only once per kernel session.
%cd colorlessgreenRNNs/src/language_models

/home/liza/Projects/701/colorlessgreenRNNs/src/language_models


In [19]:
import torch
import torch.nn as nn
import numpy as np

# RNNModel comes from the colorlessgreenRNNs checkout; it must be importable both for
# unpickling the checkpoint and for rebuilding the network below.
from model import RNNModel

# Seed every RNG in use and force deterministic cuDNN kernels so runs are repeatable.
SEED = 50360
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(SEED)

# Choose the device once and reuse it everywhere; falls back to CPU when no GPU is present.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# SECURITY: torch.load unpickles arbitrary Python objects -- only load checkpoints from a
# trusted source.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which rejects
# a fully pickled module like this one; pass weights_only=False there if loading fails.
fn = "../hidden650_batch128_dropout0.2_lr20.0.pt"
with open(fn, "rb") as model_f:
    # Load from the open handle and remap storages so a GPU-saved checkpoint also loads on CPU.
    model_ = torch.load(model_f, map_location=device)

# Construct a new RNNModel using PyTorch 1.x implementations of NN modules.
# 50001 is the vocabulary size; 650 and 0.2 match the hidden size / dropout in the checkpoint
# file name. Presumably the positional arguments are
# (rnn_type, ntoken, ninp, nhid, nlayers, dropout, tie_weights) -- confirm against model.py.
model = RNNModel("LSTM", 50001, 650, 650, 2, 0.2, False)
# Copy over the trained weights from the model loaded in
model.load_state_dict(model_.state_dict())

model = model.to(device)
# Inference only: switch off dropout so the scores are deterministic.
model.eval()

# True when the model ended up on the GPU (shown as the cell output).
next(model.parameters()).is_cuda




## Example Usage

In [5]:
# ## Import dictionary_corpus, part of the colorlessgreenRNNs repository, which has some useful functions
# import dictionary_corpus

# data_path = "../../data/lm/English"
# dictionary = dictionary_corpus.Dictionary(data_path)

# sentence = "the key to the cabinets"
# tokenized_sentence = sentence.split()
# input = [dictionary.word2idx[w]  if w in dictionary.word2idx
#                                     else dictionary.word2idx["<unk>"]
#                   for w in tokenized_sentence]

# for w in tokenized_sentence:
#     if w not in dictionary.word2idx:
#         print(w, ' not in vocab!')

# input = torch.tensor(input, dtype=torch.long)
# input = input.cuda()

# ## Extract the hidden and output layers at each input token:
# cur_sentence_output, cur_sentence_hidden = model(input.view(-1, 1), # (sequence_length, batch_size).
#                        model.init_hidden(1)) # one input at a time, thus batch_size = 1

# ## Size of the output for the current sentence: Length of the sequence (in words), batch size (1), and length of the vocabulary (50001)
# ## This is the score of each word in the vocabulary (N = 50001) as the next token in the sequence, for each of the 5 input positions.  
# cur_sentence_output.size()

# ## The output at the final token represents the predictions about the next word in the sequence. 
# ## We can pull out the next word predictions explicitly, since that's what we care about.
# next_word_scores = cur_sentence_output[-1].view(-1)

# ## What's the most likely next word?
# ## Get the maximum value, and the associated id, with the max method:
# predicted_word, predicted_word_id = next_word_scores.max(0)

# predicted_word_id = predicted_word_id.item() # get an int out of a 1x1 tensor. Remove this line to see the difference!
# print("Most likely next word ID:",predicted_word_id)
# print("Most likely next word:", dictionary.idx2word[predicted_word_id])

# ## Retrieve the score of a particular word
# is_idx = dictionary.word2idx["is"]
# are_idx = dictionary.word2idx["are"]

# print("is score: ", next_word_scores[is_idx].item())
# print("are score: ", next_word_scores[are_idx].item())

# ## Scores are uninterpretable on their own; we might want to convert them 
# ## to a probability distribution using softmax
# import torch.nn.functional as F

# next_word_dist = F.softmax(next_word_scores, dim=0)

# print("is prob: ", next_word_dist[is_idx].item())
# print("are prob: ", next_word_dist[are_idx].item())


## Run Model and Append to Data

In [13]:
## Import dictionary_corpus, part of the colorlessgreenRNNs repository, which has some useful functions
import dictionary_corpus
import torch.nn.functional as F

# Relative to the current working directory (set by the earlier %cd into src/language_models).
data_path = "../../data/lm/English"
# Vocabulary object; later code uses its word2idx and idx2word lookups.
# Presumably built from the files under data_path (e.g. vocab.txt) -- confirm in dictionary_corpus.
dictionary = dictionary_corpus.Dictionary(data_path)
# Re-seed NumPy's global RNG with the same seed used in the model-loading cell.
np.random.seed(50360)


In [18]:
def run_model(intro, query):
    """Score each word of ``query`` as the continuation of its left context.

    The intro is fed to the language model and the softmax probability of the
    first query word being the next token is printed. That word is then appended
    to the context and the process repeats for the following query word.

    Relies on the notebook globals ``model``, ``dictionary``, ``torch`` and ``F``.
    The model is run in eval mode (dropout off) without gradient tracking; its
    previous train/eval flag is restored afterwards.

    Args:
        intro: Whitespace-separated context string; needs at least one token
            whenever ``query`` is non-empty.
        query: Whitespace-separated words to score, in order.

    Returns:
        list[float]: Probability assigned to each query word, in order
        (empty when ``query`` has no tokens).

    Raises:
        ValueError: If there are query words but the intro is empty, because
            there is no context to predict from.
    """
    def _encode(words):
        # Map words to vocabulary ids, reporting and replacing OOV words with "<unk>".
        ids = []
        for word in words:
            if word in dictionary.word2idx:
                ids.append(dictionary.word2idx[word])
            else:
                print(word, ' not in vocab!')
                ids.append(dictionary.word2idx["<unk>"])
        return ids

    context_ids = _encode(intro.split())
    query_ids = _encode(query.split())

    if query_ids and not context_ids:
        raise ValueError("intro must contain at least one token to predict from")

    # Put inputs wherever the model lives instead of assuming a GPU.
    device = next(model.parameters()).device

    # Dropout must be disabled for scoring, otherwise probabilities vary between calls.
    was_training = model.training
    model.eval()

    probs = []
    try:
        with torch.no_grad():  # inference only: no autograd graph needed
            # NOTE(review): the whole prefix is re-encoded for every query token; if the
            # model is strictly left-to-right, one pass over intro + query would suffice.
            for query_id in query_ids:
                print(f'intro: {[dictionary.idx2word[w] for w in context_ids]}')
                print(f'query: {dictionary.idx2word[query_id]}')

                tokens = torch.tensor(context_ids, dtype=torch.long, device=device)

                # Input is shaped (sequence_length, batch_size); one sentence -> batch_size = 1.
                output, _ = model(tokens.view(-1, 1), model.init_hidden(1))

                # Scores at the last position are the predictions for the next word.
                next_word_dist = F.softmax(output[-1].view(-1), dim=0)
                prob = next_word_dist[query_id].item()
                print("query token prob: ", prob)

                probs.append(prob)
                # The scored word becomes part of the context for the next query word.
                context_ids.append(query_id)
    finally:
        model.train(was_training)

    return probs
