In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# adapted from https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

In [None]:
# load data

In [None]:
# process data

In [28]:
SOS_token = 0
EOS_token = 1

class Vocab:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [14]:
class VectorSelector:
    def __init__(self, name):
        self.name = name
        self.vector2index = {}
        self.vector2count = {}
        self.index2vector = {}
        self.n_vectors = 0

    def addVector(self, vector):
        if vector not in self.vector2index:
            self.vector2index[vector] = self.n_vectors
            self.vector2count[vector] = 1
            self.index2vector[self.n_vectors] = vector
            self.n_vectors += 1
        else:
            self.vector2count[vector] += 1

In [24]:
# build model

class EncoderRNN(nn.Module):
    def __init__(self, action_size, entity_size, frame_size, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        
        self.action_embedding = nn.Embedding(action_size, hidden_size)
        self.entity_embedding = nn.Embedding(entity_size, hidden_size)
        self.frame_embedding = nn.Embedding(frame_size, hidden_size)
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [25]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

NameError: name 'MAX_LENGTH' is not defined

In [33]:
def prepareData(actions, entities, frames, sentences):
    
    input_action = VectorSelector('actions')
    for action in actions:
        input_action.addVector(action)
    print("Counted actions:")
    print(input_action.n_vectors)

    input_entity = VectorSelector('entities')
    for entity in entities:
        input_entity.addVector(entity)
    print("Counted entities:")
    print(input_entity.n_vectors)
    
    input_frame = VectorSelector('frames')
    for frame in frames:
        input_frame.addVector(frame)
    print("Counted frames:")
    print(input_frame.n_vectors)
  
    input_vocab = Vocab('vocab')
    for sentence in sentences:
        input_vocab.addSentence(sentence)
    print("Counted vocab:")
    print(input_vocab.n_words)
    
    
    return input_action, input_entity, input_frame, input_vocab

In [35]:
hidden_size = 50

actions = ['cut', 'wash', 'eat', 'chop', 'throw']
entities = ['Bill', 'ball', 'field']
frames = ['Sbj V', 'Sbj V Obj', 'Sbj V Obj PathP', 'Sbj V Obj ResultP', 'Sbj V Comp']

sentences = ['Bill cut the salad', 'She ate the cheese', 'We threw the apple across the room']

input_action, input_entity, input_frame, input_vocab = prepareData(actions, entities, frames, sentences)

encoder1 = EncoderRNN(input_action.n_vectors, 
                      input_entity.n_vectors, 
                      input_frame.n_vectors, 
                      input_vocab.n_words, 
                      hidden_size).to(device)

Counted actions:
5
Counted entities:
3
Counted frames:
5
Counted vocab:
14
