In [2]:
import torch 
from torch import nn, optim
from torch.autograd import Variable
from tqdm import tqdm
from random import randint, shuffle
import string

In [3]:
# Probe once for a usable CUDA device; later cells branch on this flag
# to decide whether tensors and modules are moved to the GPU.
use_cuda = torch.cuda.is_available()
if use_cuda is True:
    print('CUDA is available')

In [4]:
# Helper class that builds one-hot embeddings for a given alphabet.
# `alphabet` is the collection of unique characters in your language.
class OneHotEmbedding:
    """Map characters of a fixed alphabet to one-hot vectors / class labels and back.

    `alphabet` is an indexable sequence of characters. The position of a
    character in that sequence is its class index.
    """

    def __init__(self, alphabet):
        self.alphabet = alphabet
        # index -> character
        self.inverse_map = dict(enumerate(alphabet))
        # character -> index
        self.map = dict(zip(alphabet, range(len(alphabet))))

    def N(self):
        """Return the alphabet size (length of a one-hot vector)."""
        # Use the instance's own alphabet, not a module-level global.
        return len(self.alphabet)

    def encode(self, x):
        """Return a 1-D float tensor of size N() with a single 1 at x's index.

        Raises KeyError if `x` is not in the alphabet.
        """
        v = torch.zeros(self.N())
        v[self.map[x]] = 1
        return v

    def label(self, x):
        """Return the class index of character `x` as a LongTensor of size 1.

        Raises KeyError if `x` is not in the alphabet.
        """
        return torch.LongTensor([self.map[x]])

    def inverse_label(self, x):
        """Return the character for class index `x`.

        `x` may be a Python int or a single-element tensor; it is coerced
        with int() because a tensor is not a usable dict key.
        """
        return self.inverse_map[int(x)]

    def inverse_classes(self, indices):
        """Return the string made of the characters for each index in `indices`."""
        return ''.join(self.inverse_label(i) for i in indices)

    def decode(self, y):
        """Return the most likely characters for a score tensor as a string.

        `y` is indexed as (time, batch, class), matching the original
        transpose(0, 1) / max-over-dim-2 logic. The argmax over the class
        dimension is taken for every position. With a batch size above 1 the
        per-sequence results are concatenated in batch order.
        """
        _, max_probs = torch.max(y.transpose(0, 1), 2)
        # Old-style Variables expose the raw tensor via .data; plain tensors
        # are used as they are.
        max_probs = getattr(max_probs, 'data', max_probs)
        # Flatten instead of squeeze() so that length-1 sequences and both
        # keepdim conventions of torch.max yield a plain 1-D index list.
        flat = max_probs.contiguous().view(-1).tolist()
        return self.inverse_classes(flat)

In [5]:
# Model Def
class Network(nn.Module):
    """Single-step character model: Linear -> GRU -> Linear.

    Required keyword arguments: `input_size`, `hidden_size`, `output_size`
    and `n_layers` (number of stacked GRU layers).
    """

    def __init__(self, **kw):
        super(Network, self).__init__()
        # Same lookup order as the attribute list, so a missing key fails
        # on the first absent name.
        for key in ('input_size', 'hidden_size', 'output_size', 'n_layers'):
            setattr(self, key, kw[key])

        self.fc_in = nn.Linear(self.input_size, self.hidden_size)
        self.rnn = nn.GRU(self.hidden_size, self.hidden_size, self.n_layers)
        self.fc_out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, h):
        """Run one time step.

        `x` is flattened to a single row (1, D) before the input layer and
        the result is reshaped to (1, 1, hidden) for the GRU, i.e. one time
        step with a batch of one. `h` is the previous hidden state.
        Returns `(scores, new_hidden)` where `scores` has shape
        (1, output_size).
        """
        projected = self.fc_in(x.view(1, -1))

        # The GRU wants (time, batch, features); h(t-1) goes in, h(t) comes out.
        step_out, h_next = self.rnn(projected.view(1, 1, -1), h)

        scores = self.fc_out(step_out.view(1, -1))
        return scores, h_next

    def init_hidden(self):
        """Return an all-zero hidden state of shape (n_layers, 1, hidden_size)."""
        zeros = torch.zeros(self.n_layers, 1, self.hidden_size)
        return Variable(zeros)

In [6]:
printable = string.printable

# Read the corpus and lowercase it (so there are fewer class labels).
# The `with` block makes sure the file handle is closed.
with open("../../../data/lab2/sh.txt") as corpus_file:
    text = corpus_file.read().lower()

# Keep only printable characters, minus a few symbols that are dropped
# from the corpus. A single join avoids quadratic string concatenation.
allowed_chars = set(printable) - set('{}[]&_')
pruned_text = ''.join(c for c in text if c in allowed_chars)
text = pruned_text

# Sort the alphabet so every character gets the same class index on every
# run; a bare set() has no guaranteed order, which would make saved
# checkpoints incompatible with a later session's mapping.
alphabet = sorted(set(text))

print ('size of your alphabet =', len(alphabet))
print ('your alphabet is =', alphabet)

onehot = OneHotEmbedding(alphabet)

chunk_size = 128      # characters per truncated-backprop chunk in the training loop
batch_length = 64
hidden_size = 100
n_layers = 1
# Input and output sizes are both len(alphabet) == onehot.N().
net = Network(input_size=onehot.N(), hidden_size=hidden_size, output_size=onehot.N(), n_layers=n_layers)
criterion = nn.CrossEntropyLoss()
learning_rate = 5e-3
optimizer = optim.Adam(net.parameters(), learning_rate)

if use_cuda:
    net = net.cuda()
    criterion = criterion.cuda()
epoch = 0

IOError: [Errno 2] No such file or directory: '../../../data/lab2/sh.txt'

In [7]:
def generate(**kw):
    """Sample text from the module-level `net`, starting from a priming string.

    Keyword arguments:
        prime: non-empty string used to warm up the hidden state; it is also
            the beginning of the returned text.
        length: number of characters to sample after the prime.
        temperature: divisor applied to the output scores before sampling.

    Returns the prime followed by `length` sampled characters.

    Raises:
        KeyError: if a keyword is missing or a prime character is not in the
            alphabet known to `onehot`.
        ValueError: if `prime` is empty (there would be no first input).
    """
    result = kw['prime']
    if not result:
        raise ValueError("generate() needs a non-empty 'prime' string")

    def _as_input(char):
        # One-hot encode a character, move it to the GPU if needed, and wrap it.
        vec = onehot.encode(char)
        if use_cuda:
            vec = vec.cuda()
        return Variable(vec, requires_grad=False)

    h = net.init_hidden()
    if use_cuda:
        h = h.cuda()

    # Warm up the hidden state on everything except the last prime character.
    # That last character is the first input of the sampling loop below, so
    # each character is seen by the network exactly once.
    for char in result[:-1]:
        _, h = net(_as_input(char), h)
    x = _as_input(result[-1])

    for _ in range(kw["length"]):
        y, h = net(x, h)
        scores = y.data.view(-1).div(kw["temperature"])
        # Subtracting the maximum leaves the sampling distribution unchanged
        # (multinomial normalises its weights) but keeps exp() from overflowing.
        y_dist = (scores - scores.max()).exp()
        # int() turns the sampled index into a plain integer so it can be
        # used as a dictionary key by onehot.inverse_label.
        sampled_index = int(torch.multinomial(y_dist, 1)[0])

        prediction = onehot.inverse_label(sampled_index)
        result += prediction
        x = _as_input(prediction)
    return result

In [8]:
# Training: 100 passes over the corpus, one optimizer step per chunk of
# `chunk_size` characters (truncated backpropagation through time).
for j in range(100):
    # The hidden state is initialised only at the start of each pass and is
    # carried through the whole text.
    h = net.init_hidden()
    if use_cuda:
        # Only move to the GPU when one is available, like the other cells do.
        h = h.cuda()
    for k, i in enumerate(range(0, len(text) - chunk_size, chunk_size)):
        # chunk_size + 1 characters: inputs are chunk[:-1], targets chunk[1:].
        chunk = text[i:i + chunk_size + 1]
        xs, zs = chunk[:-1], chunk[1:]

        loss = 0

        net.zero_grad()
        # Iterate through each character -> next character pair,
        # carrying the hidden state forward.
        for x, z in zip(xs, zs):
            x = onehot.encode(x)
            z = onehot.label(z)
            if use_cuda:
                x = x.cuda()
                z = z.cuda()
            x = Variable(x, requires_grad=False)
            z = Variable(z)
            y, h = net(x, h)
            loss += criterion(y.view(1, -1), z)

        loss.backward()
        optimizer.step()

        # Detach the hidden state so the next chunk does not backpropagate
        # into this chunk's (already freed) graph. h.data stays on whatever
        # device h was on.
        h = Variable(h.data)

        if k % 50 == 0:
            new = generate(prime='elementary my dear watson'.lower(), temperature=0.8, length=100)
            print("----- Generated %d: --------------\n"%(k), new)
        if k % 5000 == 0:
            # NOTE(review): this pickles the whole module; saving
            # net.state_dict() is the more portable form, but it would change
            # the checkpoint format for anything that loads these files.
            torch.save(net, 'char_rnn_stateful_onehot_' + str(j) + '_' + str(k) + '.pt')

NameError: name 'net' is not defined