In [None]:
import unicodedata
import string

# The ' (single quote) is for the words from the unix dictionary 'words'
ALL_LETTERS = string.ascii_letters + "'"
LETTERS_TOTAL = len(ALL_LETTERS) + 1

In [None]:
ALL_LETTERS

In [None]:
LETTERS_TOTAL

In [None]:
with open('/usr/share/dict/words') as fo:
    words = fo.read().split('\n')

In [None]:
def unicodeToAscii(s):
    '''Return "normalised" string for ASCII format'''
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in ALL_LETTERS
    )

In [6]:
words = [unicodeToAscii(word) for word in words if word]

In [7]:
print("number of empty strings:", len([w for w in words if len(w) == 0]))

number of empty strings: 0


In [8]:
# Neural Network time!

In [None]:
import torch
import torch.nn as nn

class firstRNN(nn.Module):
    '''My first ever Recurrent Neural Network'''
    
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.o2o = nn.Linear(output_size + hidden_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        '''Forward function for training'''
        input_combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        combined_output = torch.cat((output, hidden), 1)
        output = self.o2o(combined_output)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
        
    def getInitialisedHidden(self):
        return torch.ones([1, self.hidden_size])
        

In [10]:
import random

def randomWord(l):
    '''Return a random item from a list'''
    return random.choice(l)


In [11]:
randomWord(['cmon', 'fuck', 'what', 'is', 'this', '?'])

'this'

In [12]:
def inputTensor(word):
    '''Return a tensor representing a word in terms of multi-dimensional ndarrays of 0s and 1s'''
    # Each letter is represented as an array of 0s and 1 represents index of the letter.
    # Keep in mind that the '1' in the function creates a tensor to contain the array,
    # because pytorch accepts input as batchers rather than 'actual' values.
    tensor = torch.zeros(len(word), 1, LETTERS_TOTAL)
    for letter_tensor, letter in zip(tensor, word):
        letter_tensor[0][ALL_LETTERS.find(letter)] = 1
    return tensor

def targetTensor(word):
    '''Return a tensor of type Long that represents letters in terms of index positions including EOS'''
    return torch.LongTensor([ALL_LETTERS.find(letter) for letter in word] + [LETTERS_TOTAL - 1])
        

In [13]:
# TESTS FOR inputTensor FUNCTION
assert inputTensor('a').shape == (1, 1, LETTERS_TOTAL)
t = torch.zeros(1, 1, LETTERS_TOTAL)
t[0][0][0] = 1
assert torch.all(torch.eq(t, inputTensor('a')))

assert inputTensor("a'").shape == (2, 1, LETTERS_TOTAL)
t = torch.zeros(2, 1, LETTERS_TOTAL)
t[0][0][0] = 1
t[1][0][LETTERS_TOTAL - 2] = 1 # -2 coz the -1 is EOS
assert torch.all(torch.eq(t, inputTensor("a'")))

assert inputTensor("az'").shape == (3, 1, LETTERS_TOTAL)
t = torch.zeros(3, 1, LETTERS_TOTAL)
t[0][0][0] = 1
t[1][0][25] = 1 # Index for letter z
t[2][0][LETTERS_TOTAL - 2] = 1 # -2 coz the -1 is EOS
assert torch.all(torch.eq(t, inputTensor("az'")))

In [14]:
# TESTS FOR targetTensor FUNCTION
assert list(targetTensor('a').shape) == [2]
assert torch.all(torch.eq(targetTensor('a'), torch.LongTensor([0, 53])))

assert list(targetTensor("a'").shape) == [3]
assert torch.all(torch.eq(targetTensor("a'"), torch.LongTensor([0, 52, 53])))

assert list(targetTensor("az'").shape) == [4]
assert torch.all(torch.eq(targetTensor("az'"), torch.LongTensor([0, 25, 52, 53])))

In [15]:
import random
def getRandomTraningTensorSet(words):
    '''Return an input tensor and a target tensor for a randomly selected word'''
    random_word = random.choice(words)
    return inputTensor(random_word), targetTensor(random_word)

In [16]:
# Test for getRandomTrainingTensorSet function
in_t, out_t = getRandomTraningTensorSet(['a'])
exp_in_t, exp_out_t = torch.Tensor([[[1]+53*[0]]]), torch.LongTensor([0, 53])
assert torch.all(torch.eq(exp_in_t, in_t))
assert torch.all(torch.eq(exp_out_t, out_t))

In [87]:
# Tranining time!

criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(input_word_tensor, target_word_tensor, rnn):
    '''Train a given RNN model with an input and target tensors that represent a word'''
    # unsqueeze function makes each value in 1d tensor to be placed within
    # its own tensor. (e.g. tensor of shape (4) turns into shape (4, 1))
    target_word_tensor.unsqueeze_(-1)
    hidden = rnn.getInitialisedHidden()
    
    rnn.zero_grad()
    
    loss = 0
    
    for input_letter_tensor, target_letter_tensor in zip(input_word_tensor, target_word_tensor):
        #output, hidden = rnn(input_letter_tensor, hidden)
        output, hidden = rnn(input_letter_tensor, hidden)
        # Add up losses for each letter prediction
        loss += criterion(output, target_letter_tensor)
        
    loss.backward()
    
    for parameter in rnn.parameters():
        parameter.data.add_(parameter.grad.data, alpha=-learning_rate)
        
    return output, loss.item() / input_word_tensor.size(0)

In [60]:
exp_rnn = firstRNN(LETTERS_TOTAL, 128, LETTERS_TOTAL)

hidden = exp_rnn.getInitialisedHidden()
inp, out = getRandomTraningTensorSet(words)
first_letter = inp[0]
output, hidden = exp_rnn(first_letter, hidden)

In [62]:
output

tensor([[-4.2770, -3.7179, -3.7710, -4.1199, -4.1054, -4.1442, -3.8545, -3.9437,
         -3.9825, -3.4763, -4.4841, -3.9566, -3.8151, -4.5057, -4.0466, -3.6666,
         -4.3094, -4.4502, -3.7508, -3.4739, -3.9692, -4.1555, -4.0466, -3.5082,
         -4.0984, -3.8538, -3.9955, -4.4138, -4.4183, -4.3660, -3.6728, -4.1251,
         -4.5279, -4.1281, -3.9539, -3.8355, -3.8203, -3.8174, -4.0334, -4.0459,
         -4.0466, -4.2116, -4.0663, -4.0448, -4.1923, -4.0337, -4.0630, -3.3131,
         -4.5195, -3.5055, -4.4965, -4.3511, -4.1000, -4.1606]],
       grad_fn=<LogSoftmaxBackward>)

In [49]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s/60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [None]:
rnn = firstRNN(LETTERS_TOTAL, 128, LETTERS_TOTAL)

n_iters = 100000
print_every = 10
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters

start = time.time()

for iter in range(1, n_iters + 1):
    input_tensor, target_tensor = getRandomTraningTensorSet(words)
    output, loss = train(input_tensor, target_tensor, rnn)
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0

In [50]:
rnn

firstRNN(
  (i2h): Linear(in_features=182, out_features=128, bias=True)
  (i2o): Linear(in_features=182, out_features=54, bias=True)
  (o2o): Linear(in_features=182, out_features=54, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (softmax): LogSoftmax()
)

In [82]:
input_letter = 'h'

def evaluate(input_letter, rnn):
    '''Gimme me a word'''
    with torch.no_grad():
        letters = [input_letter]

        input_letter_tensor = inputTensor(input_letter)
        hidden = rnn.getInitialisedHidden()
        
        counter = 0
        while(counter < 20):
            output, hidden = rnn(input_letter_tensor[0], hidden)
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == LETTERS_TOTAL - 1:
                break
            else:
                letter = ALL_LETTERS[topi]
                letters.append(letter)
                input_letter_tensor = inputTensor(letter)
            counter += 1
        
        return ''.join(letters)
            

In [83]:
evaluate('y', backup_rnn)

'yyyyyyyyyiiiiiiiihhhh'

In [58]:
# That's some gibberish. Let's train it with words that contain fewer letters.

In [84]:
four_letter_words = [word for word in words if len(word) <=4]

In [197]:
rnn = firstRNN(LETTERS_TOTAL, 128, LETTERS_TOTAL)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters
four_letter_words = [word for word in words if len(word) <=3]

start = time.time()

for iter in range(1, n_iters + 1):
    input_tensor, target_tensor = getRandomTraningTensorSet(four_letter_words)
    output, loss = train(input_tensor, target_tensor, rnn)
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0

0m 16s (5000 5%) 3.0619
0m 32s (10000 10%) 3.7339
0m 48s (15000 15%) 4.8852
1m 2s (20000 20%) 2.5823
1m 16s (25000 25%) 0.5806
1m 33s (30000 30%) 2.6278
1m 51s (35000 35%) 0.5961
2m 8s (40000 40%) 3.2147
2m 23s (45000 45%) 0.4084
2m 39s (50000 50%) 1.2613
2m 59s (55000 55%) 0.2334
3m 17s (60000 60%) 1.8040
3m 35s (65000 65%) 1.1736
3m 54s (70000 70%) 1.2111
4m 8s (75000 75%) 1.8771
4m 28s (80000 80%) 0.0334
4m 48s (85000 85%) 0.8815
5m 7s (90000 90%) 0.0637
5m 23s (95000 95%) 0.0268
5m 43s (100000 100%) 0.1263


In [196]:
for pred in [evaluate('c', rnn) for i in range(50)]:
    print(pred)

ccannyynyyyyyyByyyyyy
ccannyzyyyyzyyyyyyyAy
ccannyyyyyyyyyyyyyjyy
ccannnyyyyyywyyyFyyyy
cpanyyyyyymyyyyyyyiyy
ccannyyyyyyyyyyyWyyty
ccannyGydyyyyyyyyyyyN
ccannyyyyyyyUyyyyyyyy
ccannyyyyyyyyyyyyyyyy
ccannyyyyyyyyyyyyyYyy
ccannyryyyyyyyyyyyyyy
ccannyyyyyyyyyywyyyyy
ccannyyyyyhyyynyyyyyy
ccanayyyyyyyyyyjyyyyy
ccannyyyyyyyyyyGCyyyy
ccanyyyyyyyyyyyyyQyyy
ccannyyyyyyyyyyyyyyyy
cchnyyyyyyyymyyyyyyyy
ccannyynyyyyyyyyZyyyy
ccannyyyyyjAyyyyyyyyy
ccannyyyyyyyyyyyyyyyy
ccJnynnyyyyyyyyyyyyoy
ccannyyyyyyysyyyykyyy
ccaYnnyyyyMyyyyyjyyyy
ccannyyyySyyyyyyjyyyy
ccannyyyyyyyyyyyyxyyy
cvanyyyyUyyyyyyyyyyyy
ccannyqyyyyyyyyymyygy
ccannyyyvyyyyyyyyyyyy
ccanyyyyyyyyyyyyyyyyy
cuanyyyyyyyyyyjyyyyyy
ccanayyyyyyynyyyyyyyy
ccannnnyyyyyyyyyyyyyy
ccannyyyyyyyysyyymyyy
ccannyyyyyynyyyywyyyy
ccannyyyyyyyyyyyyyyyy
ccannyyyyyyyyyyyyyyyx
cTanyyyyyyyyyyMyysyny
ccannyyyyyyyyOyyyyzyy
ccaqnyyyyyyyyyywyyyyA
cpanyyIyyyyyyyyyyyyyy
ccannnnyyygyyyyyyyyyy
ccannyyyyyyyyyyyyyyyy
ccannyynyyyyyyylyyyyy
ccannyynyyyytyyWyyyyy
ccaMyyyyyy

In [187]:

class secondRNN(nn.Module):
    '''My second ever Recurrent Neural Network
    
    This one's has got a few more layers in hope that it will be much more capable
    to predict closely correlated letters.
    '''
    
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        
        # Top-level layers
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        
        # Middle-stack layers
        self.h2h1 = nn.Linear(hidden_size, hidden_size)
        self.h2h2 = nn.Linear(hidden_size, hidden_size)
        self.h2h3 = nn.Linear(hidden_size, hidden_size)
        
        self.io2o1 = nn.Linear(output_size, output_size)
        self.io2o2 = nn.Linear(output_size, output_size)
        self.io2o3 = nn.Linear(output_size, output_size)
        
        # bottom-level layer
        self.o2o = nn.Linear(output_size + hidden_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        '''Forward function for training'''
        input_combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        
        hidden = self.h2h1(hidden)
        output = self.io2o1(output)
        hidden = self.h2h2(hidden)
        output = self.io2o2(output)
        hidden = self.h2h3(hidden)
        output = self.io2o3(output)
        
        combined_output = torch.cat((output, hidden), 1)
        output = self.o2o(combined_output)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden
        
    def getInitialisedHidden(self):
        return torch.ones([1, self.hidden_size])
        

In [189]:

rnn = secondRNN(LETTERS_TOTAL, 128, LETTERS_TOTAL)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters

start = time.time()

for iter in range(1, n_iters + 1):
    input_tensor, target_tensor = getRandomTraningTensorSet(['fuck', 'shit', 'hell', 'hole', 'hill', 'piss', 'poop'])
    output, loss = train(input_tensor, target_tensor, rnn)
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0

0m 43s (5000 5%) 1.4400
1m 26s (10000 10%) 2.3192
2m 9s (15000 15%) 0.0271
2m 54s (20000 20%) 0.0127
3m 43s (25000 25%) nan
4m 24s (30000 30%) nan
5m 6s (35000 35%) nan
5m 51s (40000 40%) nan
6m 38s (45000 45%) nan
7m 21s (50000 50%) nan
8m 1s (55000 55%) nan
8m 40s (60000 60%) nan
9m 21s (65000 65%) nan
9m 55s (70000 70%) nan
10m 30s (75000 75%) nan
11m 9s (80000 80%) nan
11m 50s (85000 85%) nan
12m 37s (90000 90%) nan
13m 21s (95000 95%) nan
13m 59s (100000 100%) nan


In [186]:
evaluate('f', rnn)

'fhhoeeeeeeeeeeeeepeep'

In [190]:
# The thing just keeps crashing with either 3 or 5 layers.
