In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
with open('./shakespeare.txt', 'r', encoding='utf8') as f:
    text = f.read()

In [4]:
type(text)

str

In [5]:
text[:1000]

"\n                     1\n  From fairest creatures we desire increase,\n  That thereby beauty's rose might never die,\n  But as the riper should by time decease,\n  His tender heir might bear his memory:\n  But thou contracted to thine own bright eyes,\n  Feed'st thy light's flame with self-substantial fuel,\n  Making a famine where abundance lies,\n  Thy self thy foe, to thy sweet self too cruel:\n  Thou that art now the world's fresh ornament,\n  And only herald to the gaudy spring,\n  Within thine own bud buriest thy content,\n  And tender churl mak'st waste in niggarding:\n    Pity the world, or else this glutton be,\n    To eat the world's due, by the grave and thee.\n\n\n                     2\n  When forty winters shall besiege thy brow,\n  And dig deep trenches in thy beauty's field,\n  Thy youth's proud livery so gazed on now,\n  Will be a tattered weed of small worth held:  \n  Then being asked, where all thy beauty lies,\n  Where all the treasure of thy lusty days;\n  To sa

In [6]:
print(text[:1000])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
    Pity the world, or else this glutton be,
    To eat the world's due, by the grave and thee.


                     2
  When forty winters shall besiege thy brow,
  And dig deep trenches in thy beauty's field,
  Thy youth's proud livery so gazed on now,
  Will be a tattered weed of small worth held:  
  Then being asked, where all thy beauty lies,
  Where all the treasure of thy lusty days;
  To say within thine own deep su

In [7]:
len(text)

5445609

In [8]:
all_characters = set(text)
all_characters

{'\n',
 ' ',
 '!',
 '"',
 '&',
 "'",
 '(',
 ')',
 ',',
 '-',
 '.',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '<',
 '>',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 '[',
 ']',
 '_',
 '`',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '|',
 '}'}

In [9]:
len(all_characters)

84

In [10]:
for pair in enumerate(all_characters):
    print(pair)

(0, 'a')
(1, 'k')
(2, '\n')
(3, 't')
(4, 'O')
(5, '!')
(6, '2')
(7, 'M')
(8, '9')
(9, 'i')
(10, '-')
(11, '5')
(12, '4')
(13, ',')
(14, '_')
(15, 'D')
(16, 'x')
(17, 'Y')
(18, '<')
(19, 'L')
(20, 'J')
(21, 'l')
(22, 'd')
(23, '(')
(24, '7')
(25, '`')
(26, '&')
(27, ' ')
(28, 'S')
(29, 'q')
(30, 'Z')
(31, ':')
(32, 'p')
(33, 'X')
(34, 'C')
(35, '[')
(36, 'N')
(37, 'm')
(38, 'A')
(39, '.')
(40, 'Q')
(41, 'I')
(42, 'h')
(43, 'T')
(44, 'w')
(45, 'P')
(46, 'e')
(47, '0')
(48, 'g')
(49, '|')
(50, 'F')
(51, '>')
(52, 'n')
(53, '8')
(54, 'W')
(55, '3')
(56, 'j')
(57, '"')
(58, 'B')
(59, 'z')
(60, 'b')
(61, 'E')
(62, '6')
(63, 'G')
(64, 'y')
(65, ']')
(66, '1')
(67, ')')
(68, 'R')
(69, "'")
(70, '}')
(71, 'r')
(72, ';')
(73, 'V')
(74, 'c')
(75, 'f')
(76, 'K')
(77, 's')
(78, '?')
(79, 'o')
(80, 'H')
(81, 'v')
(82, 'u')
(83, 'U')


In [11]:
decoder = dict(enumerate(all_characters))
decoder

{0: 'a',
 1: 'k',
 2: '\n',
 3: 't',
 4: 'O',
 5: '!',
 6: '2',
 7: 'M',
 8: '9',
 9: 'i',
 10: '-',
 11: '5',
 12: '4',
 13: ',',
 14: '_',
 15: 'D',
 16: 'x',
 17: 'Y',
 18: '<',
 19: 'L',
 20: 'J',
 21: 'l',
 22: 'd',
 23: '(',
 24: '7',
 25: '`',
 26: '&',
 27: ' ',
 28: 'S',
 29: 'q',
 30: 'Z',
 31: ':',
 32: 'p',
 33: 'X',
 34: 'C',
 35: '[',
 36: 'N',
 37: 'm',
 38: 'A',
 39: '.',
 40: 'Q',
 41: 'I',
 42: 'h',
 43: 'T',
 44: 'w',
 45: 'P',
 46: 'e',
 47: '0',
 48: 'g',
 49: '|',
 50: 'F',
 51: '>',
 52: 'n',
 53: '8',
 54: 'W',
 55: '3',
 56: 'j',
 57: '"',
 58: 'B',
 59: 'z',
 60: 'b',
 61: 'E',
 62: '6',
 63: 'G',
 64: 'y',
 65: ']',
 66: '1',
 67: ')',
 68: 'R',
 69: "'",
 70: '}',
 71: 'r',
 72: ';',
 73: 'V',
 74: 'c',
 75: 'f',
 76: 'K',
 77: 's',
 78: '?',
 79: 'o',
 80: 'H',
 81: 'v',
 82: 'u',
 83: 'U'}

In [12]:
encoder = {char: ind for ind,char in decoder.items()}
encoder

{'a': 0,
 'k': 1,
 '\n': 2,
 't': 3,
 'O': 4,
 '!': 5,
 '2': 6,
 'M': 7,
 '9': 8,
 'i': 9,
 '-': 10,
 '5': 11,
 '4': 12,
 ',': 13,
 '_': 14,
 'D': 15,
 'x': 16,
 'Y': 17,
 '<': 18,
 'L': 19,
 'J': 20,
 'l': 21,
 'd': 22,
 '(': 23,
 '7': 24,
 '`': 25,
 '&': 26,
 ' ': 27,
 'S': 28,
 'q': 29,
 'Z': 30,
 ':': 31,
 'p': 32,
 'X': 33,
 'C': 34,
 '[': 35,
 'N': 36,
 'm': 37,
 'A': 38,
 '.': 39,
 'Q': 40,
 'I': 41,
 'h': 42,
 'T': 43,
 'w': 44,
 'P': 45,
 'e': 46,
 '0': 47,
 'g': 48,
 '|': 49,
 'F': 50,
 '>': 51,
 'n': 52,
 '8': 53,
 'W': 54,
 '3': 55,
 'j': 56,
 '"': 57,
 'B': 58,
 'z': 59,
 'b': 60,
 'E': 61,
 '6': 62,
 'G': 63,
 'y': 64,
 ']': 65,
 '1': 66,
 ')': 67,
 'R': 68,
 "'": 69,
 '}': 70,
 'r': 71,
 ';': 72,
 'V': 73,
 'c': 74,
 'f': 75,
 'K': 76,
 's': 77,
 '?': 78,
 'o': 79,
 'H': 80,
 'v': 81,
 'u': 82,
 'U': 83}

In [13]:
encoder_text = np.array([encoder[char] for char in text])
encoder_text[:500]

array([ 2, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
       27, 27, 27, 27, 27, 66,  2, 27, 27, 50, 71, 79, 37, 27, 75,  0,  9,
       71, 46, 77,  3, 27, 74, 71, 46,  0,  3, 82, 71, 46, 77, 27, 44, 46,
       27, 22, 46, 77,  9, 71, 46, 27,  9, 52, 74, 71, 46,  0, 77, 46, 13,
        2, 27, 27, 43, 42,  0,  3, 27,  3, 42, 46, 71, 46, 60, 64, 27, 60,
       46,  0, 82,  3, 64, 69, 77, 27, 71, 79, 77, 46, 27, 37,  9, 48, 42,
        3, 27, 52, 46, 81, 46, 71, 27, 22,  9, 46, 13,  2, 27, 27, 58, 82,
        3, 27,  0, 77, 27,  3, 42, 46, 27, 71,  9, 32, 46, 71, 27, 77, 42,
       79, 82, 21, 22, 27, 60, 64, 27,  3,  9, 37, 46, 27, 22, 46, 74, 46,
        0, 77, 46, 13,  2, 27, 27, 80,  9, 77, 27,  3, 46, 52, 22, 46, 71,
       27, 42, 46,  9, 71, 27, 37,  9, 48, 42,  3, 27, 60, 46,  0, 71, 27,
       42,  9, 77, 27, 37, 46, 37, 79, 71, 64, 31,  2, 27, 27, 58, 82,  3,
       27,  3, 42, 79, 82, 27, 74, 79, 52,  3, 71,  0, 74,  3, 46, 22, 27,
        3, 79, 27,  3, 42

In [14]:
decoder[12]

'4'

In [15]:
def one_hot_encoder(encoded_text, num_uni_chars):
    
    #encoded_text : batch of encoded text
    #num_uni_chars : len(set(text))
    
    one_hot = np.zeros((encoded_text.size, num_uni_chars)) #num_chars_in_batch(s) x unique_chars
    one_hot = one_hot.astype(np.float32)
    one_hot[ np.arange(one_hot.shape[0]), encoded_text.flatten()]=1.0
    #Resize to normal format
    one_hot = one_hot.reshape((*encoded_text.shape,num_uni_chars))
    return one_hot

In [16]:
encoder_text.flatten().shape

(5445609,)

In [17]:
a = np.zeros((9,3)) #Normal format 3x3
a

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [18]:
b = np.array([1,2,0]*3)

In [19]:
a[np.arange(a.shape[0]), b.flatten()] = 1
a

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [20]:
a = a.reshape((*(3,3),3))
a

array([[[0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]],

       [[0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]],

       [[0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]]])

In [21]:
a = (3,3)
a

(3, 3)

In [22]:
def foo(bar,l):
    print(bar,l)

baz = [1, 2]

foo(*baz)

1 2


In [23]:
arr = np.array([1,2,0])
arr

array([1, 2, 0])

In [24]:
one_hot_encoder(arr,3)

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

In [25]:
example_text = np.arange(10)
example_text

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [26]:
example_text.reshape(5,-1)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [33]:
def generate_batches(encoded_text, samp_per_batch=10, seq_len=50):
    #x : encoded_text of length seq_len
    #y : encoded_text shifted by 1
    char_per_batch = samp_per_batch * seq_len #total chars used per batch
    num_batches_available = int(len(encoded_text) / char_per_batch)
    encoded_text = encoded_text[:num_batches_available*char_per_batch] #limit the size of original text
    encoded_text = encoded_text.reshape((samp_per_batch, -1))
    
    for n in range(0, encoded_text.shape[1], seq_len):
        x = encoded_text[:, n:n+seq_len]
        y = np.zeros_like(x)
        y[:,:-1] = x[:,1:]
        try:
            y[:,-1] = encoded_text[:, n+seq_len]
        except:
            y[:,-1] = encoded_text[:,0]
        yield x,y
        

In [34]:
sample_text = encoder_text[:20]
sample_text

array([ 2, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
       27, 27, 27])

In [35]:
batch_generator = generate_batches(sample_text, samp_per_batch=2,seq_len=5)

In [36]:
x,y = next(batch_generator)

In [37]:
x

array([[ 2, 27, 27, 27, 27],
       [27, 27, 27, 27, 27]])

In [38]:
y

array([[27, 27, 27, 27, 27],
       [27, 27, 27, 27, 27]])

In [51]:
class CharModel(nn.Module):
    def __init__(self, all_chars, num_hidden = 256, num_layers=3, drop_prob=0.5, use_gpu=False):
        super().__init__()
        self.drop_prob_prob = drop_prob
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.use_gpu = use_gpu
        
        self.all_chars = all_chars
        self.decoder = dict(enumerate(all_chars))
        self.encoder = {char:ind for ind, char in decoder.items()}
        
        #Batch first : (batch, seq, feature)
        self.lstm = nn.LSTM(len(self.all_chars), num_hidden, num_layers, dropout=drop_prob, batch_first=True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc_linear = nn.Linear(num_hidden, len(self.all_chars))
        
    def forward(self, x, hidden):
        lstm_output, hidden = self.lstm(x, hidden)
        
        drop_output = self.dropout(lstm_output)
        
        drop_output = drop_output.contiguous().view(-1, self.num_hidden)
        
        final_out = self.fc_linear(drop_output)
        return final_out, hidden
    
    def hidden_state(self, batch_size):
        if self.use_gpu:
            hidden = (torch.zeros(self.num_layers,batch_size,self.num_hidden).cuda(),
                      torch.zeros(self.num_layers,batch_size,self.num_hidden).cuda())
        else:
            hidden = (torch.zeros(self.num_layers,batch_size,self.num_hidden),
                      torch.zeros(self.num_layers,batch_size,self.num_hidden))
        return hidden

In [52]:
model = CharModel(all_characters, num_hidden=512, num_layers=3, drop_prob=0.5, use_gpu=True)

In [53]:
total_params = []
for p in model.parameters():
    total_params.append(int(p.numel()))

In [54]:
sum(total_params)

5470292

In [55]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [56]:
train_percent = 0.9
train_ind = int(len(encoder_text)*train_percent)
train_data = encoder_text[:train_ind] 
test_data = encoder_text[train_ind:]

In [57]:
len(train_data)

4901048

In [58]:
len(test_data)

544561

In [59]:
import time
start_time = time.time()
epochs= 40
batch_size = 100
seq_len = 100

tracker = 0
num_chars = max(encoder_text)+1

model.train()
if model.use_gpu:
    model.cuda()

for i in range(epochs):
    hidden = model.hidden_state(batch_size)
    for x, y in generate_batches(train_data, batch_size, seq_len):
        
        tracker +=1
        x = one_hot_encoder(x, num_chars) #Before:(100, 100), After:(100, 100 , 84)
        inputs = torch.from_numpy(x)
        targets = torch.from_numpy(y)
        
        if model.use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda()
            
        hidden = tuple([state.data for state in hidden])
        model.zero_grad()
        
        lstm_out, hidden = model.forward(inputs, hidden) #lstm_out : torch.Size([10000, 84])
        loss = criterion(lstm_out, targets.view(batch_size*seq_len).long()) #target: (10000)
        loss.backward()
        
        #Getting rid of possible exploding gradient
        nn.utils.clip_grad_norm_(model.parameters(),max_norm=5)
        optimizer.step()
        
        #Check on validation
        if tracker %25==0:
            val_hidden = model.hidden_state(batch_size)
            val_losses = []
            model.eval()
            
            for x, y in generate_batches(test_data, batch_size, seq_len):
                x = one_hot_encoder(x, num_chars)
                inputs = torch.from_numpy(x)
                targets = torch.from_numpy(y)

                if model.use_gpu:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                val_hidden = tuple([state.data for state in val_hidden])
                lstm_out, val_hidden = model.forward(inputs, val_hidden)
                val_loss = criterion(lstm_out, targets.view(batch_size*seq_len).long())
                val_losses.append(val_loss.item())
                
            #Trainnin again
            model.train()
            print(f'EPOCH:{i} Step:{tracker} Val_loss:{val_loss.item()}')

final_time = start_time - time.time()
print(f'Training time was {final_time/60} minutes')

EPOCH:0 Step:25 Val_loss:3.203792095184326
EPOCH:0 Step:50 Val_loss:3.1922414302825928
EPOCH:0 Step:75 Val_loss:3.1915736198425293
EPOCH:0 Step:100 Val_loss:3.192152261734009
EPOCH:0 Step:125 Val_loss:3.061629295349121
EPOCH:0 Step:150 Val_loss:2.982011079788208
EPOCH:0 Step:175 Val_loss:2.895754337310791
EPOCH:0 Step:200 Val_loss:2.7833445072174072
EPOCH:0 Step:225 Val_loss:2.7041707038879395
EPOCH:0 Step:250 Val_loss:2.6285839080810547
EPOCH:0 Step:275 Val_loss:2.534939765930176
EPOCH:0 Step:300 Val_loss:2.429866313934326
EPOCH:0 Step:325 Val_loss:2.356574296951294
EPOCH:0 Step:350 Val_loss:2.283618211746216
EPOCH:0 Step:375 Val_loss:2.2151834964752197
EPOCH:0 Step:400 Val_loss:2.161541700363159
EPOCH:0 Step:425 Val_loss:2.1264209747314453
EPOCH:0 Step:450 Val_loss:2.081611156463623
EPOCH:0 Step:475 Val_loss:2.041590929031372
EPOCH:1 Step:500 Val_loss:2.010805368423462
EPOCH:1 Step:525 Val_loss:1.9800060987472534
EPOCH:1 Step:550 Val_loss:1.9530264139175415
EPOCH:1 Step:575 Val_loss:

EPOCH:9 Step:4525 Val_loss:1.3372199535369873
EPOCH:9 Step:4550 Val_loss:1.328216552734375
EPOCH:9 Step:4575 Val_loss:1.3360037803649902
EPOCH:9 Step:4600 Val_loss:1.3334412574768066
EPOCH:9 Step:4625 Val_loss:1.3290234804153442
EPOCH:9 Step:4650 Val_loss:1.3293308019638062
EPOCH:9 Step:4675 Val_loss:1.3312005996704102
EPOCH:9 Step:4700 Val_loss:1.3281508684158325
EPOCH:9 Step:4725 Val_loss:1.3277826309204102
EPOCH:9 Step:4750 Val_loss:1.329433798789978
EPOCH:9 Step:4775 Val_loss:1.3333945274353027
EPOCH:9 Step:4800 Val_loss:1.336094856262207
EPOCH:9 Step:4825 Val_loss:1.3352025747299194
EPOCH:9 Step:4850 Val_loss:1.330862283706665
EPOCH:9 Step:4875 Val_loss:1.3307867050170898
EPOCH:9 Step:4900 Val_loss:1.32881498336792
EPOCH:10 Step:4925 Val_loss:1.3321460485458374
EPOCH:10 Step:4950 Val_loss:1.3341333866119385
EPOCH:10 Step:4975 Val_loss:1.329681396484375
EPOCH:10 Step:5000 Val_loss:1.3314917087554932
EPOCH:10 Step:5025 Val_loss:1.3284004926681519
EPOCH:10 Step:5050 Val_loss:1.324951

EPOCH:18 Step:8925 Val_loss:1.3079910278320312
EPOCH:18 Step:8950 Val_loss:1.296520709991455
EPOCH:18 Step:8975 Val_loss:1.2997502088546753
EPOCH:18 Step:9000 Val_loss:1.3057318925857544
EPOCH:18 Step:9025 Val_loss:1.3056557178497314
EPOCH:18 Step:9050 Val_loss:1.308991551399231
EPOCH:18 Step:9075 Val_loss:1.2994381189346313
EPOCH:18 Step:9100 Val_loss:1.3026940822601318
EPOCH:18 Step:9125 Val_loss:1.3026379346847534
EPOCH:18 Step:9150 Val_loss:1.3018511533737183
EPOCH:18 Step:9175 Val_loss:1.3060081005096436
EPOCH:18 Step:9200 Val_loss:1.303269863128662
EPOCH:18 Step:9225 Val_loss:1.308787226676941
EPOCH:18 Step:9250 Val_loss:1.3066060543060303
EPOCH:18 Step:9275 Val_loss:1.3056020736694336
EPOCH:18 Step:9300 Val_loss:1.3045344352722168
EPOCH:19 Step:9325 Val_loss:1.3029792308807373
EPOCH:19 Step:9350 Val_loss:1.302891731262207
EPOCH:19 Step:9375 Val_loss:1.3066977262496948
EPOCH:19 Step:9400 Val_loss:1.3039144277572632
EPOCH:19 Step:9425 Val_loss:1.3095840215682983
EPOCH:19 Step:9450

EPOCH:27 Step:13250 Val_loss:1.295517086982727
EPOCH:27 Step:13275 Val_loss:1.2996731996536255
EPOCH:27 Step:13300 Val_loss:1.2996991872787476
EPOCH:27 Step:13325 Val_loss:1.299917459487915
EPOCH:27 Step:13350 Val_loss:1.2988203763961792
EPOCH:27 Step:13375 Val_loss:1.2920480966567993
EPOCH:27 Step:13400 Val_loss:1.2950773239135742
EPOCH:27 Step:13425 Val_loss:1.2984477281570435
EPOCH:27 Step:13450 Val_loss:1.2962560653686523
EPOCH:27 Step:13475 Val_loss:1.2962371110916138
EPOCH:27 Step:13500 Val_loss:1.2922207117080688
EPOCH:27 Step:13525 Val_loss:1.285462737083435
EPOCH:27 Step:13550 Val_loss:1.2887920141220093
EPOCH:27 Step:13575 Val_loss:1.2936649322509766
EPOCH:27 Step:13600 Val_loss:1.2960940599441528
EPOCH:27 Step:13625 Val_loss:1.2984123229980469
EPOCH:27 Step:13650 Val_loss:1.297707200050354
EPOCH:27 Step:13675 Val_loss:1.2961639165878296
EPOCH:27 Step:13700 Val_loss:1.2961616516113281
EPOCH:28 Step:13725 Val_loss:1.2991211414337158
EPOCH:28 Step:13750 Val_loss:1.2989100217819

EPOCH:35 Step:17550 Val_loss:1.2888833284378052
EPOCH:35 Step:17575 Val_loss:1.2908598184585571
EPOCH:35 Step:17600 Val_loss:1.292309045791626
EPOCH:35 Step:17625 Val_loss:1.2937982082366943
EPOCH:36 Step:17650 Val_loss:1.2917218208312988
EPOCH:36 Step:17675 Val_loss:1.2930468320846558
EPOCH:36 Step:17700 Val_loss:1.294142723083496
EPOCH:36 Step:17725 Val_loss:1.2937411069869995
EPOCH:36 Step:17750 Val_loss:1.2893911600112915
EPOCH:36 Step:17775 Val_loss:1.2838099002838135
EPOCH:36 Step:17800 Val_loss:1.2895528078079224
EPOCH:36 Step:17825 Val_loss:1.293517827987671
EPOCH:36 Step:17850 Val_loss:1.2902915477752686
EPOCH:36 Step:17875 Val_loss:1.2898502349853516
EPOCH:36 Step:17900 Val_loss:1.283109426498413
EPOCH:36 Step:17925 Val_loss:1.289252758026123
EPOCH:36 Step:17950 Val_loss:1.290122389793396
EPOCH:36 Step:17975 Val_loss:1.2897348403930664
EPOCH:36 Step:18000 Val_loss:1.2920572757720947
EPOCH:36 Step:18025 Val_loss:1.2929400205612183
EPOCH:36 Step:18050 Val_loss:1.294247865676879

In [None]:
x

In [None]:
one_hot_encoder(x,84).shape

In [None]:
y

In [81]:
lstm = nn.LSTM(3, 10)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 10), torch.randn(1, 1, 10))  # clean out hidden state
out, hidden = lstm(inputs, hidden)

In [82]:
inputs.shape

torch.Size([5, 1, 3])

In [83]:
out.shape

torch.Size([5, 1, 10])

In [103]:
targets.view()

torch.Size([100, 100])

In [62]:
model_name = 'hidden512_layers3_shakes.net'
torch.save(model.state_dict(), model_name)

In [64]:
model.encoder['K']

76

In [70]:
np.array([[model.encoder['K']]]), np.array([[model.encoder['K']]]).shape 

(array([[76]]), (1, 1))

In [73]:
one_hot_encoder(np.array([[model.encoder['K']]]), len(model.all_chars)), one_hot_encoder(np.array([[model.encoder['K']]]), len(model.all_chars)).shape

(array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
          0., 0., 0., 0.]]], dtype=float32),
 (1, 1, 84))

In [76]:
torch.from_numpy(one_hot_encoder(np.array([[model.encoder['K']]]), len(model.all_chars)))

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]]])

In [86]:
model.hidden_state(1)

(tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]]], device='cuda:0'),
 tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]]], device='cuda:0'))

In [87]:
tuple([state.data for state in model.hidden_state(1)])

(tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]]], device='cuda:0'),
 tensor([[[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.]]], device='cuda:0'))

In [79]:
inp, outs = model(torch.from_numpy(one_hot_encoder(np.array([[model.encoder['K']]]), len(model.all_chars))).cuda(),
                  model.hidden_state(1))

In [81]:
inp, inp.shape

(tensor([[ 0.8353, -1.7359,  2.5463, -2.7098, -3.3728,  0.9534, -2.1307, -2.6921,
          -0.3939,  1.1469,  0.5161, -1.4732, -1.2952,  1.9920, -0.1059, -0.8318,
          -7.6345, -2.0398, -2.6767, -0.7123, -4.6380, -2.3536, -3.7381, -2.3122,
          -1.6228,  2.0972, -2.5732,  2.3281,  2.8110, -4.2836,  0.1369,  0.9350,
           1.1316, -2.5912, -0.7516, -1.4473,  2.7744, -1.9449, -0.1226,  2.5396,
          -4.4774,  0.5985,  0.5502, -3.0845,  0.2442,  0.4286,  0.6152, -0.9765,
          -3.2988, -2.1381, -1.2280, -1.1823, -0.8534, -0.8591, -0.4164, -0.5458,
          -4.3499, -1.9504,  1.7265, -3.6936,  1.9697, -0.1959, -1.8800,  0.0487,
          -3.3206,  1.0551, -1.1534, -0.9623,  0.0830,  0.1731, -1.4498, -5.3504,
           1.4350, -0.5176, -2.6399, -0.8766, -1.4779, -0.4831,  1.0546, -2.4944,
          -4.8549, -4.8643, -3.5477, -1.7324]], device='cuda:0',
        grad_fn=<AddmmBackward0>),
 torch.Size([1, 84]))

In [88]:
F.softmax(inp, dim=1).topk(1)

torch.return_types.topk(
values=tensor([[0.1122]], device='cuda:0', grad_fn=<TopkBackward0>),
indices=tensor([[28]], device='cuda:0'))

In [97]:
def predict_next_char(model, char, hidden=None, k=1):
    encoded_text = model.encoder[char]
    encoded_text = np.array([[encoded_text]])
    encoded_text = one_hot_encoder(encoded_text, len(model.all_chars))
    inputs = torch.from_numpy(encoded_text)
    if model.use_gpu:
        inputs = inputs.cuda()
    hidden = tuple([state.data for state in hidden])
    lstm_out, hidden = model(inputs, hidden)
    probs = F.softmax(lstm_out, dim=1).data
    
    if model.use_gpu:
        probs = probs.cpu()
    
    probs, index_pos = probs.topk(k)
    index_pos = index_pos.numpy().squeeze()
    probs = probs.numpy().flatten()
    probs = probs/probs.sum()
    char = np.random.choice(index_pos, p=probs)
    return model.decoder[char], hidden

In [98]:
def generate_text(model, size, seed='The', k=1):
    if model.use_gpu:
        model.cuda()
    else:
        model.cpu()
        
    model.eval()
    output_chars = [c for c in seed]
    hidden = model.hidden_state(1)
    
    for char in seed:
        char,hidden = predict_next_char(model,  char, hidden, k)
    output_chars.append(char)
    
    for i in range(size):
        char, hidden = predict_next_char(model, output_chars[-1], hidden, k)
        output_chars.append(char)
    return ''.join(output_chars)
    

In [100]:
print(generate_text(model, 1000, seed='The', k=3))

TheR BANDITTIE and ANTONY

Enter CAESAR, and others, attended with the tongues of the country,
                                    the stocks of her sound  

                         Enter CLEOPATRA with a part of the confidence

  PROSPERO. A soul on high and soldier, and the stars,
    A soul to this that there were beard a wars,
    And therefore, and a wanton watch of state
    Than the time so still would be so to thee.
    Therefore I will, and the success of his
    That should be that to them and so thou art
    Without thy branches.
  CLEOPATRA. Why, here it well.
                       Exeunt CONSTABLE, and SIR HUGH EVANS
    The streets, my liege! What shall I see thy hand,
    And there is the companion of the world,
    The cares of that the commons that the face
    Will show his foreshe seek to speak? This strength
    Which should have then the state of this third body,
    That what he should be so so sensel's sound,
    Till he did see a shoulder of this son,
    They