# Data Download


In [1]:
!wget https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
!mv /content/anna.txt /content/data.txt

--2019-03-24 09:36:45--  https://raw.githubusercontent.com/udacity/deep-learning-v2-pytorch/master/recurrent-neural-networks/char-rnn/data/anna.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2025486 (1.9M) [text/plain]
Saving to: ‘anna.txt’


2019-03-24 09:36:45 (26.1 MB/s) - ‘anna.txt’ saved [2025486/2025486]



![](https://github.com/udacity/deep-learning-v2-pytorch/raw/9b6001a7163e94eda9941ae448ce522eb7ba3a7d/recurrent-neural-networks/char-rnn/assets/charseq.jpeg)

# Data Process

In [9]:
DATA_FILE = '/content/data.txt'

with open(DATA_FILE, 'r') as f:
  text = f.read()
  
text[:200]

"Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverything was in confusion in the Oblonskys' house. The wife had\ndiscovered that the husband was carrying on"

## Tokenize data

In [12]:
import numpy as np

char = list(set(text))
int2char = dict(enumerate(char))
char2int = {ch: i for i, ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])
print(text[:100])
print('\nis encoded as \n',encoded[:100])

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin

is encoded as 
 [55 23 62 80 69  0 47 75 60 82 82 82 56 62 80 80 27 75 53 62 18 19 74 19
  0 13 75 62 47  0 75 62 74 74 75 62 74 19 81  0 25 75  0 64  0 47 27 75
 32 48 23 62 80 80 27 75 53 62 18 19 74 27 75 19 13 75 32 48 23 62 80 80
 27 75 19 48 75 19 69 13 75 52 36 48 82 36 62 27  1 82 82 26 64  0 47 27
 69 23 19 48]


In [17]:
len(char)

83

## One-Hot encode chars

In [13]:
def onehot(arr, nlabels):
  one_hot = np.zeros((np.multiply(*arr.shape), nlabels), dtype=np.float32)
  one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1
  one_hot = one_hot.reshape((*arr.shape, nlabels))
  return one_hot

print(onehot(np.array([[1,2,3]]), 4))


[[[0. 1. 0. 0.]
  [0. 0. 1. 0.]
  [0. 0. 0. 1.]]]


## Batch Generator

In [14]:
def batcher(arr, batch_size, seq_len):
  n_batches = len(arr)//(batch_size * seq_len)
  arr = arr[:(n_batches * batch_size * seq_len)]
  arr = arr.reshape((batch_size, -1))
  for n in range(0, arr.shape[1], seq_len):
    x = arr[:, n:n+seq_len]
    y = np.zeros_like(x)
    try:
        y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_len]
    except IndexError:
        y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
    yield x, y
    
    
for x,y in batcher(np.arange(36),6, 3):
  print(x,'\n')
  print(y)
  break

[[ 0  1  2]
 [ 6  7  8]
 [12 13 14]
 [18 19 20]
 [24 25 26]
 [30 31 32]] 

[[ 1  2  3]
 [ 7  8  9]
 [13 14 15]
 [19 20 21]
 [25 26 27]
 [31 32 33]]


In [15]:
batches = batcher(encoded, 8, 50)
x, y = next(batches)

print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

x
 [[55 23 62 80 69  0 47 75 60 82]
 [13 52 48 75 69 23 62 69 75 62]
 [ 0 48  4 75 52 47 75 62 75 53]
 [13 75 69 23  0 75 57 23 19  0]
 [75 13 62 36 75 23  0 47 75 69]
 [57 32 13 13 19 52 48 75 62 48]
 [75 37 48 48 62 75 23 62  4 75]
 [ 3 35 74 52 48 13 81 27  1 75]]

y
 [[23 62 80 69  0 47 75 60 82 82]
 [52 48 75 69 23 62 69 75 62 69]
 [48  4 75 52 47 75 62 75 53 52]
 [75 69 23  0 75 57 23 19  0 53]
 [13 62 36 75 23  0 47 75 69  0]
 [32 13 13 19 52 48 75 62 48  4]
 [37 48 48 62 75 23 62  4 75 13]
 [35 74 52 48 13 81 27  1 75 54]]


# Model
![](https://github.com/udacity/deep-learning-v2-pytorch/blob/master/recurrent-neural-networks/char-rnn/assets/charRNN.png?raw=true)

In [0]:
import torch
import torch.nn as nn

class CharacterRNN(nn.Module):
  def __init__(self, tokens, n_hidden=256, n_layers=2,
                               drop_prob=0.5, lr=0.001):
    super(CharacterRNN, self).__init__()
    self.drop_prob = drop_prob
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.lr = lr

    self.chars = tokens
    self.int2char = dict(enumerate(self.chars))
    self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        
    self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)    
    self.dropout = nn.Dropout(drop_prob)
    self.fc1 = nn.Linear(n_hidden, 128)
    self.fc2 = nn.Linear(128, len(self.chars))
    self.relu = nn.ReLU()
    
  def forward(self, x, hidden):
    r_output, hidden = self.lstm(x, hidden)
    out = self.dropout(r_output)
    out = out.contiguous().view(-1, self.n_hidden)
    out = self.fc1(out)
    out = self.relu(out)
    out = self.fc2(out)
    return out, hidden
  
  def init_hidden(self, batch_size):
    weight = next(self.parameters()).data
    hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
              weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
    return hidden

# Training loop


In [0]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):

    net.train()
    
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        h = net.init_hidden(batch_size)
        
        for x, y in batcher(data, batch_size, seq_length):
            counter += 1
            x = onehot(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            inputs, targets = inputs.cuda(), targets.cuda()

            h = tuple([each.data for each in h])

            net.zero_grad()
            
            output, h = net(inputs, h)
            
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()
            
            if counter % print_every == 0:
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in batcher(val_data, batch_size, seq_length):
                    x = onehot(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                
                    val_losses.append(val_loss.item())
                
                net.train()
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Train Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [21]:
n_hidden=512
n_layers=2

net = CharacterRNN(char, n_hidden, n_layers)
print(net)

CharacterRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc1): Linear(in_features=512, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=83, bias=True)
  (relu): ReLU()
)


In [0]:
batch_size = 1500
seq_length = 100
n_epochs = 50 # start smaller if you are just testing initial behavior

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/50... Step: 10... Train Loss: 3.3373... Val Loss: 3.3239
Epoch: 2/50... Step: 20... Train Loss: 3.1536... Val Loss: 3.1611


In [0]:
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/50... Step: 10... Loss: 1.7567... Val Loss: 1.7216
Epoch: 2/50... Step: 20... Loss: 1.7247... Val Loss: 1.6844
Epoch: 3/50... Step: 30... Loss: 1.6993... Val Loss: 1.6639
Epoch: 4/50... Step: 40... Loss: 1.6917... Val Loss: 1.6521
Epoch: 5/50... Step: 50... Loss: 1.6679... Val Loss: 1.6427
Epoch: 6/50... Step: 60... Loss: 1.6583... Val Loss: 1.6340
Epoch: 7/50... Step: 70... Loss: 1.6429... Val Loss: 1.6263
Epoch: 8/50... Step: 80... Loss: 1.6508... Val Loss: 1.6208
Epoch: 9/50... Step: 90... Loss: 1.6338... Val Loss: 1.6153
Epoch: 10/50... Step: 100... Loss: 1.6679... Val Loss: 1.6081
Epoch: 10/50... Step: 110... Loss: 1.6552... Val Loss: 1.6003
Epoch: 11/50... Step: 120... Loss: 1.6079... Val Loss: 1.5955
Epoch: 12/50... Step: 130... Loss: 1.6118... Val Loss: 1.5860
Epoch: 13/50... Step: 140... Loss: 1.5988... Val Loss: 1.5790
Epoch: 14/50... Step: 150... Loss: 1.5980... Val Loss: 1.5738
Epoch: 15/50... Step: 160... Loss: 1.5824... Val Loss: 1.5670
Epoch: 16/50... Step: 170.

In [0]:
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/50... Step: 10... Loss: 1.3621... Val Loss: 1.3971
Epoch: 2/50... Step: 20... Loss: 1.3452... Val Loss: 1.3813
Epoch: 3/50... Step: 30... Loss: 1.3278... Val Loss: 1.3716
Epoch: 4/50... Step: 40... Loss: 1.3272... Val Loss: 1.3676
Epoch: 5/50... Step: 50... Loss: 1.3135... Val Loss: 1.3655
Epoch: 6/50... Step: 60... Loss: 1.3075... Val Loss: 1.3634
Epoch: 7/50... Step: 70... Loss: 1.2980... Val Loss: 1.3614
Epoch: 8/50... Step: 80... Loss: 1.3082... Val Loss: 1.3594
Epoch: 9/50... Step: 90... Loss: 1.3035... Val Loss: 1.3576
Epoch: 10/50... Step: 100... Loss: 1.3472... Val Loss: 1.3560
Epoch: 10/50... Step: 110... Loss: 1.3454... Val Loss: 1.3534
Epoch: 11/50... Step: 120... Loss: 1.2875... Val Loss: 1.3526
Epoch: 12/50... Step: 130... Loss: 1.2964... Val Loss: 1.3528
Epoch: 13/50... Step: 140... Loss: 1.2884... Val Loss: 1.3494
Epoch: 14/50... Step: 150... Loss: 1.2936... Val Loss: 1.3480
Epoch: 15/50... Step: 160... Loss: 1.2831... Val Loss: 1.3488
Epoch: 16/50... Step: 170.

In [0]:
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.01, print_every=10)

Epoch: 1/50... Step: 10... Loss: 2.4489... Val Loss: 2.2425
Epoch: 2/50... Step: 20... Loss: 2.1286... Val Loss: 1.9960
Epoch: 3/50... Step: 30... Loss: 1.9278... Val Loss: 1.8161
Epoch: 4/50... Step: 40... Loss: 1.7454... Val Loss: 1.6524
Epoch: 5/50... Step: 50... Loss: 1.5961... Val Loss: 1.5337
Epoch: 6/50... Step: 60... Loss: 1.4860... Val Loss: 1.4587
Epoch: 7/50... Step: 70... Loss: 1.4121... Val Loss: 1.4166
Epoch: 8/50... Step: 80... Loss: 1.3875... Val Loss: 1.3887
Epoch: 9/50... Step: 90... Loss: 1.3477... Val Loss: 1.3676
Epoch: 10/50... Step: 100... Loss: 1.3630... Val Loss: 1.3591
Epoch: 10/50... Step: 110... Loss: 1.3514... Val Loss: 1.3462
Epoch: 11/50... Step: 120... Loss: 1.2825... Val Loss: 1.3387
Epoch: 12/50... Step: 130... Loss: 1.2804... Val Loss: 1.3301
Epoch: 13/50... Step: 140... Loss: 1.2675... Val Loss: 1.3295
Epoch: 14/50... Step: 150... Loss: 1.2601... Val Loss: 1.3211
Epoch: 15/50... Step: 160... Loss: 1.2433... Val Loss: 1.3174
Epoch: 16/50... Step: 170.

In [0]:
train(net, encoded, epochs=50, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

Epoch: 1/50... Step: 10... Loss: 1.0796... Val Loss: 1.2668
Epoch: 2/50... Step: 20... Loss: 1.0784... Val Loss: 1.2690
Epoch: 3/50... Step: 30... Loss: 1.0722... Val Loss: 1.2702
Epoch: 4/50... Step: 40... Loss: 1.0758... Val Loss: 1.2699
Epoch: 5/50... Step: 50... Loss: 1.0693... Val Loss: 1.2699
Epoch: 6/50... Step: 60... Loss: 1.0661... Val Loss: 1.2680
Epoch: 7/50... Step: 70... Loss: 1.0617... Val Loss: 1.2678
Epoch: 8/50... Step: 80... Loss: 1.0648... Val Loss: 1.2672
Epoch: 9/50... Step: 90... Loss: 1.0639... Val Loss: 1.2662
Epoch: 10/50... Step: 100... Loss: 1.1284... Val Loss: 1.2685
Epoch: 10/50... Step: 110... Loss: 1.1154... Val Loss: 1.2699
Epoch: 11/50... Step: 120... Loss: 1.0544... Val Loss: 1.2713
Epoch: 12/50... Step: 130... Loss: 1.0573... Val Loss: 1.2715
Epoch: 13/50... Step: 140... Loss: 1.0565... Val Loss: 1.2712
Epoch: 14/50... Step: 150... Loss: 1.0603... Val Loss: 1.2722
Epoch: 15/50... Step: 160... Loss: 1.0543... Val Loss: 1.2702
Epoch: 16/50... Step: 170.

# Prediction


In [0]:
import torch.nn.functional as F

def predict(net, char, h=None, top_k=None):
        x = np.array([[net.char2int[char]]])
        x = onehot(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        inputs = inputs.cuda()
        h = tuple([each.data for each in h])
        out, h = net(inputs, h)

        p = F.softmax(out, dim=1).data
        p = p.cpu()

        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
            
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())

        return net.int2char[char], h


In [0]:
def sample(net, size, prime='The', top_k=None):
        
    net.cuda()
    
    net.eval()
    
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [0]:
print(sample(net, 1000, prime='He ', top_k=5))

He had spoken to her as he was already began again
the tall there; and his heart should get a sense of things, she
stroked her hand and told him, that he could not say what, he went up, that she
writes to him the same society almost the complex on the same sour herself and the
more and mere children of Vronsky, and he could not go on worn. The
minute at the sight of her husband was a corrept to this subject. The
sight of hind and walked out of his sense.

"You did not like some passion?" said Anna, smiling, at her head was in
silence.

"Ah! I don't want the sound of the whole person," said Levin with
promising talk in her eyes, and as though he was ashamed of her
soul, and went into a play taking her eyes off her, she was at from
the same door. He was told him the child's expression which showed
him immeniever this way one of the study had been surd the sound of the
short presence of husband and tack, and an artesting for a man was the same
significance to a conversation to his wife, a