## Check GPU and import libraries

In [None]:
# Show the GPU attached to this runtime (model, driver version, memory usage).
!nvidia-smi

Wed Jul  7 00:07:26 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    26W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

## Loading Data

In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive.
# force_remount=True asks Colab to redo the mount even if one already exists.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# List the Drive folder used by this notebook.
# NOTE(review): the path is relative, so this assumes the working directory is
# /content (where Drive was mounted above).
!ls gdrive/MyDrive/MachineLearning/

 anna.txt  'Necron-bridge(2).txt'   rnn.net


In [None]:
 # Read the whole training corpus into memory as a single string.
 # The encoding is explicit because the text contains non-ASCII punctuation
 # (e.g. curly apostrophes); without it, decoding depends on the locale default.
 with open('gdrive/MyDrive/MachineLearning/pride-prej.txt', 'r', encoding='utf-8') as f:
   text = f.read()

In [None]:
def one_hot_encode(arr, n_labels):
  """Return a float32 one-hot encoding of an integer array.

  Args:
    arr: NumPy array of integer labels; each value is used as a column
      index into an axis of length `n_labels`.
    n_labels: Length of the one-hot axis.

  Returns:
    Array of shape `arr.shape + (n_labels,)` that is zero everywhere
    except for a 1 at the column named by each label.
  """
  flat_labels = arr.flatten()

  # One row per element of arr, every column initially switched off.
  encoded = np.zeros((flat_labels.size, n_labels), dtype=np.float32)

  # Switch on the single column selected by each label.
  encoded[np.arange(flat_labels.size), flat_labels] = 1.

  # Restore the input's layout with the one-hot axis appended last.
  return encoded.reshape(arr.shape + (n_labels,))

In [None]:
def get_batches(arr, batch_size, seq_length):
  """Yield (input, target) mini-batches cut from a 1-D encoded array.

  The array is trimmed to a whole number of batches and reshaped into
  `batch_size` rows. Windows of `seq_length` columns are then yielded
  left to right.

  Args:
    arr: 1-D NumPy array of encoded characters.
    batch_size: Number of rows (parallel sequences) per batch.
    seq_length: Number of columns (time steps) per batch.

  Yields:
    Tuples `(x, y)`, each of shape `(batch_size, seq_length)`, where `y`
    is `x` shifted one step to the left. The last column of `y` is the
    column that follows the window, or column 0 for the final window.
  """
  chars_per_batch = batch_size * seq_length

  # Keep only as many characters as fill complete batches.
  usable = (len(arr) // chars_per_batch) * chars_per_batch
  rows = arr[:usable].reshape((batch_size, -1))
  n_cols = rows.shape[1]

  for start in range(0, n_cols, seq_length):
    stop = start + seq_length
    x = rows[:, start:stop]

    # Targets are the inputs shifted by one character.
    y = np.zeros_like(x)
    y[:, :-1] = x[:, 1:]
    # The final window has no following column, so wrap to the first one.
    y[:, -1] = rows[:, stop] if stop < n_cols else rows[:, 0]
    yield x, y

## Model

In [None]:
class CharRNN(nn.Module):
  """Character-level LSTM language model.

  One-hot character vectors pass through a stacked LSTM, dropout, and a
  linear layer that emits one logit per vocabulary character.

  Args:
    tokens: Sequence of the distinct characters in the vocabulary; each
      character's position becomes its integer code.
    n_hidden: Size of the LSTM hidden state.
    n_layers: Number of stacked LSTM layers.
    drop_prob: Dropout probability, used between LSTM layers and on the
      LSTM output.
    lr: Stored as `self.lr`; not used anywhere inside this class.
  """

  def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.3, lr=3E-4):
    super().__init__()

    self.drop_prob = drop_prob
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.lr = lr

    # Vocabulary and the two lookup tables derived from it.
    self.chars = tokens
    self.int_char = dict(enumerate(self.chars))
    self.char_int = {ch: ii for ii, ch in self.int_char.items()}

    # model layers
    self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout=drop_prob,
                        batch_first=True)
    self.dropout = nn.Dropout(drop_prob)
    self.fc = nn.Linear(n_hidden, len(self.chars))


  def forward(self, x, hidden):
    """Run one forward pass.

    Args:
      x: One-hot input of shape (batch, seq, vocab_size); the LSTM is
        built with batch_first=True.
      hidden: Tuple `(h, c)` of LSTM states, e.g. from `init_hidden`.

    Returns:
      Tuple `(out, hidden)`. `out` holds the logits flattened to
      (batch * seq, vocab_size); `hidden` is the updated LSTM state.
    """
    r_output, hidden = self.lstm(x, hidden)

    # apply dropout, flatten the time steps, feed the fully connected layer
    out = self.dropout(r_output)
    out = out.contiguous().view(-1, self.n_hidden)
    out = self.fc(out)

    # return output, hidden state
    return out, hidden


  def init_hidden(self, batch_size):
    """Return a zeroed `(h, c)` LSTM state for `batch_size` sequences.

    Each tensor has shape (n_layers, batch_size, n_hidden) and is created
    with the dtype and device of the model's own parameters, so the state
    always lives where the model does.
    """
    weight = next(self.parameters())
    state_shape = (self.n_layers, batch_size, self.n_hidden)

    hidden = (weight.new_zeros(state_shape),
              weight.new_zeros(state_shape))

    return hidden

## Train

In [None]:
def train_loop(model, data, epochs=10, batch_size=10, seq_length=50, lr=3E-4, clip=5, valid_frac=0.2, print_every=100):
  """Train `model` on encoded text with mixed precision and early stopping.

  Args:
    model: Module exposing `chars`, `n_hidden`, `n_layers`,
      `init_hidden(batch_size)` and a forward of the form `model(x, hidden)`.
    data: 1-D NumPy array of integer character codes.
    epochs: Maximum number of passes over the training split.
    batch_size: Sequences per mini-batch.
    seq_length: Characters per sequence.
    lr: Learning rate for the Adam optimizer.
    clip: Maximum gradient norm used for clipping.
    valid_frac: Fraction of `data`, taken from its end, held out for validation.
    print_every: Validate and log every this many training steps.

  Side effects:
    Moves `model` to `device`, prints progress, and writes a checkpoint to
    'rnn.net' in the working directory whenever the validation loss matches
    or beats the best value seen so far. Training stops at the end of an
    epoch once `print_every * 7` steps have passed without such a save.
  """
  model.to(device)

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=lr)
  # Loss scaling keeps float16 gradients from underflowing under autocast.
  scaler = torch.cuda.amp.GradScaler()

  # Hold out the tail of the data for validation.
  valid_index = int(len(data) * (1 - valid_frac))
  data, valid_data = data[:valid_index], data[valid_index:]

  # Early-stopping budget, in training steps.
  patience_steps = print_every * 7

  count = 0
  count_since_last_save = 0
  n_chars = len(model.chars)
  valid_loss_min = np.inf
  model.train()

  for e in range(epochs):
    h = model.init_hidden(batch_size)

    for x, y in get_batches(data, batch_size, seq_length):
      count += 1
      x = one_hot_encode(x, n_chars)
      inputs, targets = torch.from_numpy(x).to(device), torch.from_numpy(y).to(device)

      # Detach the carried-over state so backprop stops at the batch boundary.
      h = tuple([each.detach() for each in h])

      optimizer.zero_grad()

      # Forward pass and loss both run under autocast.
      with torch.cuda.amp.autocast():
        output, h = model(inputs, h)
        loss = criterion(output, targets.view(batch_size * seq_length).long())

      scaler.scale(loss).backward()

      # Unscale first so the clip threshold applies to the true gradients,
      # and clip the parameters of the model being trained.
      scaler.unscale_(optimizer)
      nn.utils.clip_grad_norm_(model.parameters(), clip)
      scaler.step(optimizer)
      scaler.update()

      if count % print_every == 0:
        valid_h = model.init_hidden(batch_size)
        valid_losses = []
        model.eval()
        with torch.no_grad():
          for val_x, val_y in get_batches(valid_data, batch_size, seq_length):
            val_x = one_hot_encode(val_x, n_chars)
            val_inputs = torch.from_numpy(val_x).to(device)
            val_targets = torch.from_numpy(val_y).to(device)

            valid_h = tuple([each.detach() for each in valid_h])

            # Evaluate under the same precision policy as training.
            with torch.cuda.amp.autocast():
              val_output, valid_h = model(val_inputs, valid_h)
              valid_loss = criterion(val_output, val_targets.view(batch_size * seq_length).long())

            valid_losses.append(valid_loss.item())
        
        count_since_last_save += print_every
        valid_loss_mean = np.mean(valid_losses)
        model.train()

        print( "-------------------------------------\n"
              f"Epoch: {e + 1} / {epochs}\n"
              f"Step: {count}\n"
              f"Training Loss: {loss.item(): .4f}\n"
              f"Validation Loss: {valid_loss_mean: .4f}\n")
        
        if valid_loss_mean <= valid_loss_min:
          valid_loss_min = valid_loss_mean
          print("Validation Loss Decreased! Saving Model Params.")
          model_name = 'rnn.net'
          # Save everything needed to rebuild the model for inference.
          checkpoint = {'n_hidden': model.n_hidden,
                        'n_layers': model.n_layers,
                        'state_dict': model.state_dict(),
                        'tokens': model.chars}
          with open(model_name, 'wb') as f:
            torch.save(checkpoint, f)
          count_since_last_save = 0

        print(f"Steps Since Last Save: {count_since_last_save} / {patience_steps}")

    # Checked once per epoch: stop when validation has stalled for too long.
    if count_since_last_save >= patience_steps:
      print("Stopping training. Validation Loss has stopped decreasing.")
      break

In [None]:
# Build the vocabulary, encode the corpus, then define and print the net.
n_hidden=512
n_layers=2
# Sort the characters so the char <-> int mapping is identical on every run;
# the iteration order of a set of strings can differ between sessions.
chars = tuple(sorted(set(text)))
int_char = dict(enumerate(chars))
char_int = {ch: ii for ii, ch in int_char.items()}
# The corpus as an array of integer character codes.
encoded = np.array([char_int[ch] for ch in text])

net = CharRNN(chars, n_hidden, n_layers)
print(net)

CharRNN(
  (lstm): LSTM(92, 512, num_layers=2, batch_first=True, dropout=0.3)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=92, bias=True)
)


In [36]:
# Hyperparameters for this training run.
batch_size = 128   # sequences per mini-batch
seq_length = 100   # characters per sequence
n_epochs = 200     # upper bound; train_loop may stop earlier

# Train the model; validation and logging happen every 30 steps.
train_loop(
  net,
  encoded,
  epochs=n_epochs,
  batch_size=batch_size,
  seq_length=seq_length,
  lr=3E-4,
  print_every=30,
)

-------------------------------------
Epoch: 1 / 200
Step: 30
Training Loss:  0.9844
Validation Loss:  1.2763

Validation Loss Decreased! Saving Model Params.
Steps Since Last Save: 0 / 210
-------------------------------------
Epoch: 2 / 200
Step: 60
Training Loss:  0.9854
Validation Loss:  1.2807

Steps Since Last Save: 30 / 210
-------------------------------------
Epoch: 2 / 200
Step: 90
Training Loss:  1.0010
Validation Loss:  1.2742

Validation Loss Decreased! Saving Model Params.
Steps Since Last Save: 0 / 210
-------------------------------------
Epoch: 3 / 200
Step: 120
Training Loss:  0.9805
Validation Loss:  1.2737

Validation Loss Decreased! Saving Model Params.
Steps Since Last Save: 0 / 210
-------------------------------------
Epoch: 4 / 200
Step: 150
Training Loss:  1.0186
Validation Loss:  1.2796

Steps Since Last Save: 30 / 210
-------------------------------------
Epoch: 4 / 200
Step: 180
Training Loss:  1.0010
Validation Loss:  1.2736

Validation Loss Decreased! Sav

## Test, Visualize

In [37]:
# Load the saved checkpoint and rebuild the model from it.
# map_location lets a checkpoint written on a GPU runtime be restored on
# whatever device this session selected.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
with open('rnn.net', 'rb') as f:
  checkpoint = torch.load(f, map_location=device)

loaded = CharRNN(checkpoint['tokens'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [None]:
# Copy the checkpoint into the Drive folder so it outlives this runtime.
# NOTE(review): both paths are relative, so this assumes the working directory
# is /content (where Drive was mounted).
!cp rnn.net gdrive/MyDrive/MachineLearning/

In [None]:
def predict(net, char, h=None, top_k=None):
  """Feed one character to `net` and sample the character that follows.

  Args:
    net: Model exposing `chars`, `char_int`, `int_char`,
      `init_hidden(batch_size)` and a forward of the form `net(x, hidden)`.
    char: A single character that is a key of `net.char_int`.
    h: Hidden state from the previous step, or None to start from a fresh
      state obtained from `net.init_hidden(1)`.
    top_k: When given, sample only among the `top_k` most probable
      characters; otherwise sample from the full distribution.

  Returns:
    Tuple `(next_char, hidden)`: the sampled character and the updated
    hidden state.

  Raises:
    KeyError: If `char` is not in `net.char_int`.
  """
  if h is None:
    h = net.init_hidden(1)

  # Build a (1, 1, vocab) one-hot input on the device the model lives on.
  x = np.array([[net.char_int[char]]])
  x = one_hot_encode(x, len(net.chars))
  inputs = torch.from_numpy(x).to(next(net.parameters()).device)

  # Inference only: no autograd graph is needed.
  with torch.no_grad():
    h = tuple([each.detach() for each in h])
    out, h = net(inputs, h)

    # character probabilities, moved to the CPU for NumPy
    p = F.softmax(out, dim=1).cpu()

  # Candidate characters to sample from.
  if top_k is None:
    top_ch = np.arange(len(net.chars))
  else:
    p, top_ch = p.topk(top_k)
    # reshape(-1) keeps a 1-D array even when top_k == 1 (squeeze would not).
    top_ch = top_ch.numpy().reshape(-1)

  # Renormalise in float64 and draw the next character at random.
  p = p.numpy().reshape(-1).astype(np.float64)
  char = np.random.choice(top_ch, p=p / p.sum())

  # return the sampled character and the hidden state
  return net.int_char[int(char)], h

In [None]:
def sample(net, size, prime="The", top_k=None):
  """Generate text from `net`, starting from the string `prime`.

  Args:
    net: Trained model accepted by `predict`.
    size: Number of characters to generate after the priming step.
    prime: Non-empty seed text used to warm up the hidden state.
    top_k: Passed through to `predict`.

  Returns:
    `prime` followed by the generated characters. The priming step itself
    contributes one predicted character, so the result has
    `len(prime) + size + 1` characters.

  Raises:
    ValueError: If `prime` is empty (there would be no first prediction).
  """
  if not prime:
    raise ValueError("prime must contain at least one character")

  net.to(device)
  net.eval()

  # Warm up the hidden state on the seed; only the last prediction is kept.
  chars = list(prime)
  h = net.init_hidden(1)
  for ch in prime:
    char, h = predict(net, ch, h, top_k=top_k)

  chars.append(char)

  # Feed each sampled character back in to get the next one.
  for _ in range(size):
    char, h = predict(net, chars[-1], h, top_k=top_k)
    chars.append(char)

  return "".join(chars)

In [40]:
# Generate text from the reloaded model, seeded with 'What ' and sampling
# among the 4 most probable characters at each step.
print(sample(loaded, 3000, prime='What ', top_k=4))

What and all the
      thoughts that her manners in her pleasure was so far at least
      in that passed on his fave. Mr. Darcy, that he spoke with the
      concert to herself the consequence of her father as he had been
      settled him, and the present party; the day were a sorious
      continued to the long, and she said himself on her family. The happine
      of his father’s signing satidance of Mrr. Bingley’s being, that he
      should have been a some of the sone of the former, and was to
      deceive me, it soon as they had been the complaming hours, and
      to the case of her family, her sister’s compliments of the house
      who could have always been disposed to be, and that she had
      never felt a matter was a minute fortunate and present he was
      attending him with her fave of speaking on the regree to some
      propostion of his cousin’s friends, as they had been a five
      for having house, and what he was such as shouths, and they
      was soon as he