#### Tutorial adapted from https://pytorch.org/tutorials/intermediate/char_rnn_generation_tutorial.html

In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string

all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker

def findFiles(path): return glob.glob(path)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

# Read a file and split into lines
def readLines(filename):
    with open(filename, encoding='utf-8') as some_file:
        return [unicodeToAscii(line.strip()) for line in some_file]

# Build the category_lines dictionary, a list of lines per category
category_lines = {}
all_categories = []
for filename in findFiles('../names/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

if n_categories == 0:
    raise RuntimeError('Data not found. Make sure that you downloaded data '
        'from https://download.pytorch.org/tutorial/data.zip and extract it to '
        'the current directory.')

print('# categories:', n_categories, all_categories)
print(unicodeToAscii("O'Néàl"))

# categories: 18 ['Portuguese', 'Czech', 'Korean', 'Arabic', 'English', 'Russian', 'German', 'Spanish', 'Vietnamese', 'Polish', 'Irish', 'Japanese', 'French', 'Scottish', 'Greek', 'Chinese', 'Italian', 'Dutch']
O'Neal


In [2]:
import torch
import torch.nn as nn

class TargetRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(TargetRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.rnn = nn.GRUCell(n_categories + input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, 256)
        self.linear2 = nn.Linear(256, 256)
        self.linear3 = nn.Linear(256, self.output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, inp, hidden):
        concatenated = torch.cat((category, inp), dim=-1)
        hidden =  self.rnn(concatenated, hidden)

        output = self.linear(hidden)
        output = torch.relu(output)
        output = self.linear2(output)
        output = torch.relu(output)
        output = self.linear3(output)

        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

Training

In [3]:
import random

# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

# Get a random category and random line from that category
def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    return category, line

# One-hot vector for category
def categoryTensor(category):
    li = all_categories.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor

# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# LongTensor of second letter to end (EOS) for target
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)

# Make category, input, and target tensors from a random category, line pair
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

In [5]:

import torch.nn as nn

criterion = nn.NLLLoss()

def train_static_hyper_rnn_step(static_hyper_rnn, optimizer, category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor = target_line_tensor.unsqueeze(-1).to(static_hyper_rnn.device)
    hidden = target_network.initHidden().to(static_hyper_rnn.device)

    optimizer.zero_grad()

    loss = 0

    generated_params, embedding, embedding_module_output, weight_generator_output = static_hyper_rnn.generate_params()

    for i in range(input_line_tensor.size(0)):
        out = static_hyper_rnn(inp=(category_tensor.to(static_hyper_rnn.device), input_line_tensor[i].to(static_hyper_rnn.device), hidden), generated_params=generated_params, has_aux=False)
        output, hidden = out
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()
    torch.nn.utils.clip_grad_norm_(static_hyper_rnn.parameters(), 10.0)
    optimizer.step()

    return output, {"loss":loss.item() / input_line_tensor.size(0)}

In [6]:
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import os
import tqdm

def get_tensorboard_logger(
    experiment_name: str, base_log_path: str = "tensorboard_logs"
):
    log_path = "{}/{}_{}".format(base_log_path, experiment_name, datetime.now())
    train_writer = SummaryWriter(log_path, flush_secs=10)
    full_log_path = os.path.join(os.getcwd(), log_path)
    print(
        "Follow tensorboard logs with: python -m tensorboard.main --logdir '{}'".format(full_log_path)
    )
    return train_writer


In [7]:
from tqdm import tqdm
import numpy as np
import time
import datetime as dt


def train(hypernet, train_iter_fn, lr, n_iters):
    writer = get_tensorboard_logger("HyperRNN")
    optimizer = torch.optim.Adam(hypernet.parameters(), lr=lr)
    bar = tqdm(np.arange(n_iters))

    for i in bar:
        category_tensor, input_line_tensor, target_line_tensor = randomTrainingExample()

        start_time = dt.datetime.today().timestamp()

        _, metrics = train_iter_fn(hypernet, optimizer, category_tensor, input_line_tensor, target_line_tensor)

        time_diff = (dt.datetime.today().timestamp() - start_time) + 1e-5

        metrics["diff"] = time_diff
        for key in metrics:
            writer.add_scalar(key, metrics[key], i)


        loss = metrics['loss']
        bar.set_description('Loss: {} Iters p sec: {}'.format(loss, str(time_diff)))


### Hypernetwork

In [8]:
from hypernn.torch.hypernet import TorchHyperNetwork
from hypernn.torch.weight_generator import TorchWeightGenerator, DefaultTorchWeightGenerator
from hypernn.torch.embedding_module import TorchEmbeddingModule, DefaultTorchEmbeddingModule

In [11]:
target_network = TargetRNN(n_letters, 128, n_letters)
pytorch_total_params = sum(p.numel() for p in target_network.parameters() if p.requires_grad)
pytorch_total_params

193467

In [18]:
from typing import Dict, Iterable, Optional, Any

class CustomWeightGenerator(TorchWeightGenerator):
    def __init__(
        self,
        embedding_dim: int,
        num_embeddings: int,
        hidden_dim: int,
        target_input_shape: Optional[Any] = None,
    ):
        super().__init__(embedding_dim, num_embeddings, hidden_dim, target_input_shape)
        self.linear1 = nn.Linear(embedding_dim, 32)
        self.linear2 = nn.Linear(32, hidden_dim)

    def forward(
        self,
        embedding,
        inp: Iterable[Any] = [],
        *args,
        **kwargs
    ) -> Dict[str, torch.Tensor]:
        x = self.linear1(embedding)
        x = torch.relu(x)
        x = self.linear2(x)
        return x.view(-1), {}


In [43]:
EMBEDDING_DIM = 32
NUM_EMBEDDINGS = 256

embedding_module = DefaultTorchEmbeddingModule.from_target(target_network, EMBEDDING_DIM, NUM_EMBEDDINGS)
weight_generator = CustomWeightGenerator.from_target(target_network, EMBEDDING_DIM, NUM_EMBEDDINGS)

In [44]:
hypernetwork = TorchHyperNetwork.from_target(
                                target_input_shape=((1, n_categories), (1, n_letters), (1, 128)),
                                target_network=target_network,
                                embedding_module=embedding_module,
                                weight_generator=weight_generator,
                            )
pytorch_total_params = sum(p.numel() for p in hypernetwork.parameters() if p.requires_grad)
pytorch_total_params

34228

In [45]:
device = torch.device('cuda')
hypernetwork = hypernetwork.to(device)

In [46]:
learning_rate = 0.0001

train(hypernet=hypernetwork, train_iter_fn=train_static_hyper_rnn_step, lr=learning_rate, n_iters=100000)


Follow tensorboard logs with: python -m tensorboard.main --logdir '/home/shyam/Code/hyper-nn/notebooks/torch/tensorboard_logs/HyperRNN_2022-04-14 13:37:25.814851'


Loss: 2.2627088758680554 Iters p sec: 0.013108001480102539: 100%|█| 100000/100000 [19:30<00:00, 85.46it/


In [47]:
hypernetwork = hypernetwork.to(torch.device('cpu'))

In [48]:
max_length = 20

# Sample from a category and starting letter
def sample(category, start_letter='A'):
    with torch.no_grad():  # no need to track history in sampling
        category_tensor = categoryTensor(category)
        input = inputTensor(start_letter)
        hidden = target_network.initHidden()

        output_name = start_letter

        for i in range(max_length):
            out = hypernetwork(inp=(category_tensor, input[0], hidden), has_aux=False)
            output, hidden = out
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:
                break
            else:
                letter = all_letters[topi]
                output_name += letter
            input = inputTensor(letter)

        return output_name

# Get multiple samples from one category and multiple starting letters
def samples(category, start_letters='ABC'):
    for start_letter in start_letters:
        print(sample(category, start_letter))

samples('Russian', 'RUS')

samples('German', 'GER')

samples('Spanish', 'SPA')

samples('Chinese', 'CHI')

Ratshin
Ualitrev
Shiknovskin
Garner
Elits
Rotz
Sando
Pelana
Alana
Chan
Han
In


### Dynamic HyperNetwork

In [93]:
from typing import Optional, Any, Tuple
import functools
import torch.nn.functional as F

class DynamicTorchEmbeddingModule(TorchEmbeddingModule):
    def __init__(self, embedding_dim: int, num_embeddings: int, input_shape):
        super().__init__(embedding_dim, num_embeddings)
        self.rnn_hidden_dim = num_embeddings
        self.gru = nn.RNNCell(np.prod(input_shape), num_embeddings)
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)

    def forward(self, inp, hidden_state: Optional[torch.Tensor] = None):
        x = torch.cat(inp[:-1], -1)
        if hidden_state is None:
            hidden_state = torch.zeros(x.size(0), self.rnn_hidden_dim).to(self.device)
        hidden_state = torch.sigmoid(self.gru(x, hidden_state))
        indices = torch.arange(self.num_embeddings).to(self.device)
        embedding = self.embedding(indices)*hidden_state.view(self.num_embeddings, 1)
        return embedding, {"hidden":hidden_state}

    def initHidden(self):
        return torch.zeros(1, self.num_embeddings).to(self.device)

class DynamicTorchWeightGenerator(TorchWeightGenerator):
    def __init__(self, embedding_dim: int, num_embeddings: int, hidden_dim: int, input_shape: Optional[Any] = None):
        super().__init__(embedding_dim, num_embeddings, hidden_dim, input_shape)
        self.linear1 = nn.Linear(embedding_dim, 16)
        self.linear2 = nn.Linear(16, hidden_dim)

    def forward(
        self, embedding, inp: Optional[Any] = None
    ) -> torch.Tensor:
        x = self.linear1(embedding)
        x = F.relu(x)
        return self.linear2(x).view(-1), {}

In [94]:
target_network = TargetRNN(n_letters, 128, n_letters)
pytorch_total_params = sum(p.numel() for p in target_network.parameters() if p.requires_grad)
pytorch_total_params

193467

In [95]:
EMBEDDING_DIM = 32
NUM_EMBEDDINGS = 96

dynamic_embedding_module = DynamicTorchEmbeddingModule.from_target(target_network, EMBEDDING_DIM, NUM_EMBEDDINGS, target_input_shape=(n_categories+n_letters,))
dynamic_weight_generator = DynamicTorchWeightGenerator.from_target(target_network, EMBEDDING_DIM, NUM_EMBEDDINGS, target_input_shape=(n_categories+n_letters,))

In [96]:
dynamic_hypernetwork = TorchHyperNetwork(
                                target_input_shape=((1, n_categories), (1, n_letters), (1, 128)),
                                target_network=target_network,
                                embedding_module=dynamic_embedding_module,
                                weight_generator=dynamic_weight_generator
                            )
pytorch_total_params = sum(p.numel() for p in dynamic_hypernetwork.parameters() if p.requires_grad)
pytorch_total_params

54976

In [97]:
device = torch.device('cuda')
dynamic_hypernetwork = dynamic_hypernetwork.to(device)

In [98]:
import torch.nn as nn

criterion = nn.NLLLoss()

def train_dynamic_hyper_rnn_step(dynamic_hyper_rnn, optimizer, category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor = target_line_tensor.unsqueeze(-1).to(dynamic_hyper_rnn.device)
    hidden = target_network.initHidden().to(dynamic_hyper_rnn.device)
    hyper_hidden = dynamic_hyper_rnn.embedding_module.initHidden()

    optimizer.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        out, generated_params, embedding, embedding_output, weight_generator_output = dynamic_hyper_rnn(inp=(category_tensor.to(dynamic_hyper_rnn.device), input_line_tensor[i].to(dynamic_hyper_rnn.device), hidden), embedding_module_kwargs={"hidden_state":hyper_hidden})
        hyper_hidden = embedding_output["hidden"]
        output, hidden = out
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()
    torch.nn.utils.clip_grad_norm_(dynamic_hyper_rnn.parameters(), 10.0)
    optimizer.step()

    grad_dict = {}
    for n, W in dynamic_hyper_rnn.named_parameters():
        if W.grad is not None:
            grad_dict["{}_grad".format(n)] = float(torch.sum(W.grad).item())

    # for p in rnn.parameters():
    #     p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, {"loss":loss.item() / input_line_tensor.size(0), **grad_dict}

In [None]:
learning_rate = 0.0001

train(hypernet=dynamic_hypernetwork, train_iter_fn=train_dynamic_hyper_rnn_step, lr=learning_rate, n_iters=100000)

Follow tensorboard logs with: python -m tensorboard.main --logdir '/home/shyam/Code/hyper-nn/notebooks/torch/tensorboard_logs/HyperRNN_2022-04-14 14:02:42.579049'


Loss: 1.2927270465426974 Iters p sec: 0.022990928421020507: 100%|█| 100000/100000 [28:19<00:00, 58.83it/


In [None]:
dynamic_hypernetwork = dynamic_hypernetwork.to(torch.device('cpu'))

In [103]:
max_length = 20

# Sample from a category and starting letter
def sample(category, start_letter='A'):
    with torch.no_grad():  # no need to track history in sampling
        category_tensor = categoryTensor(category)
        input = inputTensor(start_letter)
        hidden = target_network.initHidden()
        hyper_hidden = dynamic_hypernetwork.embedding_module.initHidden()

        output_name = start_letter

        hidden_states = []
        for i in range(max_length):
            out, generated_params, embedding, embedding_output, weight_generator_output = dynamic_hypernetwork(inp=(category_tensor, input[0], hidden), embedding_module_kwargs={"hidden_state":hyper_hidden})
            hyper_hidden = embedding_output["hidden"]
            output, hidden = out
            topv, topi = output.topk(1)
            topi = topi[0][0]
            if topi == n_letters - 1:
                break
            else:
                letter = all_letters[topi]
                output_name += letter
            input = inputTensor(letter)

        return output_name

# Get multiple samples from one category and multiple starting letters
def samples(category, start_letters='ABC'):
    for start_letter in start_letters:
        print(sample(category, start_letter))

samples('Russian', 'RUS')

samples('German', 'GER')

samples('Spanish', 'SPA')

samples('Chinese', 'CHI')

Ridanov
Ushilin
Stalanov
Gerst
Eckherg
Rocher
Salares
Piallos
Arana
Chan
Huan
Ing
