[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/kan-bayashi/ParallelWaveGAN/blob/master/notebooks/convert_melgan_from_pytorch_to_tensorflow.ipynb)

# Convert MelGAN generator from pytorch to tensorflow

This notebook provides the procedure for converting the MelGAN generator from pytorch to tensorflow.  
The Tensorflow version can accelerate inference on both CPU and GPU.

In [1]:
# install libraries for google colab
# Clone the ParallelWaveGAN repository into the current working directory.
!git clone https://github.com/kan-bayashi/ParallelWaveGAN.git
# Install the package from inside the checkout (-qq keeps pip's output quiet).
# The checkout itself is also needed later: the vocoder config is read from it.
!cd ParallelWaveGAN; pip install -qq .

Cloning into 'ParallelWaveGAN'...
remote: Enumerating objects: 113, done.[K
remote: Counting objects: 100% (113/113), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 3552 (delta 65), reused 102 (delta 63), pack-reused 3439[K
Receiving objects: 100% (3552/3552), 23.88 MiB | 20.19 MiB/s, done.
Resolving deltas: 100% (1949/1949), done.
[K     |████████████████████████████████| 1.6MB 51.6MB/s 
[K     |████████████████████████████████| 204kB 51.0MB/s 
[K     |████████████████████████████████| 2.9MB 47.8MB/s 
[?25h  Building wheel for parallel-wavegan (setup.py) ... [?25l[?25hdone
  Building wheel for librosa (setup.py) ... [?25l[?25hdone
  Building wheel for kaldiio (setup.py) ... [?25l[?25hdone
[31mERROR: tensorflow-federated 0.12.0 has requirement tensorflow~=2.1.0, but you'll have tensorflow 1.15.0 which is incompatible.[0m
[31mERROR: tensorflow-federated 0.12.0 has requirement tensorflow-addons~=0.7.0, but you'll have tensorflow-addons 0.8.3 whi

In [2]:
%tensorflow_version 2.x
import os
import numpy as np
import torch
import tensorflow as tf
import yaml
from parallel_wavegan.models import MelGANGenerator
from parallel_wavegan.models.tf_models import TFMelGANGenerator

TensorFlow 2.x selected.


## Define Tensorflow and Pytorch models

In [0]:
# load vocoder config
vocoder_conf = 'ParallelWaveGAN/egs/ljspeech/voc1/conf/melgan.v1.long.yaml'
# NOTE(review): yaml.Loader is the full (unsafe) loader and can construct arbitrary
# Python objects; only use it on config files from a trusted checkout.
with open(vocoder_conf) as conf_file:
    config = yaml.load(conf_file, Loader=yaml.Loader)

In [4]:
# define Tensorflow MelGAN generator
# switch to graph (non-eager) mode before anything is built
tf.compat.v1.disable_eager_execution()
generator_params = config["generator_params"]

# batch and time axes are left unspecified; the last axis has 80 features
# (presumably the mel channels)
inputs = tf.keras.Input(batch_shape=[None, None, 80], dtype=tf.float32)
tf_generator = TFMelGANGenerator(**generator_params)
audio = tf_generator(inputs)

# wrap the generator into a Keras model and show its layers
tf_melgan = tf.keras.models.Model(inputs=inputs, outputs=audio)
tf_melgan.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, 80)]        0         
_________________________________________________________________
tf_mel_gan_generator (TFMelG (None, None, 1)           4260257   
Total params: 4,260,257
Trainable params: 4,260,257
Non-trainable params: 0
_________________________________________________________________


In [0]:
# define pytorch model
generator_params = config["generator_params"]
pytorch_melgan = MelGANGenerator(**generator_params)
# weight norm has to be stripped since TFMelGANGenerator does not support weight norm
pytorch_melgan.remove_weight_norm()
pytorch_melgan = pytorch_melgan.to("cpu")

In [6]:
# check the number of variables are the same
tf_vars = tf.compat.v1.global_variables()
state_dict = pytorch_melgan.state_dict()

# both counts are printed so they can be compared by eye
num_tf_vars = len(tf_vars)
num_pytorch_vars = len(state_dict)
print("Number Tensorflow variables: ", num_tf_vars)
print("Number Pytorch variables: ", num_pytorch_vars)

Number Tensorflow variables:  84
Number Pytorch variables:  84


## Convert parameters from pytorch to tensorflow

In [0]:
def reorder_tf_vars(tf_vars):
    """
    Swap every consecutive pair of tensorflow variables.

    Tensorflow lists each layer's variables as bias -> weight while the
    pytorch state dict lists them as weight -> bias, so exchanging the two
    members of every pair makes both orders line up.

    Args:
        tf_vars (sequence): Variables laid out as consecutive pairs.

    Returns:
        list: New list with the members of each pair exchanged.

    Raises:
        IndexError: If ``tf_vars`` has an odd number of elements.
    """
    swapped = []
    for start in range(0, len(tf_vars), 2):
        first, second = tf_vars[start], tf_vars[start + 1]
        swapped.extend((second, first))
    return swapped

In [0]:
# change the order of variables to be the same as pytorch
# Start from a fresh global_variables() list instead of the current `tf_vars`:
# the pairwise swap is its own inverse, so feeding `tf_vars` back in would undo
# the reordering whenever this cell is executed a second time.
tf_vars = reorder_tf_vars(tf.compat.v1.global_variables())

In [0]:
def convert_weights_pytorch_to_tensorflow(weights_pytorch):
    """
    Convert pytorch Conv1d weight variable to tensorflow Conv2D weights.
    Pytorch (f_output, f_input, kernel_size) -> TF (kernel_size, 1, f_input, f_output)

    The three input axes are reversed and a singleton axis is inserted at
    position 1, so the 1-D kernel becomes a ``kernel_size x 1`` 2-D kernel.

    Args:
        weights_pytorch (ndarray): 3-D weight array.

    Returns:
        ndarray: 4-D array satisfying ``out[k, 0, i, o] == weights_pytorch[o, i, k]``.

    Raises:
        ValueError: If ``weights_pytorch`` is not 3-dimensional.
    """
    # a single axis reversal replaces the former chain of three transposes
    weights_tensorflow = np.transpose(weights_pytorch, (2, 1, 0))  # [kernel_size, f_input, f_output]
    return np.expand_dims(weights_tensorflow, 1)  # [kernel_size, 1, f_input, f_output]

In [0]:
# convert pytorch's variables to tensorflow's one
# The two sequences are paired purely by position (tf_vars is expected to follow the
# pytorch weight -> bias order), so a count mismatch would pair unrelated variables:
# stop early instead of copying into the wrong places.
if len(tf_vars) != len(state_dict):
    raise ValueError(
        "Number of variables differs: "
        f"{len(tf_vars)} (tensorflow) vs {len(state_dict)} (pytorch)"
    )

for tf_var, (var_name, torch_param) in zip(tf_vars, state_dict.items()):
    try:
        torch_tensor = torch_param.numpy()
        if torch_tensor.ndim >= 2:
            # multi-dimensional parameters need their axes rearranged for tensorflow
            tensorflow_tensor = convert_weights_pytorch_to_tensorflow(torch_tensor)
        else:
            # 1-D parameters are copied as they are
            tensorflow_tensor = torch_tensor
        tf.keras.backend.set_value(tf_var, tensorflow_tensor)
    except Exception as err:
        # Still best-effort (keep going with the remaining variables), but report
        # which pair failed and why; KeyboardInterrupt is no longer swallowed.
        print(f"Failed to copy {var_name} -> {tf_var}: {err!r}")

## Check both outputs are almost equal

In [0]:
# random input for the pytorch model, with the 80-feature axis in position 1
fake_mels = np.random.sample((1, 80, 250)).astype(np.float32)
with torch.no_grad():
    y_pytorch = pytorch_melgan(torch.Tensor(fake_mels))

# the tensorflow model receives the same data with the last two axes swapped
y_tensorflow = tf_melgan.predict(fake_mels.transpose(0, 2, 1))

# compare the waveform of the single batch item from each framework
waveform_pytorch = y_pytorch[0, 0, :].numpy()
waveform_tensorflow = y_tensorflow[0, :, 0]
np.testing.assert_almost_equal(waveform_pytorch, waveform_tensorflow)

## Save Tensorflow and Pytorch models for benchmark

In [12]:
# make sure both export directories exist before writing into them
for export_dir in ("./checkpoint/tensorflow_generator/", "./checkpoint/pytorch_generator/"):
    os.makedirs(export_dir, exist_ok=True)

# tensorflow: export the Keras model as a SavedModel
tf.saved_model.save(tf_melgan, "./checkpoint/tensorflow_generator/")

# pytorch: store the state dict only
pytorch_weights = pytorch_melgan.state_dict()
torch.save(pytorch_weights, "./checkpoint/pytorch_generator/checkpoint.pkl")

INFO:tensorflow:Assets written to: ./checkpoint/tensorflow_generator/assets


INFO:tensorflow:Assets written to: ./checkpoint/tensorflow_generator/assets


## Inference speed benchmark on GPU

From here, we will compare the inference speed of the pytorch model and the converted tensorflow model.

In [0]:
# To enable eager mode, we need to restart the runtime
# (eager execution was disabled earlier in this session for the conversion).
import os
# os._exit ends the kernel process immediately, without the normal interpreter
# cleanup. The following cells re-import everything, so they are meant to be run
# in the fresh runtime that comes up afterwards.
os._exit(00)

In [1]:
%tensorflow_version 2.x
import numpy as np
import torch
import yaml
import tensorflow as tf
from tensorflow.python.framework import convert_to_constants
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import tag_constants
from parallel_wavegan.models import MelGANGenerator

TensorFlow 2.x selected.


In [0]:
# setup pytorch model
vocoder_conf = 'ParallelWaveGAN/egs/ljspeech/voc1/conf/melgan.v1.long.yaml'
with open(vocoder_conf) as conf_file:
    config = yaml.load(conf_file, Loader=yaml.Loader)

# build the generator and strip weight norm before loading the saved weights
pytorch_melgan = MelGANGenerator(**config["generator_params"])
pytorch_melgan.remove_weight_norm()

# read the checkpoint onto the CPU first, then move the whole model to the GPU
checkpoint_path = "./checkpoint/pytorch_generator/checkpoint.pkl"
pytorch_state = torch.load(checkpoint_path, map_location="cpu")
pytorch_melgan.load_state_dict(pytorch_state)
pytorch_melgan = pytorch_melgan.to("cuda").eval()

In [0]:
# setup tensorflow model
class TFMelGAN(object):
    """Thin wrapper that runs a saved Tensorflow MelGAN generator as a frozen function."""

    def __init__(self, saved_path):
        """Load the SavedModel found at ``saved_path``.

        Args:
            saved_path (str): Directory handed to ``tf.saved_model.load``.
        """
        self.saved_path = saved_path
        # callable returned by _load_model
        self.graph = self._load_model()
        # input / output tensors; both stay None until set_mels / run_inference run
        self.mels = None
        self.audios = None

    def _load_model(self):
        """Return the default serving signature with its variables converted to constants."""
        loaded = tf.saved_model.load(self.saved_path, tags=[tag_constants.SERVING])
        serving_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        serving_fn = loaded.signatures[serving_key]
        return convert_to_constants.convert_variables_to_constants_v2(serving_fn)

    def set_mels(self, values):
        """Store ``values`` as the tensor consumed by the next ``run_inference`` call."""
        mels_constant = tf.constant(values)
        self.mels = tf.identity(mels_constant)

    def get_mels(self):
        """Return the tensor stored by ``set_mels`` (None if never set)."""
        return self.mels

    def get_audio(self):
        """Return the result of the last ``run_inference`` call (None if never run)."""
        return self.audios

    def run_inference(self):
        """Call the loaded function on the stored mels; keep and return its first output."""
        outputs = self.graph(self.mels)
        self.audios = outputs[0]
        return self.audios


tf_melgan = TFMelGAN(saved_path='./checkpoint/tensorflow_generator/')

In [0]:
# warmup
# one random batch shared by both frameworks
fake_mels_np = np.random.sample((4, 1500, 80)).astype(np.float32)

# tensorflow side: the wrapper keeps the batch as it is
tf_melgan.set_mels(fake_mels_np)

# pytorch side: swap the last two axes and move the batch to the GPU;
# `fake_mels` is reused by the pytorch timing cell
fake_mels = torch.Tensor(fake_mels_np).transpose(2, 1).to("cuda")

# run each model once so one-time setup cost stays out of the timed cells
with torch.no_grad():
    y = pytorch_melgan(fake_mels)
y = tf_melgan.run_inference()

In [7]:
%%time
# check pytorch inference speed
with torch.no_grad():
    y = pytorch_melgan(fake_mels)

CPU times: user 6.99 ms, sys: 998 µs, total: 7.99 ms
Wall time: 12.7 ms


In [8]:
%%time
# check tensorflow inference speed
# Runs the loaded graph function on the mels stored via set_mels() in the warmup cell.
# NOTE(review): nothing here copies the result back to the host — confirm the GPU
# work has really finished when the timer stops.
y = tf_melgan.run_inference()

CPU times: user 5.16 ms, sys: 1.04 ms, total: 6.2 ms
Wall time: 6.05 ms
