In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import numpy as np
from torch.utils.data import DataLoader
from pydub import AudioSegment
from IPython.display import Audio
from tqdm.notebook import tqdm

seed = 1337
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
ebook_path = 'pg2554.epub'
rate = 22050
batch_size = 100

In [4]:
# !pip install nvidia-pyindex
# !pip install pytorch-quantization

In [5]:
tacotron2 = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tacotron2', model_math='fp16')
tacotron2 = tacotron2.to(device)
tacotron2 = tacotron2.eval()

utils = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_tts_utils')

waveglow = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_waveglow', model_math='fp16')
waveglow = waveglow.remove_weightnorm(waveglow)
waveglow = waveglow.to(device)
waveglow = waveglow.eval()

Using cache found in /home/paperspace/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
  "pytorch_quantization module not found, quantization will not be available"
  "pytorch_quantization module not found, quantization will not be available"
Using cache found in /home/paperspace/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub
Using cache found in /home/paperspace/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


In [6]:
def read_ebook(ebook_path):
    
    import ebooklib
    from ebooklib import epub
    from bs4 import BeautifulSoup
    from tqdm.notebook import tqdm
    from nltk import tokenize, download
    download('punkt')
    
    book = epub.read_epub(ebook_path)

    corpus = []
    for item in tqdm(list(book.get_items())):
        if item.get_type() == ebooklib.ITEM_DOCUMENT:
            input_text = BeautifulSoup(item.get_content(), "html.parser").text
            text_list = []
            for paragraph in input_text.split('\n'):
                sentences = tokenize.sent_tokenize(paragraph)
                text_list.append(sentences)
            text_list = [text for sentences in text_list for text in sentences]
            corpus.append(text_list)

    return corpus

In [7]:
ebook = read_ebook(ebook_path)

[nltk_data] Downloading package punkt to /home/paperspace/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


HBox(children=(FloatProgress(value=0.0, max=55.0), HTML(value='')))




In [8]:
len(ebook)

50

In [9]:
ebook[6]

[' CHAPTER III',
 'He waked up late next day after a broken sleep.',
 'But his sleep had not refreshed him; he waked up bilious, irritable, ill-tempered, and looked with hatred at his room.',
 'It was a tiny cupboard of a room about six paces in length.',
 'It had a poverty-stricken appearance with its dusty yellow paper peeling off the walls, and it was so low-pitched that a man of more than average height was ill at ease in it and felt every moment that he would knock his head against the ceiling.',
 'The furniture was in keeping with the room: there were three old chairs, rather rickety; a painted table in the corner on which lay a few manuscripts and books; the dust that lay thick upon them showed that they had been long untouched.',
 'A big clumsy sofa occupied almost the whole of one wall and half the floor space of the room; it was once covered with chintz, but was now in rags and served Raskolnikov as a bed.',
 'Often he went to sleep on it, as he was, without undressing, witho

In [10]:
text = ebook[6]
text = [sentence[:150] for sentence in text]
tokens, lengths = utils.prepare_input_sequence(text)



In [11]:
lengths

tensor([153, 153, 153, 153, 152, 150, 150, 150, 150, 150, 150, 150, 150, 150,
        150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
        150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
        150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
        150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
        150, 150, 150, 150, 150, 150, 149, 149, 149, 149, 146, 146, 146, 145,
        145, 143, 143, 143, 142, 138, 135, 134, 132, 131, 129, 128, 126, 124,
        124, 123, 122, 118, 118, 117, 117, 116, 115, 115, 115, 114, 113, 113,
        112, 112, 109, 109, 108, 108, 108, 106, 106, 104, 103, 102, 100,  99,
         99,  97,  97,  96,  95,  93,  93,  91,  91,  90,  89,  88,  88,  88,
         86,  86,  85,  85,  81,  81,  78,  78,  77,  76,  74,  73,  71,  70,
         69,  68,  65,  65,  64,  63,  63,  62,  61,  59,  59,  57,  57,  57,
         56,  54,  53,  53,  51,  51,  51,  50,  49,  48,  47,  

In [12]:
len(text)

230

In [13]:
tokens.shape

torch.Size([230, 153])

In [14]:
data = [(tokens[i],lengths[i]) for i in range(len(tokens))]

In [15]:
dataloader = DataLoader(data, batch_size=6)

In [16]:
len(dataloader)

39

In [17]:
audio_list = []
for X, length in tqdm(dataloader):
    with torch.no_grad():
        Y, _, _ = tacotron2.infer(X, length)
        audio = waveglow.infer(Y)
        audio_list.append(audio)
#     audio_numpy = audio[0].data.cpu().numpy()

HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))

KeyboardInterrupt: 

In [None]:
!nvidia-smi

In [None]:
50 * 2.5