In [1]:
import os
# Expose only physical GPU 3 to this kernel. This cell runs before the cell
# that imports torch, and the loss tensor echoed later in the notebook reports
# device='cuda:0', i.e. the single visible GPU is index 0 inside the process.
os.environ.update(CUDA_VISIBLE_DEVICES='3')

In [2]:
from data_utils import TextMelLoader, TextMelCollate, symbols
from torch.utils.data import DataLoader
import models
import utils
import torch
import commons

In [3]:
# Load the experiment hyper-parameters from the JSON config file (the path is
# relative to the notebook's working directory).
hps = utils.get_hparams_from_file('config/base.json')
# Echo the data section to check the train/validation file lists and the
# audio / mel-spectrogram settings that the loaders below will use.
hps.data

{'training_files': '/home/husein/speech-bahasa/male-train-set.txt', 'validation_files': '/home/husein/speech-bahasa/male-test-set.txt', 'max_wav_value': 32768.0, 'sampling_rate': 22050, 'filter_length': 1024, 'hop_length': 256, 'win_length': 1024, 'n_mel_channels': 80, 'mel_fmin': 0.0, 'mel_fmax': 8000.0, 'add_noise': True}

In [4]:
# Build the flow-based generator and move it to the GPU.
# - out_channels is the number of mel channels from the data config.
# - n_vocab is one entry per symbol, plus one more if the data config sets
#   `add_blank`; that key is not among the hps.data entries echoed above, so
#   presumably the False fallback applies here and adds 0.
# - every remaining constructor argument comes from the model config section.
generator = models.FlowGenerator(
    out_channels=hps.data.n_mel_channels,
    n_vocab=len(symbols) + getattr(hps.data, "add_blank", False),
    **hps.model
).cuda()

# Project-specific Adam wrapper; scheduler type, model width and warm-up
# length are passed alongside the usual Adam settings.
# NOTE(review): presumably these drive a warm-up learning-rate schedule —
# confirm in commons.Adam.
optimizer_g = commons.Adam(
    generator.parameters(),
    lr=hps.train.learning_rate,
    betas=hps.train.betas,
    eps=hps.train.eps,
    scheduler=hps.train.scheduler,
    dim_model=hps.model.hidden_channels,
    warmup_steps=hps.train.warmup_steps,
)

The boolean parameter 'some' has been replaced with a string parameter 'mode'.
Q, R = torch.qr(A, some)
should be replaced with
Q, R = torch.linalg.qr(A, 'reduced' if some else 'complete') (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:1940.)
  w_init = torch.qr(torch.FloatTensor(self.n_split, self.n_split).normal_())[0]


In [5]:
# Training dataset built from the file list named in the data config.
train_dataset = TextMelLoader(hps.data.training_files, hps.data)

# Collate callable that turns a list of samples into one batch.
# NOTE(review): the positional 1 is presumably the frames-per-step setting —
# confirm against data_utils.TextMelCollate.
collate_fn = TextMelCollate(1)

# shuffle=False keeps the dataset order, so the first batch is the same on
# every fresh run; drop_last=True discards a trailing partial batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=hps.train.batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=collate_fn,
    num_workers=8,
    pin_memory=True,
)

In [6]:
# Validation dataset built from the validation file list in the data config.
val_dataset = TextMelLoader(hps.data.validation_files, hps.data)

# Same loader settings as the training loader, including the `collate_fn`
# object created in the training-loader cell (that cell must run first).
val_loader = DataLoader(
    val_dataset,
    batch_size=hps.train.batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=collate_fn,
    num_workers=8,
    pin_memory=True,
)

In [7]:
# Wrap the training loader in enumerate so (index, batch) pairs can be pulled
# one at a time with next() instead of running a full loop.
e = enumerate(train_loader)

In [8]:
# Same manual-stepping iterator for the validation loader.
e_val = enumerate(val_loader)

In [9]:
# Pull the next (index, batch) pair from the training iterator and unpack the
# collated batch into x / x_lengths and y / y_lengths (y has 80 channels in
# the shape echoed below, matching n_mel_channels, so y is the mel batch;
# x is presumably the padded text-symbol ids — confirm in TextMelCollate).
# Note: each re-run of this cell advances the iterator to a different batch.
batch_idx, (x, x_lengths, y, y_lengths) = next(e)

In [10]:
# Inspect the batch shape; the echoed value is [32, 80, 689]: 32 items,
# 80 channels (n_mel_channels in the config) and 689 = the largest y_lengths.
y.shape

torch.Size([32, 80, 689])

In [11]:
# Per-item lengths along the last axis of y (one entry per batch item).
y_lengths

tensor([689, 544, 620, 623, 585, 570, 601, 491, 444, 476, 478, 355, 440, 369,
        342, 298, 264, 288, 295, 288, 248, 300, 269, 186, 155, 129,  93, 101,
         90,  72,  88,  49])

In [12]:
# Per-item lengths of x; the echoed values are sorted in descending order.
x_lengths

tensor([111, 101,  99,  98,  97,  90,  88,  80,  77,  75,  74,  59,  59,  54,
         53,  51,  50,  45,  45,  38,  37,  36,  34,  28,  23,  21,  17,  15,
         14,  11,  10,   7])

In [13]:
# Move all four batch tensors to the GPU in one pass. non_blocking=True lets
# the copies overlap with host work when the source memory is pinned (both
# loaders are created with pin_memory=True).
x, x_lengths, y, y_lengths = (
    tensor.cuda(non_blocking=True)
    for tensor in (x, x_lengths, y, y_lengths)
)

In [14]:
# Put the model in training mode (the repr below lists Dropout layers, which
# are only active in this mode). The call returns the module itself, so as
# the cell's last expression its repr is echoed.
generator.train()

FlowGenerator(
  (encoder): TextEncoder(
    (emb): Embedding(34, 192)
    (pre): ConvReluNorm(
      (conv_layers): ModuleList(
        (0): Conv1d(192, 192, kernel_size=(5,), stride=(1,), padding=(2,))
        (1): Conv1d(192, 192, kernel_size=(5,), stride=(1,), padding=(2,))
        (2): Conv1d(192, 192, kernel_size=(5,), stride=(1,), padding=(2,))
      )
      (norm_layers): ModuleList(
        (0): LayerNorm()
        (1): LayerNorm()
        (2): LayerNorm()
      )
      (relu_drop): Sequential(
        (0): ReLU()
        (1): Dropout(p=0.5, inplace=False)
      )
      (proj): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
    )
    (encoder): Encoder(
      (drop): Dropout(p=0.1, inplace=False)
      (attn_layers): ModuleList(
        (0): MultiHeadAttention(
          (conv_q): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
          (conv_k): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
          (conv_v): Conv1d(192, 192, kernel_size=(1,), stride=(1,))
          (con

In [15]:
# Reset gradients through the optimizer wrapper before the backward pass in
# the next cell.
optimizer_g.zero_grad()

In [16]:
# --- Forward pass -----------------------------------------------------------
# The generator returns three tuples, which are unpacked into flow-side
# outputs, text-encoder-side outputs and alignment / duration outputs.
# NOTE(review): gen=False presumably selects the training direction rather
# than synthesis — confirm in models.FlowGenerator.forward.
(
    (z, z_m, z_logs, logdet, z_mask),
    (x_m, x_logs, x_mask),
    (attn, logw, logw_),
) = generator(x, x_lengths, y, y_lengths, gen=False)

# --- Losses -----------------------------------------------------------------
# Likelihood term on the flow outputs and a duration term on the two
# log-duration tensors; the total is their plain sum.
l_mle = commons.mle_loss(z, z_m, z_logs, logdet, z_mask)
l_length = commons.duration_loss(logw, logw_, x_lengths)
loss_gs = [l_mle, l_length]
loss_g = sum(loss_gs)

# --- Backward pass and parameter update --------------------------------------
loss_g.backward()
# Clip gradients with a threshold of 5 before stepping; the helper's return
# value is kept for inspection (presumably the gradient norm — confirm in
# commons.clip_grad_value_).
grad_norm = commons.clip_grad_value_(generator.parameters(), 5)
optimizer_g.step()

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [17]:
# Echo the combined loss of this single step; the tensor is still attached to
# the autograd graph (see grad_fn in the output).
loss_g

tensor(2.1593, device='cuda:0', grad_fn=<AddBackward0>)