Epoch 0/-2 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━ 1063/1501 0:11:18 • 0:04:05 1.79it/s v_num: 0 loss/train_step: 3.557
[2023-09-30 22:57:21,736][matcha.utils.utils][ERROR] - Traceback (most recent call last):
  File "/home/azureuser/exp/Matcha-TTS/matcha/utils/utils.py", line 76, in wrap
    metric_dict, object_dict = task_func(cfg=cfg)
  File "/home/azureuser/exp/Matcha-TTS/matcha/train.py", line 79, in train
    trainer.fit(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path"))
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 532, in fit
    call._call_and_handle_interrupt(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 43, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 571, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
    results = self._run_stage()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1023, in _run_stage
    self.fit_loop.run()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 202, in run
    self.advance()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 355, in advance
    self.epoch_loop.run(self._data_fetcher)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 133, in run
    self.advance(data_fetcher)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 219, in advance
    batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 188, in run
    self._optimizer_step(kwargs.get("batch_idx", 0), closure)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 266, in _optimizer_step
    call._call_lightning_module_hook(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 146, in _call_lightning_module_hook
    output = fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/core/module.py", line 1276, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py", line 161, in step
    step_output = self._strategy.optimizer_step(self._optimizer, closure, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 231, in optimizer_step
    return self.precision_plugin.optimizer_step(optimizer, model=model, closure=closure, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/amp.py", line 76, in optimizer_step
    closure_result = closure()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 142, in __call__
    self._result = self.closure(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 128, in closure
    step_output = self._step_fn()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 315, in _training_step
    training_step_output = call._call_strategy_hook(trainer, "training_step", *kwargs.values())
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 294, in _call_strategy_hook
    output = fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 380, in training_step
    return self.model.training_step(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/baselightningmodule.py", line 79, in training_step
    loss_dict = self.get_losses(batch)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/baselightningmodule.py", line 61, in get_losses
    dur_loss, prior_loss, diff_loss = self(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/matcha_tts.py", line 233, in forward
    diff_loss, _ = self.decoder.compute_loss(x1=y, mask=y_mask, mu=mu_y, spks=spks, cond=cond)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/flow_matching.py", line 117, in compute_loss
    loss = F.mse_loss(self.estimator(y, mask, mu, t.squeeze(), spks), u, reduction="sum") / (
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/decoder.py", line 431, in forward
    x = transformer_block(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/transformer.py", line 266, in forward
    attn_output = self.attn1(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 417, in forward
    return self.processor(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 1036, in __call__
    hidden_states = F.scaled_dot_product_attention(
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.88 GiB (GPU 0; 79.10 GiB total capacity; 29.03 GiB already allocated; 2.47 GiB free; 29.53 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.
See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
[2023-09-30 22:57:21,739][matcha.utils.utils][INFO] - Output dir: /home/azureuser/exp/Matcha-TTS/logs/train/ljspeech/runs/2023-09-30_22-45-53
Error executing job with overrides: ['experiment=ljspeech']
Traceback (most recent call last):
  File "/home/azureuser/exp/Matcha-TTS/matcha/train.py", line 122, in <module>
    main()  # pylint: disable=no-value-for-parameter
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
    _run_hydra(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
    raise ex
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
    return func()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
    _ = ret.return_value
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
    raise self._return_value
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
    ret.return_value = task_function(task_cfg)
  File "/home/azureuser/exp/Matcha-TTS/matcha/train.py", line 112, in main
    metric_dict, _ = train(cfg)
  File "/home/azureuser/exp/Matcha-TTS/matcha/utils/utils.py", line 86, in wrap
    raise ex
  File "/home/azureuser/exp/Matcha-TTS/matcha/utils/utils.py", line 76, in wrap
    metric_dict, object_dict = task_func(cfg=cfg)
  File "/home/azureuser/exp/Matcha-TTS/matcha/train.py", line 79, in train
    trainer.fit(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path"))
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 532, in fit
    call._call_and_handle_interrupt(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 43, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 571, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 980, in _run
    results = self._run_stage()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/trainer.py", line 1023, in _run_stage
    self.fit_loop.run()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 202, in run
    self.advance()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py", line 355, in advance
    self.epoch_loop.run(self._data_fetcher)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 133, in run
    self.advance(data_fetcher)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/training_epoch_loop.py", line 219, in advance
    batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 188, in run
    self._optimizer_step(kwargs.get("batch_idx", 0), closure)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 266, in _optimizer_step
    call._call_lightning_module_hook(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 146, in _call_lightning_module_hook
    output = fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/core/module.py", line 1276, in optimizer_step
    optimizer.step(closure=optimizer_closure)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/core/optimizer.py", line 161, in step
    step_output = self._strategy.optimizer_step(self._optimizer, closure, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 231, in optimizer_step
    return self.precision_plugin.optimizer_step(optimizer, model=model, closure=closure, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/plugins/precision/amp.py", line 76, in optimizer_step
    closure_result = closure()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 142, in __call__
    self._result = self.closure(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 128, in closure
    step_output = self._step_fn()
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/loops/optimization/automatic.py", line 315, in _training_step
    training_step_output = call._call_strategy_hook(trainer, "training_step", *kwargs.values())
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py", line 294, in _call_strategy_hook
    output = fn(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/lightning/pytorch/strategies/strategy.py", line 380, in training_step
    return self.model.training_step(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/baselightningmodule.py", line 79, in training_step
    loss_dict = self.get_losses(batch)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/baselightningmodule.py", line 61, in get_losses
    dur_loss, prior_loss, diff_loss = self(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/matcha_tts.py", line 233, in forward
    diff_loss, _ = self.decoder.compute_loss(x1=y, mask=y_mask, mu=mu_y, spks=spks, cond=cond)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/flow_matching.py", line 117, in compute_loss
    loss = F.mse_loss(self.estimator(y, mask, mu, t.squeeze(), spks), u, reduction="sum") / (
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/decoder.py", line 431, in forward
    x = transformer_block(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/azureuser/exp/Matcha-TTS/matcha/models/components/transformer.py", line 266, in forward
    attn_output = self.attn1(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 417, in forward
    return self.processor(
  File "/anaconda/envs/matcha-tts/lib/python3.10/site-packages/diffusers/models/attention_processor.py", line 1036, in __call__
    hidden_states = F.scaled_dot_product_attention(
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.88 GiB (GPU 0; 79.10 GiB total capacity; 29.03 GiB already allocated; 2.47 GiB free; 29.53 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
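
The allocator hint at the end of the error message (PYTORCH_CUDA_ALLOC_CONF with max_split_size_mb) only takes effect if it is in the environment before PyTorch makes its first CUDA allocation. Below is a minimal Python sketch of one way to apply it and re-launch the same Hydra run that failed above; the 512 MiB split size is an illustrative value rather than a recommendation from the log, and exporting the variable in the shell before running matcha/train.py works just as well.

import os
import subprocess

# Copy the current environment and add the allocator setting mentioned in the OOM message.
# max_split_size_mb:512 is an example value; tune it (or reduce the batch size) if OOM persists.
env = dict(os.environ)
env["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

# Re-run the same training job that produced the traceback above, with the variable set.
subprocess.run(
    ["python", "matcha/train.py", "experiment=ljspeech"],
    env=env,
    check=True,
)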