In [1]:
!apt-get -qq update && apt-get -qq install -y libsndfile1 ffmpeg

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [2]:
!uv pip install --system --quiet \
    urllib3 \
    wget \
    unidecode \
    matplotlib \
    soundfile \
    librosa \
    Cython \
    packaging \
    nemo_toolkit['all']

In [3]:
!mkdir conf

In [4]:
%%writefile conf/quartznet-transducer
name: &name "QuartzNet15x5"

model:
  sample_rate: &sample_rate 16000
  repeat: &repeat 5
  dropout: &dropout 0.0
  separable: &separable true
  labels: &labels [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
                   "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]

  model_defaults:
    enc_hidden: 1024
    pred_hidden: 512
    joint_hidden: 512

  train_ds:
    manifest_filepath: "/kaggle/input/json-quartznet-librispeech/train_manifest.json"
    sample_rate: 16000
    labels: *labels
    batch_size: 2
    trim_silence: True
    max_duration: 16.7
    shuffle: True
    num_workers: 4
    pin_memory: true
    # tarred datasets
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    # bucketing params
    bucketing_strategy: "synced_randomized"
    bucketing_batch_size: null

  validation_ds:
    manifest_filepath: "/kaggle/input/json-quartznet-librispeech/dev_manifest.json"
    sample_rate: 16000
    labels: *labels
    batch_size: 2
    shuffle: False
    num_workers: 4
    pin_memory: true

  test_ds:
    manifest_filepath: "/kaggle/input/json-quartznet-librispeech/test_manifest.json"
    sample_rate: 16000
    labels: *labels
    batch_size: 2
    shuffle: False
    num_workers: 4
    pin_memory: true

  preprocessor:
    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
    normalize: "per_feature"
    window_size: 0.02
    sample_rate: *sample_rate
    window_stride: 0.01
    window: "hann"
    features: &n_mels 64
    n_fft: 512
    frame_splicing: 1
    dither: 0.00001

  # spec_augment:
  #   _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  #   freq_masks: 1  
  #   freq_width: 27  
  #   time_masks: 1    
  #   time_width: 50

  encoder:
    _target_: nemo.collections.asr.modules.ConvASREncoder
    feat_in: *n_mels
    activation: relu
    conv_mask: true

    jasper:
    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [33]
      repeat: 1
      residual: false
      separable: *separable
      stride: [2]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [33]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [33]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [33]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [39]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [39]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 256
      kernel: [39]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [51]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [51]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [51]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [63]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [63]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [63]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [75]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [75]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: 512
      kernel: [75]
      repeat: *repeat
      residual: true
      separable: *separable
      stride: [1]

    - dilation: [2]
      dropout: *dropout
      filters: 512
      kernel: [87]
      repeat: 1
      residual: false
      separable: *separable
      stride: [1]

    - dilation: [1]
      dropout: *dropout
      filters: &enc_filters 1024
      kernel: [1]
      repeat: 1
      residual: false
      stride: [1]

  decoder:
    _target_: nemo.collections.asr.modules.RNNTDecoder
    normalization_mode: null # Currently only null is supported for export.
    random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf
    blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference.

    prednet:
      pred_hidden: 512
      pred_rnn_layers: 2
      t_max: null
      dropout: 0.2

  joint:
    _target_: nemo.collections.asr.modules.RNNTJoint
    log_softmax: null  # 'null' would set it automatically according to CPU/GPU device
    preserve_memory: false  # dramatically slows down training, but might preserve some memory

    # Fuses the computation of prediction net + joint net + loss + WER calculation
    # to be run on sub-batches of size `fused_batch_size`.
    # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size.
    # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss.
    # Using small values here will preserve a lot of memory during training, but will make training slower as well.
    # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1.
    # However, to preserve memory, this ratio can be 1:8 or even 1:16.
    # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow.
    fuse_loss_wer: true
    fused_batch_size: 2

    jointnet:
      joint_hidden: 512
      activation: "relu"
      dropout: 0.2

  decoding:
    strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd.

    # greedy strategy config
    greedy:
      max_symbols: 10

    # beam strategy config
    beam:
      beam_size: 2
      return_best_hypothesis: False
      score_norm: true
      tsd_max_sym_exp: 50  # for Time Synchronous Decoding
      alsd_max_target_len: 2.0  # for Alignment-Length Synchronous Decoding

  optim:
    name: novograd
    # _target_: nemo.core.optim.optimizers.Novograd
    lr: .01
    # optimizer arguments
    betas: [0.8, 0.5]
    weight_decay: 0.001

    # scheduler setup
    sched:
      name: CosineAnnealing

      # pytorch lightning args
      # monitor: val_loss
      # reduce_on_plateau: false

      # Scheduler params
      warmup_steps: null
      warmup_ratio: null
      min_lr: 0.0
      last_epoch: -1

# trainer:
#   devices: 2 # number of gpus
#   max_epochs: 5
#   max_steps: -1 # computed at runtime if not set
#   num_nodes: 1
#   accelerator: auto
#   strategy: auto
#   accumulate_grad_batches: 1
#   enable_checkpointing: False  # Provided by exp_manager
#   logger: False  # Provided by exp_manager
#   log_every_n_steps: 1  # Interval of logging.
#   val_check_interval: 1.0  # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
#   benchmark: false # needs to be false for models with variable-length speech input as it slows down training

exp_manager:
  exp_dir: null
  name: *name
  create_tensorboard_logger: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    monitor: "val_wer"
    mode: "min"
  create_wandb_logger: False
  wandb_logger_kwargs:
    name: null
    project: null

Writing conf/quartznet-transducer


In [5]:
from omegaconf import OmegaConf
from nemo.collections.asr.models import EncDecCTCModel, EncDecRNNTModel
from lightning.pytorch import Trainer

cfg = OmegaConf.load("conf/quartznet-transducer")
ctc_model = EncDecCTCModel.restore_from("/kaggle/input/not-aug-en-18-33/nemo_experiments/QuartzNet15x5/2025-05-30_12-13-41/checkpoints/QuartzNet15x5.nemo")
model = EncDecRNNTModel(cfg=cfg.model)
model.encoder.load_state_dict(ctc_model.encoder.state_dict(), strict=True)

for p in model.encoder.parameters():
    p.requires_grad = False

# %%
# Định nghĩa callback để in WER sau mỗi validation epoch
import lightning.pytorch as pl

class PrintWERCallback(pl.Callback):
    def on_validation_epoch_end(self, trainer, pl_module):
        # Lấy val_wer từ logged metrics
        val_wer = trainer.callback_metrics.get('val_wer', None)
        current_epoch = trainer.current_epoch
        
        if val_wer is not None:
            print(f"\n{'='*50}")
            print(f"EPOCH {current_epoch} - VALIDATION WER: {val_wer:.4f}")
            print(f"{'='*50}\n")
        else:
            print(f"\n{'='*50}")
            print(f"EPOCH {current_epoch} - WER: Not available")
            print(f"{'='*50}\n")

# %%
from lightning.pytorch import Trainer

trainer = Trainer(
    max_epochs=10,  # Sử dụng max_epochs từ config
    accelerator="gpu",
    devices=1,
    callbacks=[PrintWERCallback()],
    logger=True,  # để ghi lại val_wer nếu cần dùng TensorBoard hoặc CSV
)

# %%
trainer.fit(model)

[NeMo W 2025-06-12 16:59:47 nemo_logging:405] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /kaggle/input/json-quartznet-librispeech/train_manifest.json
    sample_rate: 16000
    labels:
    - ' '
    - a
    - b
    - c
    - d
    - e
    - f
    - g
    - h
    - i
    - j
    - k
    - l
    - m
    - 'n'
    - o
    - p
    - q
    - r
    - s
    - t
    - u
    - v
    - w
    - x
    - 'y'
    - z
    - ''''
    batch_size: 16
    trim_silence: true
    max_duration: 16.7
    shuffle: true
    num_workers: 8
    pin_memory: false
    is_tarred: false
    tarred_audio_filepaths: null
    shuffle_n: 2048
    bucketing_strategy: synced_randomized
    bucketing_batch_size: null
    
[NeMo W 2025-06-12 16:59:47 nemo_logging:405] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.

[NeMo I 2025-06-12 16:59:47 nemo_logging:393] PADDING: 16
[NeMo I 2025-06-12 16:59:48 nemo_logging:393] Model EncDecCTCModel was successfully restored from /kaggle/input/not-aug-en-18-33/nemo_experiments/QuartzNet15x5/2025-05-30_12-13-41/checkpoints/QuartzNet15x5.nemo.
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] Dataset loaded with 28253 files totalling 99.25 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] 286 files were filtered totalling 1.34 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] Dataset loaded with 2703 files totalling 5.39 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] 0 files were filtered totalling 0.00 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] Dataset loaded with 2620 files totalling 5.40 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] 0 files were filtered totalling 0.00 hours
[NeMo I 2025-06-12 16:59:50 nemo_logging:393] PADDING: 16
[NeMo I 2025-06-12 16:59:51 nemo_logging:393] Using RNNT Loss : warprnnt_numba
    Loss warprnnt_numba_kwar

2025-06-12 16:59:53.206194: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749747593.439975      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749747593.510044      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


[NeMo I 2025-06-12 17:00:04 nemo_logging:393] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        lr: 0.01
        weight_decay: 0.001
    )
[NeMo I 2025-06-12 17:00:04 nemo_logging:393] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7f9a9ce18050>" 
    will be used during training (effective maximum steps = 141270) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: null
    min_lr: 0.0
    last_epoch: -1
    max_steps: 141270
    )


INFO: 
  | Name         | Type                              | Params | Mode 
---------------------------------------------------------------------------
0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0      | train
1 | encoder      | ConvASREncoder                    | 18.9 M | train
2 | decoder      | RNNTDecoder                       | 4.2 M  | train
3 | joint        | RNNTJoint                         | 802 K  | train
4 | loss         | RNNTLoss                          | 0      | train
5 | wer          | WER                               | 0      | train
---------------------------------------------------------------------------
5.0 M     Trainable params
18.9 M    Non-trainable params
23.9 M    Total params
95.657    Total estimated model params size (MB)
616       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 17:00:04 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 17:00:04 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding


    
    


[NeMo I 2025-06-12 17:00:10 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:00:10 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 17:00:10 nemo_logging:393] predicted:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcqqqcdcccdhhbbbbbbbbdddddddnnnddddddddddnnnnnnnnnnnnnnnnnnnnddddddddddddddddddddkkkkkddddkddddddddldvvvvvvvvvvvvvvvvvvvvcccccccccccccc cc cckkkkkkkdkkkkkkkkkkkkkkkkkkkkkkppppppppkpqqqqqqqqqqqqqqqcqcqqccccccccccccccccccccccccccccccvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvqqqdddddddddddddddddpppppppppprrrrrcccccvvvvpppvvvvvvvvvvvvvqqqqqqqqvvvvvvvvvvvvkkkkkvvvvkcccccccccccccccccccc          ddcccccccc                    vvvvvvvvvvcccccccccccccccccccc  c      cddddddddddddddddddddccccccccccqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccbbbbbcbccccccccccqqqqqqqqqqccccccccccccccc

    
    


[NeMo I 2025-06-12 17:00:11 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:00:11 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo I 2025-06-12 17:00:11 nemo_logging:393] predicted:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccqqqqqqqqqqqqccccqqqqqqqqqqqqqq   ccccccccccccccccc                    ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccqqqdddddddeedddddddddbddbbbbbbecccccccccccccccccccccccccccccccccccccccccccccccccccccccccccbbbbbbbbbb          vvvvvvvvvvddddddddddddddddddddddddddddddccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccuuuuuuuuuucccccuuuuuccccccccccqqqqqqqqqqccccccccccccccccccccccccccccccccccccccccvv v    v kkkkkkkkkkcccccccccc                    qqqbbbbbbbbbbbbbbbbbbbbbbbbbbbhhhhhhhhhhhhhhhhhhhhddddhhhhdhddddddddddddddddddddddddddddddccccccdccccccccccccccccccccccccccccccccccccccc

Training: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 17:00:11 nemo_logging:393] Disabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 17:00:11 nemo_logging:393] Disabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding


    
    


[NeMo I 2025-06-12 17:00:49 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:00:49 nemo_logging:393] reference:are no doubt the effect of hypnotization by the priests some of the strange movements of whole communities during the crusades are to be explained either on the theory of hypnotization or of contagion
[NeMo I 2025-06-12 17:00:49 nemo_logging:393] predicted:
[NeMo I 2025-06-12 17:00:56 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:00:56 nemo_logging:393] reference:and i am not without hope that missus davilow may shortly follow him her presence will be the greatest comfort to you it will give you a motive to save her from unnecessary pain yes yes i will try and you will not go away
[NeMo I 2025-06-12 17:00:56 nemo_logging:393] predicted:
[NeMo I 2025-06-12 17:01:03 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:01:03 nemo_logging:393] reference:the restlessness and jealousy with which it fills a mind naturally active contented and unsuspicious the manner in which it tinges every ev

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 17:36:47 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 17:36:47 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 17:36:48 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:36:48 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 17:36:48 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and cossly gowns the good pintees would saited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 17:36:48 nemo_logging:393] 
    
[NeMo I 2025-06-12 17:36:48 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo 

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 18:16:50 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 18:16:50 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 18:16:51 nemo_logging:393] 
    
[NeMo I 2025-06-12 18:16:51 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 18:16:51 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and costly gowns the good pinkees would aited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 18:16:51 nemo_logging:393] 
    
[NeMo I 2025-06-12 18:16:51 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo I

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 18:56:41 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 18:56:41 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 18:56:43 nemo_logging:393] 
    
[NeMo I 2025-06-12 18:56:43 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 18:56:43 nemo_logging:393] predicted:if i a lived as uxuriously as my people do and had servants and costly gowns the good pintees would saithat their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 18:56:43 nemo_logging:393] 
    
[NeMo I 2025-06-12 18:56:43 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[Ne

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 19:36:47 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 19:36:47 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 19:36:48 nemo_logging:393] 
    
[NeMo I 2025-06-12 19:36:48 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 19:36:48 nemo_logging:393] predicted:if i a lived as uxuriously as my people do and had servants and costly gowns the good pintees would saited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 19:36:49 nemo_logging:393] 
    
[NeMo I 2025-06-12 19:36:49 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeM

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 20:16:54 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 20:16:54 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 20:16:55 nemo_logging:393] 
    
[NeMo I 2025-06-12 20:16:55 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 20:16:55 nemo_logging:393] predicted:if i a lived as uxuriously as my people do and had servants and costly gowns the good pintees would saithat their queen had more than that themselves and it would be true
[NeMo I 2025-06-12 20:16:55 nemo_logging:393] 
    
[NeMo I 2025-06-12 20:16:55 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[Ne

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 20:57:08 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 20:57:08 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 20:57:10 nemo_logging:393] 
    
[NeMo I 2025-06-12 20:57:10 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 20:57:10 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and costly gowns the good pintees would saithat their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 20:57:10 nemo_logging:393] 
    
[NeMo I 2025-06-12 20:57:10 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 21:37:17 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 21:37:17 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 21:37:18 nemo_logging:393] 
    
[NeMo I 2025-06-12 21:37:18 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 21:37:18 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and costly gowns the good pintees would saited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 21:37:18 nemo_logging:393] 
    
[NeMo I 2025-06-12 21:37:18 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo 

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 22:17:25 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 22:17:25 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 22:17:26 nemo_logging:393] 
    
[NeMo I 2025-06-12 22:17:26 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 22:17:26 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and cossly gowns the good pintees would saithat their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 22:17:26 nemo_logging:393] 
    
[NeMo I 2025-06-12 22:17:26 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 22:57:33 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 22:57:33 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 22:57:34 nemo_logging:393] 
    
[NeMo I 2025-06-12 22:57:34 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 22:57:34 nemo_logging:393] predicted:if i a lived as uxuriously as my people do and had servants and cossly gowns the good pintees would saited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 22:57:35 nemo_logging:393] 
    
[NeMo I 2025-06-12 22:57:35 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeM

Validation: |          | 0/? [00:00<?, ?it/s]

[NeMo I 2025-06-12 23:37:34 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_models.EncDecRNNTModel'>.decoding.decoding
[NeMo I 2025-06-12 23:37:34 nemo_logging:393] Enabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
[NeMo I 2025-06-12 23:37:35 nemo_logging:393] 
    
[NeMo I 2025-06-12 23:37:35 nemo_logging:393] reference:if i lived as luxuriously as my people do and had servants and costly gowns the good pinkies would say that their queen had more than they themselves and it would be true
[NeMo I 2025-06-12 23:37:35 nemo_logging:393] predicted:if i lived as uxuriously as my people do and had servants and costly gowns the good pintees would saited their queen had more than thay themselves and it would be true
[NeMo I 2025-06-12 23:37:35 nemo_logging:393] 
    
[NeMo I 2025-06-12 23:37:35 nemo_logging:393] reference:it is much more desirable to be a private citizen happy and care free
[NeMo 