In [5]:
# Show per-GPU temperature, utilisation and memory usage on this host via the gpustat CLI.
!gpustat

[1m[37mauriga                 [m  Thu Dec 15 16:55:54 2022  [1m[30m510.47.03[m
[36m[0][m [34mTesla V100-SXM2-32GB[m |[31m 38'C[m, [32m 22 %[m | [36m[1m[33m 6627[m / [33m32768[m MB | [1m[30me.trofimenko[m([33m6367M[m)
[36m[1][m [34mTesla V100-SXM2-32GB[m |[31m 34'C[m, [32m  0 %[m | [36m[1m[33m  260[m / [33m32768[m MB |
[36m[2][m [34mTesla V100-SXM2-32GB[m |[31m 33'C[m, [32m  0 %[m | [36m[1m[33m  260[m / [33m32768[m MB |
[36m[3][m [34mTesla V100-SXM2-32GB[m |[31m 31'C[m, [32m  0 %[m | [36m[1m[33m  260[m / [33m32768[m MB |


In [4]:
# Display the installed PyTorch build string so the environment is recorded with the run.
import torch
torch.__version__

'1.13.0+cu117'

In [15]:
import pandas as pd
import os
import json
import shutil
from jiwer import wer, cer

import glob
import subprocess
import tarfile
import wget
import copy
from omegaconf import OmegaConf, open_dict

import wandb
from tqdm.auto import tqdm

from collections import defaultdict

import re
import unicodedata

import nemo
import nemo.collections.asr as nemo_asr
from nemo.collections.asr.parts.preprocessing import perturb, segment
from nemo.collections.asr.metrics.wer import word_error_rate
from nemo.utils import logging, exp_manager

import torch
import pytorch_lightning as ptl

In [16]:
def read_manifest(path):
    """Load a JSON-lines manifest file into a list of parsed records.

    Each non-empty line of the file is parsed with ``json.loads`` and the
    results are returned in file order.

    Args:
        path: Path to a manifest file containing one JSON document per line.

    Returns:
        list: One parsed object per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    manifest = []
    # The manifests in this notebook hold Russian transcripts, so decode
    # explicitly as UTF-8 instead of relying on the platform's locale default.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing in json.loads.
            if not line:
                continue
            manifest.append(json.loads(line))
    return manifest

# Language code: selects the manifest sub-directory and is embedded in the
# experiment name built by init_trainer.
LANGUAGE = "ru"

# Dataset root folder (not referenced by the cells visible in this notebook).
data_dir = 'datasets/'

# Manifests for the selected language are looked up under manifests/<LANGUAGE>.
manifest_dir = os.path.join('manifests', LANGUAGE)

In [17]:
# Training pool: the train and dev splits of every listed corpus, joined into a
# single comma-separated string of manifest paths.
train_manifests = ','.join([
    f'{manifest_dir}/commonvoice_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice_dev_manifest_lower.json',
    f'{manifest_dir}/commonvoice4_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice4_dev_manifest_lower.json',
    f'{manifest_dir}/commonvoice5_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice5_dev_manifest_lower.json',
    f'{manifest_dir}/commonvoice9_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice9_dev_manifest_lower.json',
    f'{manifest_dir}/commonvoice10_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice10_dev_manifest_lower.json',
    f'{manifest_dir}/commonvoice11_train_manifest_lower.json',
    f'{manifest_dir}/commonvoice11_dev_manifest_lower.json',
    # 'golos/train/manifest.json',  # currently excluded from training
    'ruls/train/train_manifest_lower.json',
    'ruls/dev/dev_manifest_lower.json',
])

# Validation pool: the test split of the same corpora, in the same
# comma-separated format.
valid_manifests = ','.join([
    f'{manifest_dir}/commonvoice_test_manifest_lower.json',
    f'{manifest_dir}/commonvoice4_test_manifest_lower.json',
    f'{manifest_dir}/commonvoice5_test_manifest_lower.json',
    f'{manifest_dir}/commonvoice9_test_manifest_lower.json',
    f'{manifest_dir}/commonvoice10_test_manifest_lower.json',
    f'{manifest_dir}/commonvoice11_test_manifest_lower.json',
    # 'golos/test/crowd/manifest.json',     # currently excluded from validation
    # 'golos/test/farfield/manifest.json',  # currently excluded from validation
    'ruls/test/test_manifest_lower.json',
])

In [18]:
def init_model(model, train_manifest_dir, valid_manifest_dir,
               train_batch_size, valid_batch_size, learning_rate,
               num_workers=32):
    """Configure a restored NeMo ASR model in place for fine-tuning.

    The function rebuilds the train/validation data loaders from the given
    manifests, overrides the optimizer and scheduler settings, replaces the
    spectrogram augmentation module and adjusts the WER metric flags.

    Args:
        model: Model object exposing ``cfg``, ``setup_training_data``,
            ``setup_multiple_validation_data``, ``from_config_dict`` and
            ``_wer`` (an ``EncDecCTCModel`` in this notebook).
        train_manifest_dir: Value assigned to ``train_ds.manifest_filepath``.
            Despite the name, the notebook passes a comma-separated string of
            manifest file paths.
        valid_manifest_dir: Value assigned to
            ``validation_ds.manifest_filepath`` (same format as above).
        train_batch_size: Batch size for the training data config.
        valid_batch_size: Batch size for the validation data config.
        learning_rate: Value written to ``optim.lr``.
        num_workers: Value written to ``num_workers`` of both the train and
            validation data configs. Defaults to 32, the value that used to be
            hard-coded; lower it on machines with fewer CPU cores.

    Returns:
        None. ``model`` is modified in place.
    """
    # Work on a copy so the dataset sections can be edited freely before they
    # are handed to the model's setup methods.
    cfg = copy.deepcopy(model.cfg)

    # Waveform-level perturbations, each applied with probability 0.5.
    # Only attached to the training data config below.
    audio_augmentations = dict(
        white_noise=dict(
            prob=0.5,
            min_level=-90,
            max_level=-46
        ),
        gain=dict(
            prob=0.5,
            min_gain_dbfs=0,
            max_gain_dbfs=50
        ))

    with open_dict(cfg):
        ## TRAIN CONFIG ##
        cfg.train_ds.manifest_filepath = train_manifest_dir
        cfg.train_ds.normalize_transcripts = False
        cfg.train_ds.batch_size = train_batch_size
        cfg.train_ds.num_workers = num_workers
        cfg.train_ds.pin_memory = True
        cfg.train_ds.trim_silence = True
        cfg.train_ds.sample_rate = 16000
        cfg.train_ds.augmentor = audio_augmentations

        ## VALID CONFIG ##
        cfg.validation_ds.manifest_filepath = valid_manifest_dir
        cfg.validation_ds.normalize_transcripts = False
        cfg.validation_ds.batch_size = valid_batch_size
        cfg.validation_ds.num_workers = num_workers
        cfg.validation_ds.pin_memory = True
        cfg.validation_ds.trim_silence = True
        cfg.validation_ds.sample_rate = 16000

    # setup data loaders with new configs
    model.setup_training_data(cfg.train_ds)
    model.setup_multiple_validation_data(cfg.validation_ds)

    ## OPTIMIZERS ##
    # These edits go directly into the model's own config (not the copy).
    with open_dict(model.cfg.optim):
        model.cfg.optim.name = 'novograd'
        model.cfg.optim.lr = learning_rate
        model.cfg.optim.betas = [0.8, 0.5]
        model.cfg.optim.weight_decay = 0.001
        model.cfg.optim.sched.name = 'CosineAnnealing'
        # No warm-up: both warm-up knobs are explicitly cleared.
        model.cfg.optim.sched.warmup_steps = None
        model.cfg.optim.sched.warmup_ratio = None
        model.cfg.optim.sched.min_lr = 0.0
        model.cfg.optim.sched.last_epoch = -1

    ## AUGMENTATION ##
    with open_dict(model.cfg.spec_augment):
        # Alternative masking settings, currently disabled:
        # model.cfg.spec_augment.freq_masks = 2
        # model.cfg.spec_augment.freq_width = 25
        # model.cfg.spec_augment.time_masks = 2
        # model.cfg.spec_augment.time_width = 0.05

        model.cfg.spec_augment.rect_freq = 50
        model.cfg.spec_augment.rect_masks = 5
        model.cfg.spec_augment.rect_time = 120

    # Rebuild the augmentation module so the updated config takes effect.
    model.spec_augmentation = model.from_config_dict(model.cfg.spec_augment)

    # Report word error rate (not character error rate) ...
    model._wer.use_cer = False

    # ... and do not log predictions from the metric.
    model._wer.log_prediction = False

def init_trainer(model, num_epochs: int, log_every_n_steps: int, val_every_n_epoch: int,
                 name_of_run: str, name_of_project: str, model_name: str):
    """Build a single-GPU Lightning trainer, attach it to ``model`` and set up
    NeMo's experiment manager with W&B logging.

    Args:
        model: Model to attach the trainer to (via ``model.set_trainer``).
        num_epochs: Passed to the trainer as ``max_epochs``.
        log_every_n_steps: Passed to the trainer as ``log_every_n_steps``.
        val_every_n_epoch: Passed to the trainer as ``check_val_every_n_epoch``.
        name_of_run: W&B run name.
        name_of_project: W&B project name.
        model_name: Inserted into the experiment name
            ``ASR-<model_name>-Model-<LANGUAGE>``.

    Returns:
        The configured ``ptl.Trainer`` instance.
    """
    # Checkpointing and logging are switched off on the trainer itself; the
    # experiment manager call below is given the trainer to set these up.
    trainer_kwargs = dict(
        devices=1,
        accelerator='gpu',
        auto_select_gpus=True,
        strategy=None,
        max_epochs=num_epochs,
        auto_lr_find=False,
        accumulate_grad_batches=1,
        enable_checkpointing=False,
        logger=False,
        log_every_n_steps=log_every_n_steps,
        check_val_every_n_epoch=val_every_n_epoch,
    )
    trainer = ptl.Trainer(**trainer_kwargs)

    model.set_trainer(trainer)
    model.cfg = model._cfg

    # Environment variable generally used for multi-node multi-gpu training.
    # In notebook environments, this flag is unnecessary and can cause logs of
    # multiple training runs to overwrite each other.
    os.environ.pop('NEMO_EXPM_VERSION', None)

    # Checkpoint callback settings: monitor validation WER, lower is better.
    checkpoint_params = exp_manager.CallbackParams(
        monitor="val_wer",
        mode="min",
        always_save_nemo=True,
        save_best_model=True,
    )
    wandb_kwargs = {
        'name': name_of_run,
        'project': name_of_project,
        'log_model': 'all',
    }
    exp_config = exp_manager.ExpManagerConfig(
        exp_dir='experiments/',
        name=f"ASR-{model_name}-Model-{LANGUAGE}",
        checkpoint_callback_params=checkpoint_params,
        create_wandb_logger=True,
        wandb_logger_kwargs=wandb_kwargs,
    )

    # The returned log directory is not needed by the notebook.
    exp_manager.exp_manager(trainer, OmegaConf.structured(exp_config))

    return trainer

In [19]:
# Checkpoint to continue fine-tuning from, restored as a CTC model.
model_path = 'ASR_models/golos_ft_withSpecAug_50epochs.nemo'
model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_path)

[NeMo W 2022-12-02 14:15:35 modelPT:142] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: manifests/ru/commonvoice_train_manifest_lower.json,manifests/ru/commonvoice_dev_manifest_lower.json,manifests/ru/commonvoice4_train_manifest_lower.json,manifests/ru/commonvoice4_dev_manifest_lower.json,manifests/ru/commonvoice5_train_manifest_lower.json,manifests/ru/commonvoice5_dev_manifest_lower.json,manifests/ru/commonvoice9_train_manifest_lower.json,manifests/ru/commonvoice9_dev_manifest_lower.json,manifests/ru/commonvoice10_train_manifest_lower.json,manifests/ru/commonvoice10_dev_manifest_lower.json,manifests/ru/commonvoice11_train_manifest_lower.json,manifests/ru/commonvoice11_dev_manifest_lower.json,ruls/train/train_manifest_lower.json,ruls/dev/dev_manifest_lower.json
    sample_rate: 16000
    labels:
    - ' '
    - а
    - б
  

[NeMo I 2022-12-02 14:15:35 features:225] PADDING: 16
[NeMo I 2022-12-02 14:15:36 save_restore_connector:243] Model EncDecCTCModel was successfully restored from /home/projects/asr/ASR_models/golos_ft_withSpecAug_50epochs.nemo.


In [20]:
# Point the restored model at the manifest lists and apply the fine-tuning
# hyper-parameters (the model is modified in place).
init_model(
    model=model,
    train_manifest_dir=train_manifests,
    valid_manifest_dir=valid_manifests,
    train_batch_size=32,
    valid_batch_size=32,
    learning_rate=3e-4,
)

[NeMo I 2022-12-02 14:15:55 collections:194] Dataset loaded with 211672 files totalling 327.46 hours
[NeMo I 2022-12-02 14:15:55 collections:195] 0 files were filtered totalling 0.00 hours
[NeMo I 2022-12-02 14:15:59 collections:194] Dataset loaded with 51543 files totalling 83.88 hours
[NeMo I 2022-12-02 14:15:59 collections:195] 0 files were filtered totalling 0.00 hours


In [21]:
# 50 epochs, validating every 5th epoch and logging every 200 steps.
trainer = init_trainer(
    model=model,
    num_epochs=50,
    log_every_n_steps=200,
    val_every_n_epoch=5,
    name_of_run='golos_ft50e_withAudAug',
    name_of_project='asr_experiments',
    model_name='Golos',
)

Auto select gpus: [0]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[NeMo I 2022-12-02 14:16:36 exp_manager:291] Experiments will be logged at experiments/ASR-Golos-Model-ru/2022-12-02_14-16-36
[NeMo I 2022-12-02 14:16:36 exp_manager:669] TensorboardLogger has been set up


[NeMo I 2022-12-02 14:16:42 exp_manager:684] WandBLogger has been set up


      rank_zero_deprecation(
    
      rank_zero_deprecation("`Trainer.weights_save_path` has been deprecated in v1.6 and will be removed in v1.8.")
    
[NeMo W 2022-12-02 14:16:42 exp_manager:919] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to -1. Please ensure that max_steps will run for at least 5 epochs to ensure that checkpointing will not error out.


In [None]:
# Run fine-tuning. The W&B run is closed in a `finally` clause so that it is
# finished even when training raises; otherwise the run would stay open in the
# kernel after a failed cell.
try:
    trainer.fit(model)
finally:
    wandb.finish()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


[NeMo I 2022-12-02 14:16:47 modelPT:597] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        lr: 0.0003
        weight_decay: 0.001
    )
[NeMo I 2022-12-02 14:16:47 lr_scheduler:910] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7fca30523460>" 
    will be used during training (effective maximum steps = 330750) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: null
    min_lr: 0.0
    last_epoch: -1
    max_steps: 330750
    )



  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 18.9 M
2 | decoder           | ConvASRDecoder                    | 34.9 K
3 | loss              | CTCLoss                           | 0     
4 | spec_augmentation | SpectrogramAugmentation           | 0     
5 | _wer              | WER                               | 0     
------------------------------------------------------------------------
18.9 M    Trainable params
0         Non-trainable params
18.9 M    Total params
75.718    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]