### install

In [32]:
import pandas as pd
import os
import json
import shutil
from jiwer import wer, cer

import glob
import subprocess
import tarfile
import wget
import copy
from omegaconf import OmegaConf, open_dict

import wandb
from tqdm.auto import tqdm

from collections import defaultdict

import re
import unicodedata

import nemo
import nemo.collections.asr as nemo_asr
from nemo.collections.asr.metrics.wer import word_error_rate
from nemo.utils import logging, exp_manager

import torch
import pytorch_lightning as ptl

import time
import gensim
import compress_fasttext
from typing import Dict, List, Literal
from abc import ABC, abstractmethod
from Levenshtein import distance

### dataset preparing

вспомогательные функции

In [33]:
def read_manifest(path):
    manifest = []
    with open(path, 'r') as f:
        for line in f:
            line = line.replace("\n", "")
            data = json.loads(line)
            manifest.append(data)
    return manifest

def load_commonvoice_dict(manifest_path: str):
    """
    Creates dictionary from commonvoice words. 
    """
    manifest_dict = []

    for path in os.listdir(manifest_path):
        if path != '.ipynb_checkpoints':
            manifest_data = read_manifest(os.path.join(manifest_path, path))
            manifest_texts = [elem['text'] for elem in manifest_data]

        for text in manifest_texts:
            words = re.findall("\w+", text)
            words = list(filter(lambda x: len(x.strip()) > 3, words))
            manifest_dict.extend(words)

    return set(manifest_dict)

def load_opencorp_dict(russian_dict_file_path: str) :
    """
    Creates dictionary from opencorp file.
    """
    russian_dict = []

    with open(russian_dict_file_path, 'r', encoding='utf-8') as dict_file :
        for line in dict_file :
            word = line.split('\t')[0].replace('\n', '').lower()
            if re.findall(r'[0-9]+', word) :
                continue
            if word and not word.isdigit() :
                russian_dict.append(word)

    return set(russian_dict)

создание директорий и загрузка нужных файлов

In [34]:
data_dir = 'datasets/'

LANGUAGE = "ru"

manifest_dir = os.path.join('manifests', LANGUAGE)

# fine-tuning

### pre-tranied model and experiment manager init

In [35]:
train_manifests = f'{manifest_dir}/commonvoice_train_manifest_lower.json,'    
train_manifests += f'{manifest_dir}/commonvoice_dev_manifest_lower.json,'    
train_manifests += f'{manifest_dir}/commonvoice4_train_manifest_lower.json,'   
train_manifests += f'{manifest_dir}/commonvoice4_dev_manifest_lower.json,'    
train_manifests += f'{manifest_dir}/commonvoice5_train_manifest_lower.json,'  
train_manifests += f'{manifest_dir}/commonvoice5_dev_manifest_lower.json,'     
train_manifests += f'{manifest_dir}/commonvoice9_train_manifest_lower.json,' 
train_manifests += f'{manifest_dir}/commonvoice9_dev_manifest_lower.json,'     
train_manifests += f'{manifest_dir}/commonvoice10_train_manifest_lower.json,' 
train_manifests += f'{manifest_dir}/commonvoice10_dev_manifest_lower.json,'      
train_manifests += f'{manifest_dir}/commonvoice11_train_manifest_lower.json,'  
train_manifests += f'{manifest_dir}/commonvoice11_dev_manifest_lower.json,'    
# train_manifests += f'golos/train/manifest.json,'                         
train_manifests += f'ruls/train/train_manifest_lower.json,'                    
train_manifests += f'ruls/dev/dev_manifest_lower.json'                        

valid_manifests = f'{manifest_dir}/commonvoice_test_manifest_lower.json,'    
valid_manifests += f'{manifest_dir}/commonvoice4_test_manifest_lower.json,'  
valid_manifests += f'{manifest_dir}/commonvoice5_test_manifest_lower.json,'   
valid_manifests += f'{manifest_dir}/commonvoice9_test_manifest_lower.json,'    
valid_manifests += f'{manifest_dir}/commonvoice10_test_manifest_lower.json,'   
valid_manifests += f'{manifest_dir}/commonvoice11_test_manifest_lower.json,'   
# valid_manifests += f'golos/test/crowd/manifest.json,'                    
# valid_manifests += f'golos/test/farfield/manifest.json,'                 
valid_manifests += f'ruls/test/test_manifest_lower.json'   

In [41]:
def init_model(model, train_manifest_dir, valid_manifest_dir, 
               train_batch_size, valid_batch_size, learning_rate):
    
    cfg = copy.deepcopy(model.cfg)

    with open_dict(cfg):    
        ## TRAIN CONFIG ##
        cfg.train_ds.manifest_filepath = train_manifest_dir
        cfg.train_ds.normalize_transcripts = False
        cfg.train_ds.batch_size = train_batch_size
        cfg.train_ds.num_workers = 32
        cfg.train_ds.pin_memory = True
        cfg.train_ds.trim_silence = True
        cfg.train_ds.sample_rate = 16000

        ## VALID CONFIG ##
        cfg.validation_ds.manifest_filepath = valid_manifest_dir
        cfg.validation_ds.normalize_transcripts = False
        cfg.validation_ds.batch_size = valid_batch_size
        cfg.validation_ds.num_workers = 32
        cfg.validation_ds.pin_memory = True
        cfg.validation_ds.trim_silence = True
        cfg.validation_ds.sample_rate = 16000

    # setup data loaders with new configs
    model.setup_training_data(cfg.train_ds)
    model.setup_multiple_validation_data(cfg.validation_ds)

    ## OPTIMIZERS ##
    with open_dict(model.cfg.optim):
        model.cfg.optim.name = 'novograd'
        model.cfg.optim.lr = learning_rate
        model.cfg.optim.betas = [0.8, 0.5]  
        model.cfg.optim.weight_decay = 0.001  
        model.cfg.optim.sched.name = 'CosineAnnealing'
        model.cfg.optim.sched.warmup_steps = None  
        model.cfg.optim.sched.warmup_ratio = None
        model.cfg.optim.sched.min_lr = 0.0
        model.cfg.optim.sched.last_epoch = -1
            
    ## AUGMENATION ##
    with open_dict(model.cfg.spec_augment):
        model.cfg.spec_augment.freq_masks = 2
        model.cfg.spec_augment.freq_width = 25
        model.cfg.spec_augment.time_masks = 2
        model.cfg.spec_augment.time_width = 0.05
        
        model.cfg.spec_augment.rect_freq = 50
        model.cfg.spec_augment.rect_masks = 5
        model.cfg.spec_augment.rect_time = 120

    model.spec_augmentation = model.from_config_dict(model.cfg.spec_augment)

    model._wer.use_cer = False

    model._wer.log_prediction = False

def init_trainer(model, num_epochs: int, log_every_n_steps: int, val_every_n_epoch: int,
                 name_of_run: str, name_of_project: str, model_name: str):

    trainer = ptl.Trainer(devices=1, 
                          accelerator='gpu', 
                          auto_select_gpus=True,
                          strategy=None,
                          max_epochs=num_epochs, 
                          auto_lr_find=False,
                          accumulate_grad_batches=1,
                          enable_checkpointing=False,
                          logger=False,
                          log_every_n_steps=log_every_n_steps,
                          check_val_every_n_epoch=val_every_n_epoch)

    model.set_trainer(trainer)

    model.cfg = model._cfg

    # Environment variable generally used for multi-node multi-gpu training.
    # In notebook environments, this flag is unnecessary and can cause logs of multiple training runs to overwrite each other.
    os.environ.pop('NEMO_EXPM_VERSION', None)

    config = exp_manager.ExpManagerConfig(
        exp_dir=f'experiments/',
        name=f"ASR-{model_name}-Model-{LANGUAGE}",
        checkpoint_callback_params=exp_manager.CallbackParams(
                               monitor="val_wer",
                               mode="min",
                               always_save_nemo=True,
                               save_best_model=True),
        create_wandb_logger = True, 
        wandb_logger_kwargs = {'name': name_of_run,
                               'project': name_of_project, 
                               'log_model': 'all'})

    config = OmegaConf.structured(config)
    logdir = exp_manager.exp_manager(trainer, config)

    return trainer

In [42]:
# model = nemo_asr.models.ASRModel.from_pretrained("stt_ru_quartznet15x5")

golos_model_path = 'ASR_models/QuartzNet15x5_golos.nemo'
model = nemo.collections.asr.models.EncDecCTCModel.restore_from(golos_model_path) 

[NeMo W 2022-11-22 13:58:30 modelPT:142] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: train/golos_and_mcv.jsonl
    sample_rate: 16000
    labels:
    - ' '
    - а
    - б
    - в
    - г
    - д
    - е
    - ж
    - з
    - и
    - й
    - к
    - л
    - м
    - н
    - о
    - п
    - р
    - с
    - т
    - у
    - ф
    - х
    - ц
    - ч
    - ш
    - щ
    - ъ
    - ы
    - ь
    - э
    - ю
    - я
    batch_size: 64
    trim_silence: false
    max_duration: 20.0
    min_duration: 0.1
    num_workers: 20
    shuffle: true
    is_tarred: false
    tarred_audio_filepaths: null
    tarred_shard_strategy: scatter
    parser: ru
    
[NeMo W 2022-11-22 13:58:30 modelPT:149] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a

[NeMo I 2022-11-22 13:58:30 features:225] PADDING: 16
[NeMo I 2022-11-22 13:58:31 save_restore_connector:243] Model EncDecCTCModel was successfully restored from /home/projects/asr/ASR_models/QuartzNet15x5_golos.nemo.


init model and it's config

In [43]:
init_model(model = model, 
           train_manifest_dir = train_manifests,
           valid_manifest_dir = valid_manifests, 
           train_batch_size = 44, valid_batch_size = 44, 
           learning_rate = 0.001)

[NeMo I 2022-11-22 13:58:43 collections:194] Dataset loaded with 211672 files totalling 327.46 hours
[NeMo I 2022-11-22 13:58:43 collections:195] 0 files were filtered totalling 0.00 hours
[NeMo I 2022-11-22 13:58:45 collections:194] Dataset loaded with 51543 files totalling 83.88 hours
[NeMo I 2022-11-22 13:58:45 collections:195] 0 files were filtered totalling 0.00 hours


In [44]:
trainer = init_trainer(model=model, num_epochs=50, log_every_n_steps=200, val_every_n_epoch=3, 
             name_of_run='golos_finetune_CV_RuLS_withSpecAug', name_of_project='asr_experiments', model_name='Golos')

Auto select gpus: [0]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[NeMo I 2022-11-22 13:58:47 exp_manager:291] Experiments will be logged at experiments/ASR-Golos-Model-ru/2022-11-22_13-58-47
[NeMo I 2022-11-22 13:58:47 exp_manager:669] TensorboardLogger has been set up


      rank_zero_warn(
    


[NeMo I 2022-11-22 13:58:47 exp_manager:684] WandBLogger has been set up


      rank_zero_deprecation(
    
      rank_zero_deprecation("`Trainer.weights_save_path` has been deprecated in v1.6 and will be removed in v1.8.")
    
[NeMo W 2022-11-22 13:58:47 exp_manager:919] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to -1. Please ensure that max_steps will run for at least 3 epochs to ensure that checkpointing will not error out.


In [None]:
%%time
trainer.fit(model)
wandb.finish()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


[NeMo I 2022-11-22 13:58:48 modelPT:597] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        lr: 0.001
        weight_decay: 0.001
    )
[NeMo I 2022-11-22 13:58:48 lr_scheduler:910] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7fa6ec6ec9a0>" 
    will be used during training (effective maximum steps = 240550) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: null
    min_lr: 0.0
    last_epoch: -1
    max_steps: 240550
    )



  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 18.9 M
2 | decoder           | ConvASRDecoder                    | 34.9 K
3 | loss              | CTCLoss                           | 0     
4 | spec_augmentation | SpectrogramAugmentation           | 0     
5 | _wer              | WER                               | 0     
------------------------------------------------------------------------
18.9 M    Trainable params
0         Non-trainable params
18.9 M    Total params
75.718    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

Epoch 5, global step 28866: 'val_wer' reached 0.13219 (best 0.13219), saving model to '/home/projects/asr/experiments/ASR-Golos-Model-ru/2022-11-22_13-58-47/checkpoints/ASR-Golos-Model-ru--val_wer=0.1322-epoch=5.ckpt' as top 3
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Validation: 0it [00:00, ?it/s]

load model from saved 

In [None]:
second_model = nemo.collections.asr.models.EncDecCTCModel.restore_from('/content/experiments/lang-ru/ASR-Char-Model-Language-ru/2022-11-01_07-33-02/checkpoints/ASR-Char-Model-Language-ru.nemo')

init_model(second_model, 
           train_manifest_dir = f"{manifest_dir}/commonvoice_train_manifest.json,{manifest_dir}/commonvoice_dev_manifest.json", 
           valid_manifest_dir = f"{manifest_dir}/commonvoice_test_manifest.json", 
           train_batch_size = 32, valid_batch_size = 8)

trainer = init_trainer(num_epochs=1, log_every_n_steps=100, val_every_n_epoch=1, 
                       name_of_run='test_restore_1', name_of_project='asr_experiments')

In [None]:
trainer.fit(second_model)

# Pre-trained model + Spell Checker

In [13]:
class SpellChecker(ABC) :

    def __init__(self, russian_dict_file_path: str = None,
                 additional_vocab_file_path: str = None, 
                 word_2_vocab_file_path: str = None, 
                 commonvoice_path: str = None, 
                 fasttext_path: str = None) :
        """
        russian_dict_file_path - путь к словарю opencorpora_dict
        additional_vocab_file_path - путь к словарю с числительными
        word_2_vocab_file_path - путь к основному словарю
        """

        self.additional_vocab_file_path = additional_vocab_file_path
        self.word_2_vocab_file_path = word_2_vocab_file_path

        self._words_searching_pattern = re.compile(r'[а-яёйa-z0-9,-./:]+')
        self.opencorp_dict = load_opencorp_dict(russian_dict_file_path)

        with open(word_2_vocab_file_path, 'r', encoding='utf-8') as word_2_vocab_file :
            self.vocab_word = json.loads(word_2_vocab_file.read())

        commonvoice_dict = load_commonvoice_dict(commonvoice_path)
        self.opencorp_dict.union(commonvoice_dict)

        if fasttext_path is not None:
            self.use_fasttext = True
            self.fasttext_model = compress_fasttext.models.CompressedFastTextKeyedVectors.load(fasttext_path)
        else:
            self.use_fasttext = False

    @abstractmethod
    def _get_most_similar_words(self, comparison_word: str, max_dist: int, 
                                weights: tuple) -> list :
        pass

    @abstractmethod
    def _fasttext_filtration(self, similar_words, context_words, treshold: float = 0.5):
        pass

    @staticmethod
    def _get_context_window(target, words_list, window_size: int = 1):
        if len(words_list) <= 1:
            return []

        if target < window_size:
            return words_list[0:target] + words_list[target + 1:target + 1 + window_size]

        if target > len(words_list) - 1 - window_size:
            return words_list[target - window_size:target] + words_list[target + 1:]

        return words_list[target - window_size:target] + words_list[target + 1:target + 1 + window_size]

    def correct_message(self, message: str, to_log: bool, 
                        max_dist: int, weights: tuple, 
                        treshold: float = 0.5, window_size: int = 1) -> str :
        plst = time.time()

        message = message.lower()
        if to_log:
            print(f'Found message: {message}')
        words = re.findall(self._words_searching_pattern, message)
        word_res = []
        for word in words :
            if word.replace('.', '').replace('/', '').replace(':', '').isdigit() :
                word_res.append(word)
            else :
                word_res.append(word.replace('.', '').replace('/', '').replace(':', ''))
        words = word_res

        result = []
        for i, word in enumerate(words) :
            if len(word) < 3 or word.isdigit() :
                # skipping words with len < 3
                result.append(word)
                continue

            filtered = False
            similar_words = self._get_most_similar_words(word, max_dist, weights)
            if self.use_fasttext:
                context_words = self._get_context_window(i, words, window_size=window_size)

                if context_words:
                    filtered = True
                    similar_words = self._fasttext_filtration(similar_words, context_words, treshold)
           
            if similar_words :
                if filtered:
                    result.append(similar_words[0]['token'])
                else:
                    result.append(similar_words[0])
            else :
                result.append(word)

        if to_log:
            print(f'Spell checked result: {result}')
        return ' '.join(result)


class LevenshteinSpellChecker(SpellChecker) :

    def __init__(self, russian_dict_file_path: str = None,
                 additional_vocab_file_path: str = None, 
                 word_2_vocab_file_path: str = None, 
                 commonvoice_path: str = None,
                 fasttext_path: str = None) :
        super().__init__(russian_dict_file_path, additional_vocab_file_path,
                         word_2_vocab_file_path, commonvoice_path,
                         fasttext_path)

    def _get_most_similar_words(self, comparison_word: str, max_dist: int, 
                                weights: tuple, treshold: float = 0.5) -> list :
        comparison_word = comparison_word.lower()

        if comparison_word in self.opencorp_dict :
            return [comparison_word]

        most_similar_words = []
        best_distance = max_dist

        vocab_word = self.vocab_word
        # logger.debug(f'In Spell checking using vocab {vocab_word}')

        for word in vocab_word :
            calculated_distance = distance(word, comparison_word, weights = weights)
            if calculated_distance < best_distance :
                best_distance = calculated_distance
                most_similar_words = [word]
            elif calculated_distance == best_distance :
                most_similar_words.append(word)

        return most_similar_words

    def _fasttext_filtration(self, similar_words: List, context_words: List, treshold: float = 0.5) -> List[Dict]:
        """
        Sorts most similar words by fasttext similairty with the context words.
        """
        context_embeddings = [self.fasttext_model[context_word] 
                              for context_word in context_words]

        similar_words_extended = []

        for word in similar_words:
            embedding = self.fasttext_model[word]
            similar_words_extended.append({
                "token": word, 
                "similarity": self.fasttext_model.cosine_similarities(embedding, context_embeddings).sum()
            })

        similar_words_sorted = sorted(similar_words_extended, key=lambda x: x['similarity'])
        return similar_words_sorted

In [10]:
def test_asr_model(model, batch_size:int, manifests: List, message: str):
    
    test_text = []
    test_path = []
    for path in manifests:
        mn = read_manifest(path)
        
        for sample in mn:
            test_text.append(sample['text']) 
            test_path.append(sample['audio_filepath'])

    described_text = model.transcribe(paths2audio_files=test_path, 
                                            batch_size=batch_size, num_workers=32)

    try:
        WER = wer(test_text, described_text)
        CER = cer(test_text, described_text)

        print(f'{message}:')
        print('WER:', WER)
        print('CER:', CER, '\n')
        
        return test_text, described_text
    except:
        print('Cannot calculate WER and CER')
        return test_text, described_text

manifests to test on

In [11]:
mn_list = [f'{manifest_dir}/commonvoice_test_manifest.json', 
           f'{manifest_dir}/commonvoice4_test_manifest.json',
           f'{manifest_dir}/commonvoice5_test_manifest.json',
           f'{manifest_dir}/commonvoice9_test_manifest.json',
           f'{manifest_dir}/commonvoice10_test_manifest.json',
           f'{manifest_dir}/commonvoice11_test_manifest.json']
#            f'golos/test/crowd/manifest.json',                  
#            f'golos/test/farfield/manifest.json',                 
#            f'ruls/test/test_manifest.json']

asr model

In [None]:
golos_ft_model = nemo.collections.asr.models.EncDecCTCModel.restore_from('ASR_models/golos_ft_CV_10epochs.nemo')

spell checker model

In [14]:
spell_checker = LevenshteinSpellChecker(
    russian_dict_file_path='NLP_models/opencorpora_dict.txt',
    additional_vocab_file_path='NLP_models/additional_vocab.yml',
    word_2_vocab_file_path='NLP_models/vocab_word.txt',
    commonvoice_path='manifests/ru',
    fasttext_path='NLP_models/ft_freqprune_400K_100K_pq_300.bin'
)

transcribe

In [21]:
# Spell Checker params
max_distance = 1
to_log = False
weights = (1, 2, 2)  #(insertion, deletion, substitution)
widow_size = 3

In [22]:
# test_text, described_text = test_asr_model(model = golos_ft_model, batch_size = 256,
#                                        manifests = mn_list, 
#                                        message = 'golos fine-tuned 10 epochs on CV dataset')

checked_text = []
for message in tqdm(described_text, desc='checking'):
    output = spell_checker.correct_message(message, to_log, max_distance, weights, widow_size)
    checked_text.append(output)

WER = wer(test_text, checked_text)
CER = cer(test_text, checked_text)
print('WER with checker:', WER)
print('CER with checker:', CER)

checking:   0%|          | 0/50191 [00:00<?, ?it/s]

WER with checker: 0.37455528203405714
CER with checker: 0.07312015340271831


# inference

load pre-trained ru model and calculate WER metric

In [12]:
os.listdir('experiments/ASR-Golos-Model-ru/2022-11-21_23-31-26/checkpoints')[-1]

'ASR-Golos-Model-ru--val_wer=0.1126-epoch=10-last.ckpt'

save .ckpt in .nemo model for inference

In [None]:
folder_path = 'experiments/ASR-Golos-Model-ru/2022-11-21_23-31-26/checkpoints/'
ckpt_path = 'ASR-Golos-Model-ru--val_wer=0.1126-epoch=10-last.ckpt'
model_ft = nemo.collections.asr.models.EncDecCTCModel.load_from_checkpoint(folder_path+ckpt_path)
model_ft.save_to('ASR_models/golos_ft_CV_withSpecAug_10epochs.nemo')

init model and manifests for testing

In [None]:
golos_ft_model = nemo.collections.asr.models.EncDecCTCModel.restore_from('ASR_models/golos_ft_CV_withSpecAug_10epochs.nemo')

transcribe and get metrics

In [15]:
test_text, described_text = test_asr_model(model = golos_ft_model, batch_size = 256,
                            manifests = mn_list, 
                            message = 'golos fine-tuned 10 epochs on CV dataset with SpecAugment')

Transcribing:   0%|          | 0/197 [00:04<?, ?it/s]

golos fine-tuned 10 epochs on CV dataset with SpecAugment:
WER: 0.34212677942430847
CER: 0.06537826071226137 



In [None]:
for pair in zip(test_text[:50], described_text[:50]):
    print('real: ', pair[0])
    print('pred: ', pair[1])

In [34]:
test_text, described_text = test_model(model = basis_ft_model, batch_size = 200,
                                       manifests = mn_list, 
                                       message = 'basis fine-tuned 5 epochs')

Transcribing:   0%|          | 0/251 [00:00<?, ?it/s]

basis fine-tuned 5 epochs:
WER: 0.403339859546465
CER: 0.08250400993740879 



In [35]:
test_text, described_text = test_model(model = basis_model, batch_size = 200,
                                       manifests = mn_list, 
                                       message = 'basis nvidia model')

Transcribing:   0%|          | 0/251 [00:00<?, ?it/s]

basis nvidia model:
WER: 0.41628916280323686
CER: 0.09115673126786382 



In [12]:
test_text, described_text = test_model(model = sber_model, batch_size = 200,
                                       manifests = mn_list, 
                                       message = 'golos model')

Transcribing:   0%|          | 0/251 [00:03<?, ?it/s]

golos model:
WER: 0.9798821745990357
CER: 0.6501251221071166 

