In [1]:
%load_ext autoreload
%autoreload 2

In [28]:
import torch
import numpy as np
import random
import pandas as pd
from torch import nn
from glob import glob
from tqdm.auto import tqdm
from torchaudio import transforms as T
import pytorch_lightning as pl 
from maatool.data.feats_itdataset_v2 import FeatsIterableDatasetV2
from maatool.models.transformer import TransformerWithSinPos
torch.cuda.is_available()

True

In [5]:
import logging
import logging.config

def configure_logging(log_level):
    handlers =  {
            "maa": {
                "class": "logging.StreamHandler",
                "formatter": "maa_basic",
                "stream": "ext://sys.stdout",
            }
    }
    CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"maa_basic": {"format": '%(asctime)s %(name)s %(pathname)s:%(lineno)d - %(levelname)s - %(message)s'}},
        "handlers": handlers,
        "loggers": {"maa": {"handlers": handlers.keys(), "level": log_level}},
        "root": {"handlers": handlers.keys(), "level": log_level}
    }
    logging.config.dictConfig(CONFIG)
configure_logging("INFO")

In [6]:
torch.distributed.is_initialized()

False

In [7]:
from collections import defaultdict

In [134]:
def get_new_tgt(prev_tgt, hyp_logprobs, logits, topk=4):
    """
    prev_tgt - (T, N_hyp)
    hyp_logprobs - (N_hyp, )
    logits - (N_hyp, C)
    """
    assert len(prev_tgt.shape) == 2, f"{prev_tgt.shape=}"
    assert len(hyp_logprobs.shape) == 1, f"{hyp_logprobs.shape=}"
    assert len(logits.shape) == 2, f"{logits.shape=}"
    assert prev_tgt.shape[1] == hyp_logprobs.shape[0] == logits.shape[0], (
        f"{prev_tgt.shape=} {hyp_logprobs.shape=} {logits.shape=}"
    )
        
    nt_topk_logits, nt_topk_idx = logits.topk(k=topk, axis=-1)
    #print("nt_topk_idx", nt_topk_idx, nt_topk_idx.shape)
    # (N, K)
    next_tokens = nt_topk_idx.T.reshape(1, -1)
    #print("next_tokens", next_tokens, next_tokens.shape)
    # (1, N*(repeat k times)) 
    # (T, N*(repeat k times))  
    new_hyp_tgt = torch.concatenate([prev_tgt.repeat(1, topk), next_tokens], axis=0)
    #print(f"{new_hyp_tgt=}", new_hyp_tgt.shape)
    # (T+1, N*(repeat k times))
    new_scores = nt_topk_logits.T.reshape(-1)
    # N*(repeat k times)
    prew_scores = hyp_logprobs.repeat(topk)
    #print("prew_scores", prew_scores)
    # N*(repeat k times)
    new_hyp_logprob = prew_scores + new_scores
    #print("new_hyp_logprob", new_hyp_logprob)
    new_hyp_logprob, idx = new_hyp_logprob.topk(k=topk)
    #print(idx)
    new_hyps = new_hyp_tgt[:, idx]
    #print("new_hyps", new_hyps, new_hyp_logprob)
    # (T+1, N*k), (N,)
    return new_hyps, new_hyp_logprob
    

    
tgt, logits = get_new_tgt(torch.LongTensor([[1,]]), torch.tensor([-1.]), torch.tensor([[-3, -4, -7, -2, -5]]))
print(">>>>\n", tgt, logits)
get_new_tgt(tgt, logits, torch.tensor([[100,    110,  200], 
                                       [100,    110,  200], 
                                       [100,    110,  200], 
                                       [100,    110,  200]]), topk=2)

>>>>
 tensor([[1, 1, 1, 1],
        [3, 0, 1, 4]]) tensor([-3., -4., -5., -6.])


(tensor([[1, 1],
         [3, 0],
         [2, 2]]),
 tensor([197., 196.]))

In [135]:
def sep_ready_tgt(tgt, logprobs, eos_id=2):
    """
    tgt - (T, N)
    logprobs - (N,)
    """
    assert tgt.shape[1] == logprobs.shape[0], (
        f"{tgt.shape=} {logprobs.shape=}"
    )
    
    is_end_mask = ((tgt == eos_id).sum(axis=0) > 0)
    # (N,)
    #print(is_end_mask)
    ready_tgt = tgt[:, is_end_mask]
    ready_logprobs = logprobs[is_end_mask]
    
    ready_list = [(l.cpu().item(), t.cpu().tolist()) for l, t in zip(ready_logprobs, ready_tgt.T)]

    not_ready_tgt = tgt[:, ~is_end_mask]
    not_ready_logprobs = logprobs[~is_end_mask]
    assert not_ready_tgt.shape[1] == not_ready_logprobs.shape[0], (
        f"{not_ready_tgt.shape[1]=} {not_ready_logprobs.shape[0]=}"
    )
    return ready_list, not_ready_tgt, not_ready_logprobs


sep_ready_tgt(torch.LongTensor([[1, 1], [2, 3]]), torch.tensor([-1., -3]))

([(-1.0, [1, 2])],
 tensor([[1],
         [3]]),
 tensor([-3.]))

In [143]:
def set_random_seed(seed):
    if seed < 0:
        seed = seed_from_time()
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
set_random_seed(42)

class SwipeTransformerRecognizer(pl.LightningModule):
    def __init__(self, backbone, learning_rate=1e-4, speed=42):
        super().__init__()
        self.save_hyperparameters(ignore=['backbone'])
        self.backbone = backbone
        self.ce_loss = nn.CrossEntropyLoss(ignore_index=0, reduction='mean')
#         self.spec_aug = torch.nn.Sequential(
#             #T.FrequencyMasking(freq_mask_param=24),
#             #T.TimeMasking(time_mask_param=30),
#             T.TimeMasking(time_mask_param=24), # last dim masking
#         )
        set_random_seed(speed)

    def forward(self, feats, **kwargs):
        # (T, N, E)
#         feats = feats.permute(1, 2, 0)
#         # (N, E, T)
#         feats = self.spec_aug(feats).permute(2, 0, 1)
#         if self.training:
#             #logging.info("Apply specaug")
#             feats = self.spec_aug(feats)
        # (T, N, E)
        return self.backbone(feats, **kwargs)
    
    def get_loss(self, batch):
        # batch - (Time, Batch, ...)
        feats = batch['feats']
        # (Time, Batch, num_feats)
        tgt = batch['targets'][:-1]
        tgt_key_padding_mask = batch['tgt_key_padding_mask'][:, 1:] 
        # (Batch, Seq-1)
        
        logits = self.forward(feats=feats, 
                              tgt=tgt, 
                              src_key_padding_mask=batch['src_key_padding_mask'], 
                              tgt_key_padding_mask=tgt_key_padding_mask) 
        # (Seq-1, Batch, C)
        S, N, C = logits.shape
        targets = batch['targets'][1:]
        # (Seq-1, Batch)
        # print("loss ", logits.shape, targets.shape)
        loss = self.ce_loss(logits.view(-1, C), targets.reshape(-1))
        
        return loss

        
    def training_step(self, batch, batch_idx):
        loss = self.get_loss(batch)
        self.log('train_loss', loss, on_epoch=True, prog_bar=True,  batch_size=len(batch['uids']))
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.get_loss(batch)
        self.log('valid_loss', loss, on_epoch=True, prog_bar=True, on_step=True,  batch_size=len(batch['uids']))

    def test_step(self, batch, batch_idx):
        loss = self.get_loss(batch)
        self.log('test_loss', loss,  batch_size=len(batch['uids']))

    def configure_optimizers(self):
        # self.hparams available because we called self.save_hyperparameters()
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

    @staticmethod
    def add_model_specific_args(parent_parser):
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument('--learning_rate', type=float, default=0.0001)
        return parser
    
    def predict_topk(self, dl, tokenizer, topk=4, bos_id=1, eos_id=2, max_out_len=26, device='cuda'):
        self.eval()
        utt2word= defaultdict(list)
        utt2logs = defaultdict(list)
        pbar = tqdm(dl)
        with torch.no_grad():
            for batch in pbar:
                batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
                memory = self.backbone.forward_encoder(batch['feats'], 
                                                  src_key_padding_mask=batch['src_key_padding_mask'])
                assert memory.shape[1] == 1, f"{memory.shape=}"
                # (SrcTime, Batch, E)
                tgt = torch.full(size=(1, 1), 
                                 fill_value=bos_id, 
                                 dtype=torch.long, 
                                 device=memory.device)
                hyp_logprobs = torch.zeros((1), device=memory.device)
                tgt_ready = []
                mkpm = batch['src_key_padding_mask']
                for l in range(max_out_len):
                    #print(f"{tgt.shape=}")
                    tgt_logits = self.backbone.forward_decoder(tgt, 
                                                        memory.repeat(1, tgt.shape[1], 1), 
                                                        memory_key_padding_mask=mkpm.repeat((tgt.shape[1], 1)))
                    tgt_logits = tgt_logits.log_softmax(dim=-1)

                    new_tgt, logprobs = get_new_tgt(tgt, hyp_logprobs, tgt_logits[-1], topk=topk)
                    ready, tgt, hyp_logprobs = sep_ready_tgt(new_tgt, logprobs)
                    tgt_ready.extend(ready)
                    if len(tgt_ready) >= topk:
                        break

                uid = batch['uids'][0]
                if len(tgt_ready) == 0:
                    logging.warning(f"tgt_ready is 0 for {uid}. {tgt.shape=}. Use all hyps as ready hyps")
                    tgt_ready = [(l.cpu().item(), t.cpu().tolist()) for l, t in zip(hyp_logprobs, tgt.T)]

                out_indices = []
                for logprob, indices in sorted(tgt_ready, reverse=True):
                    joined = tokenizer.decode(indices) #.split()[0]
                    utt2word[uid].append(joined)
                    utt2logs[uid].append(logprob)
                d = '|'+'|'.join(utt2word[uid]) + "|"
                pbar.set_description(f"{d}".ljust(40, '=')[:40], refresh=False)
        return utt2word, utt2logs

In [144]:
model = TransformerWithSinPos(feats_dim=37, num_tokens=500, num_decoder_layers=8)
pl_module = SwipeTransformerRecognizer(model)
#pl_module = SwipeTransformerRecognizer.load_from_checkpoint('exp/models/transformer_sc/lightning_logs/version_50424998/checkpoints/last.ckpt',backbone=model, map_location='cpu' )
#pl_module = SwipeTransformerRecognizer.load_from_checkpoint('exp/models/t_finetune_with_sa/lightning_logs/version_50448424/checkpoints/last.ckpt',backbone=model, map_location='cpu' )


PositionalEncoding shape is torch.Size([400, 1, 512])


In [139]:
val_ds = FeatsIterableDatasetV2([f"ark:data_feats/valid/feats.ark"], 
                             targets_rspecifier='ark:exp/bpe500/valid-text.int', 
                                shuffle=False,
                               bos_id=1, 
                               eos_id=2,
                               batch_first=False)
val_dataloader = torch.utils.data.DataLoader(val_ds, batch_size=1, collate_fn=val_ds.collate_pad)

train_ds = FeatsIterableDatasetV2([f"ark:{f}" for f in sorted(glob("data_feats/train/feats.*.ark"))],
                                  targets_rspecifier='ark:exp/bpe500/train-text.int.ark', 
                                  shuffle=True,
                                  bos_id=1, 
                                  eos_id=2, 
                                 batch_first=False)

train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=24, collate_fn=train_ds.collate_pad, 
                                                num_workers=8)

2023-11-11 15:45:10,481 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:44 - INFO - Loading targets from ark:exp/bpe500/valid-text.int


Loading targets...: 0it [00:00, ?it/s]

2023-11-11 15:45:10,796 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:44 - INFO - Loading targets from ark:exp/bpe500/train-text.int.ark


Loading targets...: 0it [00:00, ?it/s]

In [145]:
trainer = pl.Trainer(max_epochs=3, log_every_n_steps=400, reload_dataloaders_every_n_epochs=1,
                    default_root_dir='exp/models/t_finetune_with_sa',
                    callbacks=[pl.callbacks.TQDMProgressBar(refresh_rate=100),
                              pl.callbacks.ModelCheckpoint(every_n_train_steps=10000,
                                                          save_last=True)],
                    accumulate_grad_batches=8,
                    val_check_interval=20000)
                    #check_val_every_n_epoch=1)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [36]:
result = trainer.test(pl_module, val_dataloader)
print(result)
# 0.20462335646152496
# [{'test_loss': 2.9619081020355225}]
# v3.11.11 [{'test_loss': 0.6646422147750854}] /0.27 / 0.23

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Testing: 0it [00:00, ?it/s]

2023-11-11 11:51:08,879 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


[{'test_loss': 0.20462335646152496}]


In [146]:
trainer.fit(pl_module, train_dataloader, val_dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                  | Params
---------------------------------------------------
0 | backbone | TransformerWithSinPos | 53.1 M
1 | ce_loss  | CrossEntropyLoss      | 0     
---------------------------------------------------
53.1 M    Trainable params
0         Non-trainable params
53.1 M    Total params
212.322   Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Sanity Checking: 0it [00:00, ?it/s]

2023-11-11 15:46:54,930 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


  rank_zero_warn(
  rank_zero_warn(


2023-11-11 15:46:56,000 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.108.ark
2023-11-11 15:46:55,999 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.63.ark
2023-11-11 15:46:56,000 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.93.ark
2023-11-11 15:46:56,046 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.80.ark
2023-11-11 15:46:56,052 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.89.ark
2023-11-11 15:46:56,055 root /mnt/

Training: 0it [00:00, ?it/s]

2023-11-11 15:46:57,180 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.1.ark
2023-11-11 15:46:57,180 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.42.ark
2023-11-11 15:46:57,180 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.14.ark
2023-11-11 15:46:57,233 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.123.ark
2023-11-11 15:46:57,258 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.45.ark
2023-11-11 15:46:57,260 root /mnt/a

Validation: 0it [00:00, ?it/s]

2023-11-11 16:19:31,510 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 16:39:40,876 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.27.ark
2023-11-11 16:39:41,004 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.20.ark
2023-11-11 16:39:41,095 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.87.ark
2023-11-11 16:39:41,207 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.101.ark
2023-11-11 16:39:41,297 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-11 16:53:55,233 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 17:06:58,711 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.63.ark
2023-11-11 17:06:58,788 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.13.ark
2023-11-11 17:06:58,893 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.43.ark
2023-11-11 17:06:58,979 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.44.ark
2023-11-11 17:06:59,081 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-11 17:28:27,919 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 17:34:23,350 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.128.ark
2023-11-11 17:34:23,476 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.79.ark
2023-11-11 17:34:23,586 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.122.ark
2023-11-11 17:34:23,690 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.15.ark
2023-11-11 17:34:23,791 root /mnt/as

Validation: 0it [00:00, ?it/s]

2023-11-11 18:03:04,219 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 18:27:16,691 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.34.ark
2023-11-11 18:27:16,775 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.64.ark
2023-11-11 18:27:16,874 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.36.ark
2023-11-11 18:27:16,971 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.95.ark
2023-11-11 18:27:17,048 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-11 18:37:27,889 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 18:54:43,010 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.120.ark
2023-11-11 18:54:43,096 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.28.ark
2023-11-11 18:54:43,168 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.29.ark
2023-11-11 18:54:43,311 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.22.ark
2023-11-11 18:54:43,433 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-11 19:12:01,761 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 19:22:04,691 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.2.ark
2023-11-11 19:22:04,787 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.57.ark
2023-11-11 19:22:04,912 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.94.ark
2023-11-11 19:22:05,002 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.59.ark
2023-11-11 19:22:05,092 root /mnt/asr_h

Validation: 0it [00:00, ?it/s]

2023-11-11 19:46:36,027 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 19:49:27,020 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.70.ark
2023-11-11 19:49:27,099 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.107.ark
2023-11-11 19:49:27,180 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.65.ark
2023-11-11 19:49:27,285 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.66.ark
2023-11-11 19:49:27,367 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-11 20:21:24,926 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 20:42:51,060 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.113.ark
2023-11-11 20:42:51,142 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.86.ark
2023-11-11 20:42:51,256 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.72.ark
2023-11-11 20:42:51,355 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.37.ark
2023-11-11 20:42:51,443 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-11 20:56:29,987 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 21:10:46,824 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.49.ark
2023-11-11 21:10:46,936 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.5.ark
2023-11-11 21:10:47,008 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.58.ark
2023-11-11 21:10:47,127 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.3.ark
2023-11-11 21:10:47,206 root /mnt/asr_ho

Validation: 0it [00:00, ?it/s]

2023-11-11 21:31:17,943 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 21:38:16,192 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.92.ark
2023-11-11 21:38:16,281 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.93.ark
2023-11-11 21:38:16,359 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.50.ark
2023-11-11 21:38:16,495 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.80.ark
2023-11-11 21:38:16,574 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-11 22:05:58,321 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 22:31:20,059 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.106.ark
2023-11-11 22:31:20,142 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.10.ark
2023-11-11 22:31:20,232 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.100.ark
2023-11-11 22:31:20,310 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.109.ark
2023-11-11 22:31:20,374 root /mnt/a

Validation: 0it [00:00, ?it/s]

2023-11-11 22:40:40,835 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 22:59:23,799 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.3.ark
2023-11-11 22:59:23,798 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.106.ark
2023-11-11 22:59:23,799 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.45.ark
2023-11-11 22:59:23,798 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.20.ark
2023-11-11 22:59:23,799 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-11 23:32:01,782 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-11 23:52:26,064 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.41.ark
2023-11-11 23:52:26,134 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.121.ark
2023-11-11 23:52:26,215 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.36.ark
2023-11-11 23:52:26,309 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.59.ark
2023-11-11 23:52:26,377 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-12 00:06:52,815 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 00:20:07,211 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.34.ark
2023-11-12 00:20:07,300 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.86.ark
2023-11-12 00:20:07,437 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.115.ark
2023-11-12 00:20:07,583 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.15.ark
2023-11-12 00:20:07,665 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-12 00:41:50,947 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 00:47:51,300 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.49.ark
2023-11-12 00:47:51,404 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.93.ark
2023-11-12 00:47:51,475 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.58.ark
2023-11-12 00:47:51,574 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.37.ark
2023-11-12 00:47:51,690 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 01:16:40,729 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 01:41:06,929 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.78.ark
2023-11-12 01:41:07,024 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.10.ark
2023-11-12 01:41:07,127 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.50.ark
2023-11-12 01:41:07,219 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.80.ark
2023-11-12 01:41:07,313 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 01:51:38,629 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 02:08:58,481 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.120.ark
2023-11-12 02:08:58,587 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.71.ark
2023-11-12 02:08:58,668 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.108.ark
2023-11-12 02:08:58,736 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.44.ark
2023-11-12 02:08:58,842 root /mnt/as

Validation: 0it [00:00, ?it/s]

2023-11-12 02:26:34,124 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 02:36:51,040 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.85.ark
2023-11-12 02:36:51,135 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.42.ark
2023-11-12 02:36:51,248 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.14.ark
2023-11-12 02:36:51,331 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.3.ark
2023-11-12 02:36:51,433 root /mnt/asr_h

Validation: 0it [00:00, ?it/s]

2023-11-12 03:01:53,212 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 03:04:50,248 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.2.ark
2023-11-12 03:04:50,335 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.13.ark
2023-11-12 03:04:50,437 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.43.ark
2023-11-12 03:04:50,567 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.101.ark
2023-11-12 03:04:50,650 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 03:36:55,173 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 03:58:25,226 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.70.ark
2023-11-12 03:58:25,334 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.20.ark
2023-11-12 03:58:25,421 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.72.ark
2023-11-12 03:58:25,525 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.73.ark
2023-11-12 03:58:25,613 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 04:11:53,303 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 04:26:12,061 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.56.ark
2023-11-12 04:26:12,160 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.107.ark
2023-11-12 04:26:12,264 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.21.ark
2023-11-12 04:26:12,384 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.95.ark
2023-11-12 04:26:12,474 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-12 04:46:42,407 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 04:53:42,474 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.1.ark
2023-11-12 04:53:42,550 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.79.ark
2023-11-12 04:53:42,664 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.29.ark
2023-11-12 04:53:42,760 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.22.ark
2023-11-12 04:53:42,882 root /mnt/asr_h

Validation: 0it [00:00, ?it/s]

2023-11-12 05:21:16,175 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 05:46:55,844 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.113.ark
2023-11-12 05:46:55,923 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.57.ark
2023-11-12 05:46:55,998 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.8.ark
2023-11-12 05:46:56,100 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.123.ark
2023-11-12 05:46:56,178 root /mnt/asr

Validation: 0it [00:00, ?it/s]

2023-11-12 05:56:25,695 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 06:14:47,509 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.64.ark
2023-11-12 06:14:47,510 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.66.ark
2023-11-12 06:14:47,509 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.87.ark
2023-11-12 06:14:47,512 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.30.ark
2023-11-12 06:14:47,544 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 06:47:37,144 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 07:08:07,273 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.49.ark
2023-11-12 07:08:07,371 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.64.ark
2023-11-12 07:08:07,488 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.14.ark
2023-11-12 07:08:07,595 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.15.ark
2023-11-12 07:08:07,688 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 07:22:36,407 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 07:35:50,081 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.106.ark
2023-11-12 07:35:50,145 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.28.ark
2023-11-12 07:35:50,206 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.108.ark
2023-11-12 07:35:50,337 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.73.ark
2023-11-12 07:35:50,451 root /mnt/as

Validation: 0it [00:00, ?it/s]

2023-11-12 07:57:25,948 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark
2023-11-12 08:03:29,035 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.70.ark
2023-11-12 08:03:29,107 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.93.ark
2023-11-12 08:03:29,216 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.65.ark
2023-11-12 08:03:29,327 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/train/feats.95.ark
2023-11-12 08:03:29,418 root /mnt/asr_

Validation: 0it [00:00, ?it/s]

2023-11-12 08:32:10,597 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [13]:
import sentencepiece as spm
import math
tokenizer = spm.SentencePieceProcessor('exp/bpe500/model.model')

In [14]:
utt2words, utt2logs = pl_module.cuda().predict_topk(val_dataloader, tokenizer=tokenizer, topk=10, device='cuda')

  0%|          | 0/10000 [00:00<?, ?it/s]

2023-11-11 12:48:29,606 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


  'feats': torch.as_tensor(feats, dtype=torch.float32),


In [15]:
utt2words

defaultdict(list,
            {'valid-0': ['на',
              'нас',
              'на на',
              'наа',
              'нана',
              'нам',
              'наи',
              'нача',
              'напа',
              'на-а',
              'на-на'],
             'valid-1': ['все',
              'всем',
              'вчера',
              'усе',
              'све',
              'осве',
              'се',
              'свет',
              'свасе',
              'ава',
              'аван'],
             'valid-2': ['этом',
              'этом',
              'этим',
              'это',
              'потом',
              'эти',
              'эта',
              'этому',
              'этот',
              'дом',
              'этими'],
             'valid-3': ['добрый',
              'доброй',
              'добрые',
              'добрый',
              'добрым',
              'доброе',
              'добой',
              'дорогой',
              'домой',
   

In [16]:
def accuracy(ref_u2w, hyp_u2w):
    corr = 0
    err = 0
    total = len(ref_u2w)
    for u, ref in tqdm(ref_u2w.items()):
        hyp = hyp_u2w[u].strip('-')
        if ref != hyp:
            print(ref, hyp)
            err +=1
        else:
            corr +=1
    a = corr/total
    print(f"{total=} {corr=} {err=}, accuracy: {a}")
    return a

with open('data_feats/valid/text') as f:
    valid_ref_u2w = {u:w for u, w in   map(str.split, f.readlines())}
    

In [17]:
accuracy(valid_ref_u2w, {k:v[0] for k, v in utt2words.items()})
# v2.topk2 total=10000 corr=8429 err=1571, accuracy: 0.8429
# v2.topk5 total=10000 corr=8434 err=1566, accuracy: 0.8434
# v2.topk10 total=10000 corr=8388 err=1612, accuracy: 0.8388
# v3.topk10 total=10000 corr=8519 err=1481, accuracy: 0.8519  <--
# v3.11.11.topk10 total=10000 corr=8340 err=1660, accuracy: 0.834

  0%|          | 0/10000 [00:00<?, ?it/s]

геев гены
была быстро
рам нам
шакалов заказала
воля волосы
ура уля
шорты шорту
корень удобно
но но но но но но но но но но но
купи курс
говорить говорит
водитель водителю
вечером вечер
фиолетовой фиолетовое
выехал выезжать
выздоравливай выдавливай
черна черны
выгуливать выгулить
вызовов вызвала
пробовал попробовать
стать стараюсь
отвечал отвечать
ха за
русскому русском
мазок мазлолетка
не на
обувь обед
завтра заработать
он лорн
никогда никого
баба бабу
но но но но но но но но но но но
пойми пофиг
кн еген
виде видео
пахлава пахова
уезжай езжай
прошу проще
дура духов
агапкина агркина
ниже нижнее
верон вероника
анадырь аналогию
лада ладно
заберем забери
завтра звоню
тыс там
стоит строит
выбил фибир
прошел пошел
романович романтичная
ирбит ирбить
глав главное
работы работаю
доллар дождусь
занимаешься заречь
жень день
приветик привет
мм ммм
выглядят выглядит
крот корот
потому плитки
привете привет
ест есть
лежит делись
но но но но но но но но но но но
пойдут пойдет
дека днем
екб куб
прожить

0.8519

In [18]:
with open('./data/voc.txt') as f:
    vocab = frozenset(s for s in map(str.strip, f.readlines()))

In [19]:
lv = {}
for k, v in utt2words.items():
    corr_w = None
    for w in v:
        if w in vocab:
            corr_w = w
            break
    if corr_w is None: 
        logging.warning(f"{k=} doesn't have any vocab hyp. {v=}")
        corr_w = '-'
    lv[k] = corr_w
accuracy(valid_ref_u2w, lv)
# v2.topk10 total=10000 corr=8542 err=1458, accuracy: 0.8542
# v3.topk10 total=10000 corr=8665 err=1335, accuracy: 0.8665
# v3.11.11.topk10 total=10000 corr=8429 err=1571, accuracy: 0.8429



  0%|          | 0/10000 [00:00<?, ?it/s]

геев гены
была быстро
рам нам
шакалов заказала
воля волосы
ура уля
корень удобно
купи курс
говорить говорит
водитель водителю
вечером вечер
фиолетовой фиолетовое
выехал выезжать
выздоравливай выдавливай
черна черны
выгуливать выгулять
вызовов вызвала
пробовал попробовать
стать стараюсь
отвечал отвечать
ха за
русскому русском
мазок мажора
не на
обувь обед
завтра заработать
он борн
никогда никого
баба бабу
пойми пофиг
кн угу
виде видео
пахлава 
уезжай езжай
прошу проще
дура духов
агапкина агрессии
ниже нижнее
верон вероника
анадырь аналогию
лада ладно
заберем забери
завтра звоню
тыс там
стоит строит
выбил футболка
прошел пошел
романович романтичная
глав главное
работы работаю
доллар дождусь
занимаешься закончилась
жень день
приветик привет
мм ммм
выглядят выглядит
крот корот
потому плитки
привете привет
ест есть
лежит делись
пойдут пойдет
дека днем
екб куб
прожить пожить
марин марии
лай дай
был было
проспект приятного
считал считай
нами наст
ну не
система систем
проснулась проснулся
но н

0.8665

In [20]:
with open('data_feats/valid/grid_name') as f:
    u2g = {u:g for u,g in map(str.split, f.readlines())}


In [21]:
valid_ref_u2w_d = {k:v for k,v in valid_ref_u2w.items() if u2g[k] == 'default'}
valid_ref_u2w_e = {k:v for k,v in valid_ref_u2w.items() if u2g[k] == 'extra'}
print(len(valid_ref_u2w_d), len(valid_ref_u2w_e))
lv_d = {k:v for k,v in lv.items() if u2g[k] == 'default'}
lv_e = {k:v for k,v in lv.items() if u2g[k] == 'extra'}
print(len(lv_d), len(lv_e))

print(accuracy(valid_ref_u2w_d, lv_d))
print(accuracy(valid_ref_u2w_e, lv_e))

9416 584
9416 584


  0%|          | 0/9416 [00:00<?, ?it/s]

геев гены
была быстро
рам нам
шакалов заказала
воля волосы
ура уля
корень удобно
купи курс
говорить говорит
водитель водителю
вечером вечер
фиолетовой фиолетовое
выехал выезжать
выздоравливай выдавливай
черна черны
выгуливать выгулять
вызовов вызвала
пробовал попробовать
стать стараюсь
отвечал отвечать
ха за
русскому русском
мазок мажора
не на
обувь обед
завтра заработать
он борн
никогда никого
баба бабу
пойми пофиг
кн угу
виде видео
пахлава 
уезжай езжай
прошу проще
дура духов
агапкина агрессии
ниже нижнее
верон вероника
анадырь аналогию
лада ладно
заберем забери
завтра звоню
тыс там
стоит строит
прошел пошел
романович романтичная
глав главное
работы работаю
доллар дождусь
занимаешься закончилась
жень день
приветик привет
мм ммм
выглядят выглядит
крот корот
потому плитки
привете привет
ест есть
лежит делись
пойдут пойдет
дека днем
екб куб
прожить пожить
марин марии
лай дай
был было
считал считай
нами наст
ну не
система систем
проснулась проснулся
но нот
заварили звонили
понятно понятн

  0%|          | 0/584 [00:00<?, ?it/s]

выбил футболка
проспект приятного
наказаний наказывай
мерк мере
поверить поворот
не на
норм ели
выбивается выбирается
сообщили собираешься
древняя деревня
приходит приходи
хай зай
призывников праздником
плов пора
виталия виртуальная
любовь люблю
нет него
не нее
аккорды аккорда
щенка зеленка
надо над
но нот
нечего ничего
дворе двое
спорт сорт
да для
приехать приехал
стоят стоя
му мк
ша ща
дочь даст
ранняя раня
сама масса
люди люба
оттепель откроешь
начало начал
брад бад
ярмак ярма
оплате оплата
про по
норидж норд
схожу схоже
го ооо
хахаха хахахаха
не нее
оставить оставь
обойдусь обойду
садике садики
завотделением 
удалила удала
раздевала раздевалась
не нее
то ооо
вон вроде
овощами рахмат
успею успеваю
разговаривать раздавать
ассоциируюсь ассоциацию
проводов провод
просто прости
ель едь
нее не
оке окну
валерьевич влюбленности
города годом
поста посмотрим
устроил кирилл
про по
султанша султана
январь января
эд эл
начнет насте
удачного классного
не нее
total=584 corr=510 err=74, accuracy: 

In [73]:
test_ds =  FeatsIterableDatasetV2([f"ark:data_feats/test/feats.ark"], shuffle=False, 
                                 bos_id=1, 
                                 eos_id=2, 
                                 batch_first=False)
test_dataloader = torch.utils.data.DataLoader(test_ds, batch_size=1, collate_fn=test_ds.collate_pad)
#test_u2w = predict(pl_module.backbone, test_dataloader)
test_u2w, test_u2l = pl_module.cuda().predict_topk(test_dataloader, tokenizer=tokenizer, topk=8)

0it [00:00, ?it/s]

2023-11-11 14:13:32,549 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/test/feats.ark


In [76]:
def limit_vocab(u2w, vocab=vocab):
    lv = {}
    for k, v in u2w.items():
        corr_w = []
        for w in v:
            if w in vocab:
                corr_w.append(w)
        if len(corr_w) == 0: 
            logging.warning(f"{k=} doesn't have any vocab hyp. {v=}")
            corr_w = []
        lv[k] = corr_w
    return lv
test_lv = limit_vocab(test_u2w)



In [83]:
baseline_result = pd.read_csv('./keyboard_start/result/baseline.csv', sep=',', names=['main', 'second', 'third', 'trash'])
#baseline_result = pd.read_csv('exp/models/ctc_trans/lightning_logs/version_50422251/test_submit.v1.csv', sep=',', names=['main', 'second', 'third', 'trash'])
#baseline_result = 
baseline_result['uid'] = [f'test-{i}' for i in range(len(baseline_result))]
baseline_result.head()

baseline_result['predict'] = baseline_result.uid.apply(lambda x: test_lv[x])
baseline_result.head()
rows = []
for i, row in baseline_result.iterrows():
    ps = row['predict']
    for p in [row['main'], row['second'], row['third'], row['trash']]:
        if p not in ps:
            ps.append(p)
    rows.append(ps[:4])
        
submission = pd.DataFrame(rows, columns=['main', 'second', 'third', 'trash'])
submission.to_csv("exp/models/t_finetune_test/lightning_logs/version_50454149/test_submit.v8.csv", 
                  sep=',', header=False, index=False)
submission.head()

Unnamed: 0,main,second,third,trash
0,на,нана,нас,га
1,что,что-то,сто,часто
2,опоздания,опоздание,опозданий,опозданиям
3,сколько,скольки,столько,сколько
4,дремать,думать,снимать,дописать


In [111]:
s5_df = pd.read_csv('exp/models/t_finetune_test/lightning_logs/version_50454149/test_submit.v10.csv', sep=',', names=['main', 'second', 'third', 'trash'])
s5_df['uid'] = [f'test-{i}' for i in range(len(s5_df))]
s5_df.head()

weak_sup_test = {row['uid']: row['main'] if row['main']!= '-' else row['second'] for i, row in s5_df.iterrows()}

with open('exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.text', 'w') as f:
          f.write(''.join(f"{u} {t}\n" for u, t in weak_sup_test.items()))

In [112]:
mask =s5_df['main'].isna()
s5_df['main'][mask] #= s5_df['second'][mask]

Series([], Name: main, dtype: object)

In [113]:
s5_df.head()

Unnamed: 0,main,second,third,trash,uid
0,на,нас,нана,нам,test-0
1,что,что-то,сто,чтоб,test-1
2,опоздания,опоздание,опозданиям,опозданий,test-2
3,сколько,скольки,сколько,столько,test-3
4,дремать,донимать,думать,дописать,test-4


In [39]:
# s5_df[["main", 'second', 'third', 'trash']].to_csv("exp/models/transformer_sc/lightning_logs/version_50424998/test_submit.v6.csv", 
#                   sep=',', header=False, index=False)

In [40]:
#!head "exp/models/transformer_sc/lightning_logs/version_50424998/test_submit.v6.csv"

на,неа,на,ненка
что,часто,частого,чисто
опоздания,опозданиям,оприходования,опозданиями
сколько,сокольского,свердловского,скроено
дремать,дописать,донимать,дюрренматт
не,неук,нк,ненка
как,капак,капе,капуе
садовод,спародировал,садовод,сурдоперевод
заметил,знаменито,знаменитого,замерил
ваги,ваенги,венгрии,ванги


In [114]:
!head 'exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.text'

test-0 на
test-1 что
test-2 опоздания
test-3 сколько
test-4 дремать
test-5 не
test-6 как
test-7 садовод
test-8 заметил
test-9 ваги


In [115]:
weak_sup_test

{'test-0': 'на',
 'test-1': 'что',
 'test-2': 'опоздания',
 'test-3': 'сколько',
 'test-4': 'дремать',
 'test-5': 'не',
 'test-6': 'как',
 'test-7': 'садовод',
 'test-8': 'заметил',
 'test-9': 'ваги',
 'test-10': 'ок',
 'test-11': 'плинтус',
 'test-12': 'ай',
 'test-13': 'ищем',
 'test-14': 'лет',
 'test-15': 'могу',
 'test-16': 'может',
 'test-17': 'спокойной',
 'test-18': 'рядом',
 'test-19': 'вспоминать',
 'test-20': 'максим',
 'test-21': 'веселое',
 'test-22': 'невинные',
 'test-23': 'туда',
 'test-24': 'тебя',
 'test-25': 'ре',
 'test-26': 'точно',
 'test-27': 'чего',
 'test-28': 'помою',
 'test-29': 'хорошо',
 'test-30': 'укладки',
 'test-31': 'нужны',
 'test-32': 'ты',
 'test-33': 'почему',
 'test-34': 'не',
 'test-35': 'поеду',
 'test-36': 'то',
 'test-37': 'быть',
 'test-38': 'не',
 'test-39': 'завтраки',
 'test-40': 'будем',
 'test-41': 'дома',
 'test-42': 'со',
 'test-43': 'свою',
 'test-44': 'он',
 'test-45': 'было',
 'test-46': 'человек',
 'test-47': 'погоди',
 'test-48': 

In [116]:
encoded = {u: tokenizer.encode(v, out_type="immutable_proto") for u, v in weak_sup_test.items()}


In [117]:
with open(f"exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.piece", "w") as f:
    f.write(''.join([f"{u} " + " ".join(e.piece for e in line.pieces) + '\n' for u, line in encoded.items()]))
with open(f"exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.int", "w") as f:
    f.write(''.join([f"{u} " + " ".join(str(e.id) for e in line.pieces) + '\n' for u, line in encoded.items()]))

In [118]:
!head exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.piece
!head exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.int

test-0 ▁на
test-1 ▁что
test-2 ▁о по з да ни я
test-3 ▁сколько
test-4 ▁д рем а ть
test-5 ▁не
test-6 ▁как
test-7 ▁са до во д
test-8 ▁за м ет и л
test-9 ▁ва ги
test-0 33
test-1 69
test-2 11 179 484 53 48 487
test-3 400
test-4 9 360 468 23
test-5 17
test-6 27
test-7 236 107 55 477
test-8 37 480 30 472 476
test-9 218 211


In [92]:
#!cat exp/bpe500/valid-text.int exp/models/transformer_sc/lightning_logs/version_50424998/test.int >  exp/models/transformer_sc/lightning_logs/version_50424998/valid_test.int

In [121]:
finetune_ds = FeatsIterableDatasetV2([f"ark:data_feats/test/feats.ark"], 
                                targets_rspecifier='ark:exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.int', 
                                shuffle=False,
                                bos_id=1, 
                                eos_id=2,
                                batch_first=False)
finetune_dataloader = torch.utils.data.DataLoader(finetune_ds, 
                                             batch_size=24,
                                             #num_workers=2, 
                                             collate_fn=finetune_ds.collate_pad)

2023-11-11 15:24:19,952 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:44 - INFO - Loading targets from ark:exp/models/t_finetune_test/lightning_logs/version_50454149/v10.test.int


Loading targets...: 0it [00:00, ?it/s]

In [127]:
model = TransformerWithSinPos(feats_dim=37, num_tokens=500)
pl_module = SwipeTransformerRecognizer.load_from_checkpoint('exp/models/transformer_sc/lightning_logs/version_50424998/checkpoints/last.ckpt',backbone=model, map_location='cpu' )
#pl_module = SwipeTransformerRecognizer.load_from_checkpoint('exp/models/t_finetune_with_sa/lightning_logs/version_50448424/checkpoints/last.ckpt',backbone=model, map_location='cpu' )
pl_module.hparams.learning_rate = 1e-5 

PositionalEncoding shape is torch.Size([400, 1, 512])


In [128]:
trainer = pl.Trainer(max_epochs=2, log_every_n_steps=100, reload_dataloaders_every_n_epochs=1,
                    default_root_dir='exp/models/t_finetune_test',
                    callbacks=[pl.callbacks.TQDMProgressBar(refresh_rate=100),
                              pl.callbacks.ModelCheckpoint(#every_n_train_steps=10000,
                                                          save_last=True)],
                    accumulate_grad_batches=8, # )
                    #val_check_interval=20000)
                    check_val_every_n_epoch=10)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [129]:
trainer.fit(pl_module, finetune_dataloader, val_dataloader)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                  | Params
---------------------------------------------------
0 | backbone | TransformerWithSinPos | 44.7 M
1 | ce_loss  | CrossEntropyLoss      | 0     
---------------------------------------------------
44.7 M    Trainable params
0         Non-trainable params
44.7 M    Total params
178.690   Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Sanity Checking: 0it [00:00, ?it/s]

2023-11-11 15:27:16,929 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

2023-11-11 15:27:16,973 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/test/feats.ark
2023-11-11 15:27:53,393 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/test/feats.ark


`Trainer.fit` stopped: `max_epochs=2` reached.


In [130]:
trainer.test(pl_module, val_dataloader)
# [{'test_loss': 0.12403599917888641}]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

2023-11-11 15:28:33,450 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


[{'test_loss': 0.29372653365135193}]

In [131]:
valid_u2w, valid_u2l = pl_module.cuda().predict_topk(val_dataloader, tokenizer=tokenizer, topk=8)
valid_lv = limit_vocab(valid_u2w)
accuracy(valid_ref_u2w, {u: v[0] if len(v) else '' for u, v in valid_lv.items()})
# v9 total=10000 corr=8750 err=1250, accuracy: 0.875

  0%|          | 0/10000 [00:00<?, ?it/s]

2023-11-11 15:29:48,703 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/valid/feats.ark


  0%|          | 0/10000 [00:00<?, ?it/s]

геев гены
была басса
рам нам
да да-да
что что-то
шакалов заказы
воля волосы
ура уля
корень удачного
купи курс
из из-за
говорить говорит
водитель водителю
мед инд
вечером вечер
фиолетовой фиолетовое
со сол
выехал выезжать
мы ивы
выздоравливай выдавливай
черна черны
выгуливать выгулять
сорян славян
вызовов вызвала
пробовал побор
же желе
стать стараюсь
ха за
русскому русском
ок оке
мазок мажора
то ото
не на
обувь обед
завтра заработать
он лон
никогда никого
не неа
был было
баба батута
пойми пофиг
не нее
не нее
кто кто-то
кн нечего
виде видел
не нее
андреевна анжела
прошу проще
дура дулов
ск скак
агапкина 
ниже нижнее
верон вероника
анадырь аналогию
лада ладно
заберем забери
завтра звоню
не нее
тыс там
стоит строит
выбил футболка
не нее
пик питу
прошел пошел
романович романтичная
глав главное
работы работаю
доллар дождусь
не нее
занимаешься 
жень день
приветик привет
мм ммм
выглядят выглядит
крот корот
потому помиримся
что что-то
не нее
привете привет
не нее
ест есть
во вопрос
лежит делись

0.8272

In [132]:
test_u2w, test_u2l = pl_module.cuda().predict_topk(test_dataloader, tokenizer=tokenizer, topk=8)
test_lv = limit_vocab(test_u2w)

0it [00:00, ?it/s]

2023-11-11 15:34:45,419 root /mnt/asr_hot/mitrofanov-aa/projects/chime7/chime7_stc_recipe/egs/it9/ya/maatool/data/feats_itdataset_v2.py:68 - INFO - Processing ark:data_feats/test/feats.ark


In [133]:
baseline_result = pd.read_csv('./keyboard_start/result/baseline.csv', sep=',', names=['main', 'second', 'third', 'trash'])
#baseline_result = pd.read_csv('exp/models/ctc_trans/lightning_logs/version_50422251/test_submit.v1.csv', sep=',', names=['main', 'second', 'third', 'trash'])
#baseline_result = 
baseline_result['uid'] = [f'test-{i}' for i in range(len(baseline_result))]
baseline_result.head()

baseline_result['predict'] = baseline_result.uid.apply(lambda x: test_lv[x])
baseline_result.head()
rows = []
for i, row in baseline_result.iterrows():
    ps = row['predict']
    for p in [row['main'], row['second'], row['third'], row['trash']]:
        if p not in ps:
            ps.append(p)
    rows.append(ps[:4])
        
submission = pd.DataFrame(rows, columns=['main', 'second', 'third', 'trash'])
submission.to_csv("exp/models/t_finetune_test/lightning_logs/version_50454149/test_submit.v11.csv", 
                  sep=',', header=False, index=False)
submission.head()


Unnamed: 0,main,second,third,trash
0,на,нас,нана,нага
1,что,что-то,сто,чтоб
2,опоздания,опоздание,опозданиям,опозданий
3,сколько,скольки,сколько,столько
4,дремать,донимать,дописать,дюрренматт


In [37]:
import pandas as pd

In [102]:
baseline_result = pd.read_csv('exp/models/ctc_trans/lightning_logs/version_50422251/test_submit.v1.csv', sep=',', names=['main', 'second', 'third', 'trash'])
baseline_result['uid'] = [f'test-{i}' for i in range(len(baseline_result))]
baseline_result.head()

Unnamed: 0,main,second,third,trash,uid
0,на,неа,на,ненка,test-0
1,что,часто,частого,чисто,test-1
2,опоздания,опозданиям,оприходования,опозданиями,test-2
3,сколько,сокольского,свердловского,скроено,test-3
4,дремать,дописать,донимать,дюрренматт,test-4


In [39]:
baseline_result['predict'] = baseline_result.uid.apply(lambda u: test_lv[u])
baseline_result.head()

Unnamed: 0,main,second,third,trash,uid,predict
0,на,неа,на,ненка,test-0,"[на, нас, нана, нам, неа, на-на]"
1,что,часто,частого,чисто,test-1,"[что, что-то, сто, чтоб, чисто, со, часто, чмок]"
2,опоздания,опозданиям,оприходования,опозданиями,test-2,"[опоздания, опоздание, опозданиям, опозданий, ..."
3,сколько,сокольского,свердловского,скроено,test-3,"[сколько, скольки, сколько, столько, только, с..."
4,дремать,дописать,донимать,дюрренматт,test-4,"[дремать, донимать, думать]"


In [40]:
rows = []

for i, row in baseline_result.iterrows():
    ps = row['predict']
    for p in [row['main'], row['second'], row['third'], row['trash']]:
        if p not in ps:
            ps.append(p)
    rows.append(ps[:4])
        
submission = pd.DataFrame(rows, columns=['main', 'second', 'third', 'trash'])
submission.head()

Unnamed: 0,main,second,third,trash
0,на,нас,нана,нам
1,что,что-то,сто,чтоб
2,опоздания,опоздание,опозданиям,опозданий
3,сколько,скольки,сколько,столько
4,дремать,донимать,думать,дописать


In [41]:
submission.to_csv("exp/models/transformer_sc/lightning_logs/version_50424998/test_submit.v5.csv", 
                  sep=',', header=False, index=False)