In [1]:
%load_ext watermark

## Data dependencies

```
Directory hash: '878029d4ec735540276c27aac3b21c7221088c34'
```

In [2]:
from hashlib import sha1
from pathlib import Path
# Fingerprint the annotation inputs: SHA-1 over the concatenated contents of
# every *.txt file in asp_dir.
asp_dir = "../data/asp"
h = sha1()
# Path.glob() yields entries in an unspecified, filesystem-dependent order and
# the digest depends on concatenation order, so iterate in sorted order to make
# the fingerprint reproducible across machines.
# NOTE(review): if sorted order differs from the order the recorded
# "Directory hash" was computed with, re-record that reference digest once.
for path_x in sorted(Path(asp_dir).glob("*.txt")):
    # Explicit UTF-8 so the digest does not depend on the locale's default encoding.
    h.update(path_x.read_text(encoding="utf-8").encode("utf-8"))
h.hexdigest()

'878029d4ec735540276c27aac3b21c7221088c34'

## Load data and extract MFCC features

In [3]:
import pandas as pd
import torchaudio
from speechbrain.lobes.features import MFCC
from speechbrain.dataio.dataio import read_audio

In [4]:
from tqdm.auto import tqdm

In [5]:
# For each annotation table in asp_dir, load the recording with the same stem
# and compute two MFCC summaries per annotated row: the first frame
# ("mfcc_first") and the mean over frames ("mfcc_mean").
wav_dir = Path("../data/TaiwaneseMandarinCorpus/")
for path_x in Path(asp_dir).glob("*.txt"):
    df = pd.read_csv(path_x)
    # Object-typed holder, aligned on df's index, for one vector per row.
    df_vec = pd.DataFrame(columns=["mfcc_first", "mfcc_mean"],
                          index=df.index)
    wav_path = wav_dir / f"{path_x.stem}.wav"
    assert wav_path.exists(), f"missing recording: {wav_path}"

    wav, sr = torchaudio.load(wav_path)

    # The extractor depends only on the sample rate, so build it once per
    # recording instead of once per row.
    n_fft = int(.025*sr)  # number of samples in 25 ms at this sample rate
    # NOTE(review): win_length / hop_length are presumably in milliseconds
    # (speechbrain convention) -- confirm against the MFCC documentation.
    mfcc_fn = MFCC(context=False,
                   win_length=25,
                   hop_length=10,
                   n_fft=n_fft,
                   sample_rate=sr, n_mfcc=20)

    ## by-item loop
    for idx, row in tqdm(df.iterrows(), total=df.shape[0]):
        # start_t / end_t are scaled by sr to get sample offsets
        # (assumes they are expressed in seconds -- TODO confirm).
        start_frame = int(row.start_t * sr)
        end_frame = int(row.end_t * sr)
        seg = wav[:,start_frame:end_frame]

        mfcc_tensor = mfcc_fn(seg)  # mfcc_tensor: (B, n_frame, n_mfcc_coef)
        df_vec.at[idx, "mfcc_first"] = mfcc_tensor[0, 0, :].numpy()
        df_vec.at[idx, "mfcc_mean"] = mfcc_tensor.mean(dim=1).squeeze().numpy()
    # NOTE(review): only the first file yielded by glob is processed; the
    # following cells consume the df, df_vec and path_x left over from this
    # single iteration.
    break

  0%|          | 0/2512 [00:00<?, ?it/s]

Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/SpectralOps.cpp:867.)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


In [6]:
# Attach the per-row MFCC columns to the annotation table; both frames share
# the same index, so this is a plain index-aligned left join.
df_merged = df.join(df_vec, how="left")

In [7]:
# Persist the merged table next to the other data files, named after the
# annotation file it was derived from, then report a checksum of the output.
sbjname = path_x.stem
out_path = f"../data/{sbjname}.mfcc.parquet"
df_merged.to_parquet(out_path)
# Compute the checksum in Python instead of shelling out to `sha1sum`, which
# is not available on every platform; the printed line keeps the same
# "<digest>  <path>" layout.
out_digest = sha1(Path(out_path).read_bytes()).hexdigest()
print(f"{out_digest}  {out_path}")

9b410e0f0043e71172f599c9e23184e68245302e  ../data/CXH2_GY.mfcc.parquet


## Watermark

In [8]:
# Record when this notebook was last run and on which interpreter / platform.
%watermark

Last updated: 2023-04-23T15:06:38.320823+02:00

Python implementation: CPython
Python version       : 3.10.10
IPython version      : 8.12.0

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 22.1.0
Machine     : arm64
Processor   : arm
CPU cores   : 8
Architecture: 64bit



In [9]:
%watermark --iversion

pandas    : 1.5.2
torchaudio: 2.0.1

