In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to local source files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Directory added to the import path so the local `weave` package can be imported
# (presumably it lives there -- verify against the repository layout).
SRC_DIR = "../../src"

# Test for the SAME entry that gets appended; the previous guard checked "../src",
# which never matched, so every re-run of this cell appended a duplicate entry.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
    
import json
import pandas as pd
from pathlib import Path
from itertools import chain
from typing import cast, Dict
from tqdm.auto import tqdm
import numpy as np
from praatio import textgrid
from weave import Utterance, Speech, BuildUtt

In [3]:
### Read character-boundary dictionary
syll_tsv_path = Path("../../data/mandarin_taiwan_mfa_redelvar.syll.tsv")
syll_table = pd.read_csv(syll_tsv_path, sep="\t")

# One record per distinct `ipa`: take the first non-null value of every column
# within the group, then discard the `word` column.
syll_by_ipa = syll_table.groupby("ipa").first().drop(columns="word")

# Shape: {ipa: {column_name: value}}. `cast` only narrows the static type;
# it performs no conversion at runtime.
word_syll_map = cast(Dict[str, Dict[str, str]], syll_by_ipa.to_dict(orient="index"))

In [4]:
speaker_dir = Path("../../data/shehui/aligned/s0")
textgrid_paths = sorted(speaker_dir.glob("*.TextGrid"))

# Convert every TextGrid in the speaker directory into a Speech JSON file
# written next to it (same stem, ".mfa.json" suffix).
for tg_path_x in tqdm(textgrid_paths):
    fa = textgrid.openTextgrid(str(tg_path_x), False)

    # Combine the "words" and "phones" tiers, then split the words into
    # characters using the syllable map loaded earlier.
    mfa_wlist = BuildUtt.build_words_phones(  # type: ignore
        fa.getTier("words"),
        fa.getTier("phones"),
    )
    mfa_words = BuildUtt.build_characters(mfa_wlist, word_syll_map)  # type: ignore

    # One utterance per file, identified by the TextGrid's file stem.
    mfa_utt = Utterance.from_words(mfa_words)
    mfa_utt.utt_id = tg_path_x.stem

    # Wrap the single utterance in a Speech object and serialise it as JSON.
    speech_x = Speech.from_utterances([mfa_utt])
    mfa_json_path = tg_path_x.with_suffix(".mfa.json")
    mfa_json_path.write_text(json.dumps(speech_x.to_dict()))

  0%|          | 0/101 [00:00<?, ?it/s]

In [5]:
from weave import MfaAgent

# Corpus identifier; also the name of the corpus folder under ~/Documents/MFA.
CORPUS_NAME = "shehui"

# Expand "~" to the current user's home directory.
mfa_dir = Path(f"~/Documents/MFA/{CORPUS_NAME}").expanduser()

# The agent is pointed at the directory holding the *.mfa.json files produced
# by the TextGrid conversion loop.
agent = MfaAgent(
    str(mfa_dir),
    CORPUS_NAME,
    speech_json_dir=speaker_dir,
)

STDERR: gmm-copy --binary=false /home/seantyh/Documents/MFA/shehui/alignment/final.mdl - 
LOG (gmm-copy[5.5.1068]:main():gmmbin/gmm-copy.cc:75) Written model to -



## Building data

In [6]:
# MFCC frame duration: 25ms, shift: 10ms
# took around five minutes on T15p
FRAME_SHIFT = 0.01  # seconds per frame step (the 10 ms shift noted above)

# One row per frame: [utt_id, t, aligned phone label, max-scoring phone, *values]
phones_data = []
feats_data = []

for speech_idx, speech_x in enumerate(tqdm(agent.speeches)):
    # NOTE(review): only the first utterance and its first word are processed
    # for each speech -- confirm that every speech holds exactly one word.
    utt_x = speech_x.utterances[0]
    word_x = utt_x.words[0]
    mfa_uttid = agent.wav2uttid[utt_x.utt_id]
    lgmmprob, feat_mat = agent.compute_gmm(mfa_uttid, return_features=True)

    # Word boundaries converted to frame indices relative to the utterance
    # start; the end is clamped to the number of rows actually available.
    start_step = int((word_x.start-utt_x.start)/FRAME_SHIFT)
    end_step = int((word_x.end-utt_x.start)/FRAME_SHIFT)
    end_step = min(end_step, lgmmprob.shape[0])

    aligned_phones = list(word_x.iter_phones())
    last_phone_idx = len(aligned_phones) - 1
    ali_phone_idx = 0
    cur_phone = aligned_phones[ali_phone_idx]
    for t in range(start_step, end_step):
        # Move on to the phone that covers the centre of frame t (t + 0.5).
        # A `while` (not `if`) lets the pointer skip phones shorter than one
        # frame, and the index bound keeps it on the last phone instead of
        # raising IndexError when that phone ends before the word's last frame.
        while (ali_phone_idx < last_phone_idx
               and (cur_phone.end-utt_x.start)/FRAME_SHIFT < t+0.5):
            ali_phone_idx += 1
            cur_phone = aligned_phones[ali_phone_idx]
        # Phone-table entry whose column has the highest value in this frame.
        ml_phone = agent.phone_table[lgmmprob[t, :].argmax()]
        meta_cols = [utt_x.utt_id, t, cur_phone.label, ml_phone]
        phones_data.append([*meta_cols, *lgmmprob[t, :]])
        feats_data.append([*meta_cols, *feat_mat[t, :]])

  0%|          | 0/101 [00:00<?, ?it/s]

transform-feats /home/seantyh/Documents/MFA/shehui/alignment/lda.mat ark:- ark:- 
LOG (transform-feats[5.5.1068]:main():featbin/transform-feats.cc:158) Overall average [pseudo-]logdet is -30.4517 over 34 frames.
LOG (transform-feats[5.5.1068]:main():featbin/transform-feats.cc:161) Applied transform to 1 utterances; 0 had errors.
gmm-compute-likes /home/seantyh/Documents/MFA/shehui/alignment/boost.1.1.mdl ark:- ark:- 
LOG (gmm-compute-likes[5.5.1068]:main():gmmbin/gmm-compute-likes.cc:82) gmm-compute-likes: computed likelihoods for 1 utterances.
transform-feats /home/seantyh/Documents/MFA/shehui/alignment/lda.mat ark:- ark:- 
LOG (transform-feats[5.5.1068]:main():featbin/transform-feats.cc:158) Overall average [pseudo-]logdet is -30.4517 over 30 frames.
LOG (transform-feats[5.5.1068]:main():featbin/transform-feats.cc:161) Applied transform to 1 utterances; 0 had errors.
gmm-compute-likes /home/seantyh/Documents/MFA/shehui/alignment/boost.1.1.mdl ark:- ark:- 
LOG (gmm-compute-likes[5.5.1

In [7]:
import pandas as pd

# Both tables start with the same four metadata columns.
meta_columns = ["utt_id", "t", "phone", "ml_phone"]

# Remaining columns of the phone table are named after the agent's phone table.
phones_df = pd.DataFrame(phones_data, columns=[*meta_columns, *agent.phone_table])

# Feature columns are named by integer position. The width is read from the
# `feat_mat` left over from the last iteration of the frame loop.
n_feat_dims = feat_mat.shape[1]
feats_df = pd.DataFrame(feats_data, columns=meta_columns + list(range(n_feat_dims)))

In [8]:
# Sanity check: both tables get one row appended per frame in the same loop,
# so their row counts should match; only the column counts differ.
phones_df.shape, feats_df.shape

((3864, 49), (3864, 44))

In [9]:
phones_df_path = "../../data/shehui/shehui_phones_loglik.csv"
feats_df_path = "../../data/shehui/shehui_feats.csv"
phones_df.to_csv(phones_df_path, index=False)
feats_df.to_csv(feats_df_path, index=False)
!sha1sum $phones_df_path $feats_df_path

01c932213afb09e5df84b8f061315ccaf01150b4  ../../data/shehui/shehui_phones_loglik.csv
1dc688287a27da6582e17cc3e04ad8fe1a928ce4  ../../data/shehui/shehui_feats.csv
