In [1]:
from pathlib import Path

import librosa
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import find_peaks
import statsmodels.api as sm

In [3]:
def partition(lst, size):
    """Lazily yield consecutive slices of ``lst``, each ``size`` items long.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``size``.
    Works on anything that supports ``len()`` and slicing.
    """
    chunk_starts = range(0, len(lst), size)
    yield from (lst[start:start + size] for start in chunk_starts)


In [4]:
# def acorr(data):
#     mean = np.mean(data)
#     var = np.var(data)
#     acorr = np.correlate(data, data, "full")[len(data)-1:]
#     # acorr = acorr/var/len(syllable_0)
#     return acorr

In [45]:
def get_first_strong_peak(x, label=None):
    """Find the first strong local maximum in the autocorrelation of ``x``.

    The autocorrelation is evaluated for lags 0..2000 and scanned in order of
    increasing lag; the first local maximum whose value is above 0.5 wins.

    Args:
        x: Sequence of samples to analyse.
        label: Not used by the live code; only the commented-out debug plot
            below refers to it.

    Returns:
        ``(lag, value)`` for the first qualifying peak, or ``(-1, 0)`` when
        no peak exceeds 0.5.
    """
    autocorrelation = sm.tsa.acf(x, nlags=2000)
    candidate_lags, _ = find_peaks(autocorrelation)
    # Debug aid:
    # plt.plot(autocorrelation, label=label)
    # plt.legend()
    strong_peaks = (
        (lag, autocorrelation[lag])
        for lag in candidate_lags
        if autocorrelation[lag] > 0.5
    )
    return next(strong_peaks, (-1, 0))

def get_syllables_naive(data, n_syll):
    """Cut ``data`` into ``n_syll`` consecutive pieces of equal length.

    Every piece is ``len(data) // n_syll`` items long. Items left over at the
    end that do not fill a whole piece are discarded, so exactly ``n_syll``
    pieces come back whenever ``data`` holds at least ``n_syll`` items.
    """
    syllable_length = len(data) // n_syll
    pieces = list(partition(data, syllable_length))
    # Drop the trailing remainder piece, if any.
    return pieces[:n_syll]

def get_pitches(data, sampling_frequency, n_syll):
    syllables = get_syllables_naive(data, n_syll)
    strong_peaks = [get_first_strong_peak(syll, f"syllable {i}")
        for i, syll in enumerate(syllables)]
    base_pitches = [sampling_frequency / peak if peak > 0 else None
        for peak, _ in strong_peaks]
    final_pitches = [round(pitch, 1) if pitch is not None and 50 < pitch < 500
        else None for pitch in base_pitches]
    return final_pitches

In [48]:
# Characters removed before counting: the small ya/yu/yo kana and the
# combining dakuten / handakuten marks, so none of them is counted as a
# syllable of its own.
_UNCOUNTED_CHARS = str.maketrans("", "", "ょゅゃ\u3099\u309a")


def count_syllables(reading):
    """Return the number of characters in ``reading`` once the small
    ya/yu/yo kana and combining (han)dakuten marks have been removed."""
    return len(reading.translate(_UNCOUNTED_CHARS))


audio_root = Path("./audio")
# Sort the matches: glob yields files in a filesystem-dependent order, which
# would make the printed results differ from run to run / machine to machine.
clips = sorted(audio_root.glob("Pages/Masu/**/*.wav"))
for file in clips:
    data, sampling_frequency = librosa.load(file)
    # File names are "_"-separated: field 1 is printed as the label and
    # field 2 is the text whose characters are counted.
    name_fields = file.name.split('_')
    n_syllables = count_syllables(name_fields[2])
    pitches = get_pitches(data, sampling_frequency, n_syllables)
    print(name_fields[1], pitches)

聞きます [210.0, 294.0, 294.0, None]
買います [229.7, 286.4, 302.1, None]
寝ます [212.0, 268.9, None]
読みます [198.6, 265.7, 306.2, None]
帰ります [212.0, 262.5, 286.4, None, 144.1]
行きます [227.3, 298.0, 298.0, None]
見ます [214.1, 262.5, 144.1]
来ます [237.1, 286.4, None]
します [222.7, 272.2, 138.7]
勉強します [195.1, 256.4, 272.2, None, 256.4, None, 147.0]
起きます [188.5, 279.1, 282.7, None]
食べます [206.1, 265.7, 272.2, None]
飲みます [204.2, 265.7, 302.1, None]
勉強しません [150.0, None, 247.8, 234.6, 229.7, 220.5, 220.5, None]
来ません [176.4, 232.1, 232.1, None]
飲みません [159.8, 196.9, 234.6, 232.1, None]
食べません [163.3, 227.3, 237.1, 222.7, 191.7]
買いません [None, None, 232.1, 225.0, None]
しません [None, 210.0, 229.7, None]
行きません [162.1, 232.1, 237.1, 232.1, None]
読みません [158.6, 210.0, 234.6, 222.7, None]
寝ません [163.3, 216.2, 234.6, 202.3]
起きません [159.8, 242.3, 239.7, 232.1, None]
帰りません [None, None, 234.6, 232.1, 227.3, None]
見ません [165.8, 204.2, 237.1, 202.3]
聞きません [None, 232.1, 232.1, 225.0, None]
運動しました [191.7, 253.4, 282.7, 282.7, 268.9, 26