In [241]:
from scipy import signal
import numpy as np
from scipy.io import wavfile

YIN paper (de Cheveigné & Kawahara, JASA 2002):
http://audition.ens.fr/adc/pdf/2002_JASA_YIN.pdf

Explanation: https://www.youtube.com/watch?v=W585xR3bjLM

In [242]:
def f(x, f_0=1):
    """Generate a unit-amplitude sine wave.

    Parameters
    ----------
    x : float or numpy.ndarray
        Instants at which the signal is evaluated.
    f_0 : float, optional
        Frequency of the sine, in cycles per unit of ``x``. Defaults to 1,
        the value that used to be hard-coded.

    Returns
    -------
    float or numpy.ndarray
        ``sin(2 * pi * f_0 * x)``, with the same shape as ``x``.
    """
    # Multiplication order is kept as-is so default outputs stay bit-identical.
    return np.sin(x*np.pi*2*f_0)

In [243]:
def ACF(f, W, t, lag):
    """Autocorrelation of one window of ``f`` with a lagged copy of itself.

    Computes ``sum(f[t:t+W] * f[t+lag:t+lag+W])``.

    Parameters
    ----------
    f : numpy.ndarray
        Signal.
    W : int
        Window size, in samples.
    t : int
        Index of the first sample of the window (timestep).
    lag : int
        Number of samples by which the second window is shifted.

    Returns
    -------
    numpy scalar
        Sum of the element-wise product of the two windows.

    Raises
    ------
    ValueError
        If ``W``, ``t`` or ``lag`` is negative, or if the lagged window
        would extend past the end of ``f``.
    """
    if W < 0 or t < 0 or lag < 0:
        raise ValueError(
            f"W, t and lag must be non-negative (got W={W}, t={t}, lag={lag})"
        )
    # Without this check the two slices can have different lengths; NumPy then
    # either fails with an opaque broadcasting error or, when the truncated
    # slice has a single element, silently broadcasts it and returns a wrong
    # value.
    if t + lag + W > len(f):
        raise ValueError(
            f"lagged window [{t + lag}, {t + lag + W}) exceeds signal length {len(f)}"
        )
    return np.sum(
        f[t: t+W] * f[lag+t:lag+t+W]
    )

In [244]:
def detect_pitch(f, W, t, sample_rate, bounds):
    """Estimate the pitch of ``f`` from the peak of its autocorrelation.

    f: signal
    W: window size, in samples
    t: timestep (index of the first sample of the window)
    sample_rate: sample rate of the signal
    bounds: arguments for ``range`` describing the lags (in samples) to try

    Returns ``sample_rate`` divided by the lag whose ACF value is largest.
    """
    scores = []
    for lag in range(*bounds):
        scores.append(ACF(f, W, t, lag))
    # argmax is relative to the start of the range, so shift it back to a lag.
    best_lag = bounds[0] + np.argmax(scores)
    return sample_rate / best_lag  # lag in samples -> frequency


In [245]:
# Test 1: pure sine wave generated by f() (f_0 = 1)
start, end = 0, 5  # signal t0 and tf
sample_rate = 500  # signal sample rate
window_size = 200  # autocorrelation window size, in samples
num_samples = int(sample_rate * (end - start) + 1)
# Candidate lags: from 20 samples up to half of the signal.
bounds = [20, num_samples // 2]
x = np.linspace(start, end, num_samples)
print(detect_pitch(f(x), window_size, 1, sample_rate, bounds))

0.9823182711198428


#### Test with a decreasing-amplitude signal

In [246]:
def dicreasing_f(x, f_0=1, decay=1.0):
    """Generate a sine wave whose amplitude decays exponentially.

    Parameters
    ----------
    x : float or numpy.ndarray
        Instants at which the signal is evaluated.
    f_0 : float, optional
        Frequency of the sine, in cycles per unit of ``x``. Defaults to 1,
        the value that used to be hard-coded.
    decay : float, optional
        Rate of the ``exp(-decay * x)`` amplitude envelope. Defaults to 1.0,
        which reproduces the original ``exp(-x)`` envelope.

    Returns
    -------
    float or numpy.ndarray
        ``sin(2 * pi * f_0 * x) * exp(-decay * x)``.
    """
    envelope = np.exp(-decay * x)
    return np.sin(x*np.pi*2*f_0) * envelope

In [247]:
# Test 2: exponentially decaying sine generated by dicreasing_f()
start, end = 0, 5  # signal t0 and tf
sample_rate = 500  # signal sample rate
window_size = 200  # autocorrelation window size, in samples
num_samples = int(sample_rate * (end - start) + 1)
# Candidate lags: from 20 samples up to half of the signal.
bounds = [20, num_samples // 2]
x = np.linspace(start, end, num_samples)
print(detect_pitch(dicreasing_f(x), window_size, 1, sample_rate, bounds))

25.0


#### Use the squared difference function instead of the raw autocorrelation. Otherwise, negative terms cancel out.

In [248]:
def DF(f, W, t, lag):
    """Difference function between the window at ``t`` and the one at ``t + lag``.

    Built from three ACF terms: the energy of each window minus twice their
    cross-correlation, which is the expanded form of the sum of squared
    differences between the two windows.

    f: signal
    W: window size, in samples
    t: timestep (index of the first sample of the window)
    lag: number of samples separating the two windows
    """
    energy_at_t = ACF(f, W, t, 0)
    energy_at_lag = ACF(f, W, t + lag, 0)
    cross_term = ACF(f, W, t, lag)
    return energy_at_t + energy_at_lag - (2 * cross_term)


def detect_pitch_square(f, W, t, sample_rate, bounds):
    """Estimate the pitch of ``f`` from the minimum of its difference function.

    f: signal
    W: window size, in samples
    t: timestep (index of the first sample of the window)
    sample_rate: sample rate of the signal
    bounds: arguments for ``range`` describing the lags (in samples) to try

    Returns ``sample_rate`` divided by the lag whose DF value is smallest.
    """
    differences = list(DF(f, W, t, lag) for lag in range(*bounds))
    # argmin is relative to the start of the range, so shift it back to a lag.
    best_lag = bounds[0] + np.argmin(differences)
    return sample_rate / best_lag


In [249]:
# Test 2 again, this time using the difference-function detector.
decaying_signal = dicreasing_f(x)
print(detect_pitch_square(decaying_signal, window_size, 1, sample_rate, bounds))

25.0


#### Cumulative mean normalized difference function

In [250]:
def CMNDF(f, W, t, lag):
    """Cumulative mean normalized difference function.

    Divides ``DF`` at ``lag`` by the mean of ``DF`` over lags ``1..lag``.

    f: signal
    W: window size, in samples
    t: timestep (index of the first sample of the window)
    lag: number of samples to shift (expected to be non-negative)

    Returns 1 for ``lag == 0``. Also returns 1 when the DF values for lags
    ``1..lag`` sum to zero (e.g. an all-zero window), where the ratio is
    undefined; previously this produced nan/inf and a NumPy RuntimeWarning.
    """
    if lag == 0:
        return 1
    diffs = [DF(f, W, t, j+1) for j in range(lag)]
    cumulative = np.sum(diffs)
    if cumulative == 0:
        # No meaningful normalisation is possible; report "no dip".
        return 1
    # diffs[-1] is DF at `lag`, reused instead of being computed a second time.
    return diffs[-1] / cumulative*lag


def detect_pitch_cmndf(f, W, t, sample_rate, bounds, thresh=0.1):
    """Estimate the pitch of ``f`` with the cumulative mean normalized DF.

    The chosen lag is the first candidate whose normalized difference is
    below ``thresh``; if none is, the candidate with the smallest normalized
    difference is used. Note that this takes the first value under the
    threshold, not the local minimum that follows it.

    Parameters
    ----------
    f : numpy.ndarray
        Signal.
    W : int
        Window size, in samples.
    t : int
        Index of the first sample of the window (timestep).
    sample_rate : number
        Sample rate of the signal.
    bounds : sequence of int
        Arguments for ``range`` describing the lags (in samples) to try.
    thresh : float, optional
        Threshold on the normalized difference. Defaults to 0.1.

    Returns
    -------
    number
        ``sample_rate`` divided by the selected lag.

    Raises
    ------
    ValueError
        If ``bounds`` describes no lag at all, or includes a negative lag.
    """
    lags = range(*bounds)
    if len(lags) == 0:
        raise ValueError("bounds must describe at least one candidate lag")
    if min(lags) < 0:
        raise ValueError("candidate lags must be non-negative")

    # Evaluate DF once per lag in 1..max_lag and keep a running sum. Going
    # through CMNDF for every candidate would recompute DF for lags 1..lag
    # each time, i.e. a quadratic number of DF evaluations. Results match the
    # per-lag computation up to floating-point rounding.
    max_lag = max(lags)
    df_vals = np.array(
        [DF(f, W, t, lag) for lag in range(1, max_lag + 1)], dtype=float
    )
    df_cumsum = np.cumsum(df_vals)

    def normalized(lag):
        # Lag 0 is defined as 1; a zero running sum (e.g. all-zero window)
        # makes the ratio undefined, so treat it as "no dip" as well.
        if lag == 0 or df_cumsum[lag - 1] == 0:
            return 1.0
        return df_vals[lag - 1] / df_cumsum[lag - 1] * lag

    CMNDF_vals = [normalized(lag) for lag in lags]
    # Pair each value with its actual lag so a stepped range is handled too.
    for lag, val in zip(lags, CMNDF_vals):
        if val < thresh:
            return sample_rate / lag
    # Nothing under the threshold: fall back to the global minimum.
    sample = lags.start + np.argmin(CMNDF_vals) * lags.step
    return sample_rate / sample


In [251]:
print(detect_pitch_cmndf(dicreasing_f(x), window_size, 1, sample_rate, bounds))

1.002004008016032


## With audio files

In [252]:
# Load the recording and run the autocorrelation pitch detector on
# consecutive, non-overlapping frames.
sample_rate, data = wavfile.read("assets/tenor.wav")
data = data.astype(np.float64)
# NOTE(review): the frame loop below indexes `data` along its first axis only;
# presumably the file is mono - confirm.
window_size = int(5/2000*sample_rate)  # 2.5 ms worth of samples
bounds = [20, 2000]  # candidate lags, in samples

# A frame starting at t reads samples up to t + (bounds[1] - 1) + window_size,
# so only start frames where that still fits inside the signal. The previous
# frame count, len // (window_size + 3), did not guarantee this and could fail
# with a broadcasting error on short recordings.
last_start = data.shape[0] - window_size - (bounds[1] - 1)
pitches = [
    detect_pitch(data, window_size, frame_start, sample_rate, bounds)
    for frame_start in range(0, last_start + 1, window_size)
]

print(pitches)

  sample_rate, data = wavfile.read("assets/tenor.wav")


[35.450160771704184, 50.63145809414466, 75.77319587628865, 48.675496688741724, 32.37885462555066, 56.39386189258312, 147.0, 88.91129032258064, 30.925666199158485, 31.91027496382055, 34.21256788207913, 35.1394422310757, 60.66024759284732, 62.64204545454545, 91.11570247933884, 27.805800756620428, 27.5625, 125.2840909090909, 35.22364217252396, 81.21546961325967, 38.481675392670155, 76.69565217391305, 32.142857142857146, 62.20028208744711, 35.05564387917329, 60.0, 44.77157360406091, 193.42105263157896, 193.42105263157896, 290.13157894736844, 22.5, 140.0, 153.125, 24.23076923076923, 400.90909090909093, 81.06617647058823, 22.87344398340249, 24.40509131156613, 23.532550693703307, 112.5, 61.85133239831697, 30.413793103448278, 23.40764331210191, 95.66160520607376, 27.579737335834896, 27.9290690310323, 24.391592920353983, 32.59423503325942, 52.81437125748503, 38.820422535211264, 120.82191780821918, 46.12970711297071, 518.8235294117648, 66.01796407185628, 300.0, 50.11363636363637, 36.872909698996