In [3]:
import os
from time import time

import IPython.display
import librosa
import numpy as np

In [4]:

# Directory containing the audio clips, relative to this notebook.
dir_path = "../../vocalizationcorpus/data/"
# Full path of every directory entry, sorted so that list indices are stable
# from run to run.
filenames = sorted(os.path.join(dir_path, entry) for entry in os.listdir(dir_path))


In [21]:
# Exploratory feature extraction for a single clip. The cell ends by showing
# the shape of the resulting (frames x features) matrix.
k = 7
y, sr = librosa.load(filenames[k])

n_fft = 2048
n_mels = 13

# Filterbank features: magnitude STFT projected onto an n_mels-band mel
# filterbank, giving one row per frame.
mag_fft = np.absolute(librosa.stft(y, n_fft=n_fft))
# NOTE(review): pow_fft is never used below -- the filterbank is applied to
# the magnitude spectrum. Confirm whether the power spectrum was intended.
pow_fft = (1.0 / n_fft) * (mag_fft ** 2)
fbank = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels, fmin=0, fmax=sr / 2)
fbanked_features = mag_fft.T.dot(fbank.T)

# MFCCs computed from a 128-band log-mel spectrogram, plus first- and
# second-order deltas. The audio is passed as `y=` because recent librosa
# releases accept it by keyword only.
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
log_S = librosa.power_to_db(S, ref=np.max)
mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=n_mels)
delta_mfcc = librosa.feature.delta(mfcc)
delta2_mfcc = librosa.feature.delta(mfcc, order=2)

# Column order: MFCC, delta, delta-delta, filterbank.
M = np.hstack((mfcc.T, delta_mfcc.T, delta2_mfcc.T, fbanked_features))

M.shape

(474, 52)

In [78]:
# Inline player for the waveform currently held in `y`, at sample rate `sr`.
IPython.display.Audio(y, rate=sr)

In [19]:
def _parse_label_line(line):
    """Split one row of the labels file into a clip name and its kept intervals.

    The row is comma-separated. The first field is the clip name, the next
    three fields are ignored, and the remaining fields come in
    (type, start, end) triples. Triples whose type is 'filler' are dropped.

    Returns:
        (name, kept) where `kept` is a list of (start, end) float tuples in
        the order they appear in the row.

    Raises:
        AssertionError: if the fields after the first four do not form
            complete triples.
        ValueError: if a kept start/end field is not a valid float.
    """
    fields = line.strip().split(',')
    assert (len(fields) - 4) % 3 == 0, f"malformed label row: {line!r}"
    events = fields[4:]
    kept = [
        (float(start), float(end))
        for kind, start, end in zip(events[0::3], events[1::3], events[2::3])
        if kind != 'filler'
    ]
    return fields[0], kept


# names[i] is the clip name of the i-th data row; intervals[i] holds that
# clip's non-filler (start, end) intervals.
names = []
intervals = []
with open("../../vocalizationcorpus/labels.txt") as inp:
    inp.readline()  # discard the first line (presumably a header row)
    for st in inp:
        if not st.strip():
            # A blank (e.g. trailing) line would otherwise slip through the
            # triple check and register a clip with an empty name.
            continue
        name, clip_intervals = _parse_label_line(st)
        names.append(name)
        intervals.append(clip_intervals)
        

In [23]:
# Wall-clock reference points for the progress messages: `start_time` is fixed
# when this cell runs, `current_time` moves forward on every log() call.
start_time = time()
current_time = start_time


def log(message):
    """Print `message` followed by whole seconds since the previous call and since start."""
    global current_time
    now = time()
    since_last = int(now - current_time)
    since_start = int(now - start_time)
    current_time = now
    print(f"{message}, {since_last}s, total {since_start}s")
def _frame_features(y, sr, n_coeffs=13, n_fft=2048):
    """Return the per-frame feature matrix for one clip.

    Columns, in order: `n_coeffs` MFCCs (from a 128-band log-mel
    spectrogram), their first-order deltas, their second-order deltas, and
    `n_coeffs` mel-filterbank responses of the magnitude STFT. Rows are
    frames.
    """
    # `y=` is passed by keyword: recent librosa releases reject positional audio.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_S = librosa.power_to_db(S, ref=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=n_coeffs)
    delta_mfcc = librosa.feature.delta(mfcc)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # NOTE(review): the filterbank is applied to the magnitude spectrum. An
    # unused power-spectrum computation (mag ** 2 / n_fft) was dropped from
    # this loop -- confirm that magnitude is what was intended.
    mag_fft = np.absolute(librosa.stft(y, n_fft=n_fft))
    fbank = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_coeffs, fmin=0, fmax=sr / 2)
    fbanked_features = mag_fft.T.dot(fbank.T)

    return np.hstack((mfcc.T, delta_mfcc.T, delta2_mfcc.T, fbanked_features))


# Number of coefficients per feature group. It drives both the CSV header and
# the extracted columns, so the two cannot drift apart.
n_coeffs = 13

with open("mcc_features.csv", 'w') as otp:
    colnames = (
        [f"mfcc_{i}" for i in range(n_coeffs)]
        + [f"mfcc_delta_{i}" for i in range(n_coeffs)]
        + [f"mfcc_delta2_{i}" for i in range(n_coeffs)]
        + [f"fbank_{i}" for i in range(n_coeffs)]
        + ["IS_LAUGHTER", "SNAME"]
    )
    otp.write(','.join(colnames) + "\n")
    for filenum, path in enumerate(filenames):
        if filenum % 100 == 0:
            log(f"{filenum}/{len(filenames)}")
        y, sr = librosa.load(path)
        secs_len = y.shape[0] / sr
        M = _frame_features(y, sr, n_coeffs=n_coeffs)

        # Clip name = file name without directory or extension.
        # str.strip('.wav') removes any leading/trailing '.', 'w', 'a' or 'v'
        # characters rather than the suffix, so it would mangle a name such
        # as 'wave1.wav'; splitext removes only the extension.
        filename = os.path.splitext(os.path.basename(path))[0]
        # Audio files and label rows are matched purely by position.
        assert filename == names[filenum], (
            f"file {filename!r} does not match label row {names[filenum]!r}"
        )

        n_frames = M.shape[0]
        for i in range(n_frames):
            # Approximate frame time: frames are treated as evenly spread
            # over the clip's duration.
            mcc_time = i / n_frames * secs_len
            # A frame is positive when it falls strictly inside any kept interval.
            is_laughter = int(any(start < mcc_time < end
                                  for start, end in intervals[filenum]))
            otp.write(','.join(map(str, M[i])) + f",{is_laughter},{filename}\n")
log("done")

0/2763, 0s, total 0s
100/2763, 74s, total 74s
200/2763, 75s, total 150s
300/2763, 66s, total 216s
400/2763, 67s, total 284s
500/2763, 77s, total 361s
600/2763, 68s, total 430s
700/2763, 76s, total 506s
800/2763, 68s, total 574s
900/2763, 79s, total 654s
1000/2763, 65s, total 720s
1100/2763, 70s, total 790s
1200/2763, 71s, total 862s
1300/2763, 78s, total 940s
1400/2763, 79s, total 1020s
1500/2763, 75s, total 1096s
1600/2763, 71s, total 1167s
1700/2763, 78s, total 1246s
1800/2763, 70s, total 1316s
1900/2763, 70s, total 1386s
2000/2763, 64s, total 1450s
2100/2763, 71s, total 1522s
2200/2763, 78s, total 1601s
2300/2763, 77s, total 1678s
2400/2763, 65s, total 1744s
2500/2763, 68s, total 1812s
2600/2763, 72s, total 1884s
2700/2763, 69s, total 1954s
done, 44s, total 1998s
