Notebook to extract f0 and loudness from an entire folder of files, then generate an equivalent folder of files that contain the extracted parameters instead of the audio.

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import pickle
import tqdm
import threading

from wavegenie.audio_io import load_audio, save_wav
from wavegenie.util import preview_audio, DDSP_DEFAULT_FS_AUDIO
from wavegenie.util import extract_ddsp_synthesis_parameters

In [None]:
# root folder of the input files; it is walked to build the file list and
# handed to extract() as the base folder
FOLDER_PATH = '/juice/scr/rjcaste/curis/lmd_full/synths'
# a single .wav file under FOLDER_PATH (not referenced again in the visible
# cells -- presumably kept for trying extract() by hand; confirm)
fpath = os.path.join(FOLDER_PATH, '6/689c5310eb33a2efa44e214b83bd3cba-95.wav')

In [None]:
def extract(fpath, basepath, force=True):
    """Extract DDSP synthesis parameters from one audio file and pickle them.

    The result is written into a ``synth-params`` folder that sits next to
    ``basepath``, mirroring the position ``fpath`` has inside ``basepath``
    and replacing the file extension with ``.p``.

    Parameters:
        fpath: explicit path to the audio file.
        basepath: explicit path to the base folder that ``fpath`` resides
            in. A trailing path separator is tolerated.
        force: when True (the default) the file is extracted again and the
            parameter file overwritten even if it already exists; when
            False an existing parameter file makes this call a no-op.

    Returns:
        None. The outputs are the pickle file and a progress print.
    """
    base_folder_name = 'synth-params'
    pure_fpath = os.path.splitext(fpath)[0]
    # how to get from basepath to the specific file (extension already dropped)
    relpath = os.path.relpath(pure_fpath, basepath)
    # Switch from the synths folder to its sibling parameters folder.
    # normpath strips a trailing separator first; without it, dirname would
    # return basepath itself and the output would land inside the input tree.
    params_root = os.path.join(
        os.path.dirname(os.path.normpath(basepath)), base_folder_name)
    parameter_path = os.path.join(params_root, relpath) + '.p'

    if os.path.exists(parameter_path) and not force:
        return

    # load audio (the sample-rate value returned alongside it is not needed)
    audio, _fs = load_audio(
        fpath,
        DDSP_DEFAULT_FS_AUDIO,
        num_channels=1,
        normalize=True)
    # extract synthesis parameters
    audio_parameters = extract_ddsp_synthesis_parameters(audio)

    print('saving...')
    # build up the folder tree for the output file; exist_ok makes this safe
    # when another worker has already created the same folder
    os.makedirs(os.path.dirname(parameter_path), exist_ok=True)
    # the with-block guarantees the pickle is flushed and the handle closed
    with open(parameter_path, 'wb') as param_file:
        pickle.dump(audio_parameters, param_file)

In [None]:
# collect the path of every file under FOLDER_PATH (recursively), sort the
# list, and pickle it to allfiles.p in the current working directory
# (presumably this is the list extractor.py reads -- confirm against that script)
allfiles = []
for (dirpath, dirnames, filenames) in os.walk(FOLDER_PATH):
    allfiles += [os.path.join(dirpath, file) for file in filenames]
allfiles = sorted(allfiles)
# the with-block closes the handle so the list is fully on disk before any
# other process tries to read it
with open('allfiles.p', 'wb') as allfiles_file:
    pickle.dump(allfiles, allfiles_file)

In [None]:
def worker(i, skip_every):
    """Thread target: run ``extractor.py`` through the shell with two arguments.

    Blocks until the shell command finishes; its exit status is ignored.
    How ``extractor.py`` interprets the arguments is not visible here
    (presumably it handles every ``skip_every``-th file starting at index
    ``i`` -- confirm against that script).
    """
    command = 'python extractor.py {} {}'.format(i, skip_every)
    os.system(command)

In [None]:
# start NUM_THREADS background threads; thread i calls worker(i, NUM_THREADS).
# The Thread objects are kept in `threads` but are not joined in this cell,
# so the cell returns as soon as the last thread has been started.
NUM_THREADS = 5
threads = []
for i in range(NUM_THREADS):
    t = threading.Thread(target=worker, args=(i, NUM_THREADS))
    threads.append(t)
    t.start()

In [None]:
# one [file path, base folder] argument list per file, in the order of allfiles
allargs = [[path, FOLDER_PATH] for path in allfiles]

In [None]:
import glob
import shutil
import multiprocessing

import numpy as np
from tqdm import tqdm

def _task(x):
    """Pool task: unpack one argument list and call ``extract`` with it.

    ``x`` is one entry of ``allargs`` (``[file path, base folder]``); it is
    splatted into ``extract``'s positional parameters. Nothing is returned.
    """
    extract(*x)
# started at 3:06:55
# Run _task over every entry of allargs in a pool of 8 worker processes.
# imap yields results in input order, so wrapping it in tqdm shows progress,
# and list() drains the iterator so the cell blocks until every file is done.
with multiprocessing.Pool(processes=8) as p:
    r = list(
        tqdm(
            p.imap(_task, allargs),
            total=len(allfiles),
        )
    )

In [None]:
# start NUM_THREADS background threads; thread i calls worker(i, NUM_THREADS).
# worker accepts exactly (i, skip_every): passing allfiles as a third
# argument made every thread fail immediately with a TypeError, so only the
# two expected arguments are passed.
# The threads are kept in `threads` but are not joined in this cell.
NUM_THREADS = 20
threads = []
for i in range(NUM_THREADS):
    t = threading.Thread(target=worker, args=(i, NUM_THREADS))
    threads.append(t)
    t.start()