In [1]:
import os, sys
from pathlib import Path
sys.path.append(str(Path(os.getcwd()).parent))

import os, librosa, shutil, random, itertools
from scipy.io import wavfile
from pathlib import Path, PurePath
import soundfile as sf
import librosa
import numpy as np
from joblib import Parallel, delayed
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_dir = Path("/home/kpintaric/LUMEN-Data-Science-IRMAS/data/raw/IRMAS_Training_Data")
new_dir = Path("/home/kpintaric/LUMEN-Data-Science-IRMAS/data/processed/IRMAS_Training_Data")

In [None]:
class IRMASToPolyphonicPreprocessor:
    def __init__(self, base_dir, new_dir, 
                 sync_pitch: bool=True, sync_bpm: bool=True, sync_onset: bool=True):
        
        self.base_dir = base_dir
        self.new_dir = new_dir
        self.sync_bpm = sync_bpm
        self.sync_pitch = sync_pitch
        self._sync_onset = sync_onset
        self.instruments = [inst for inst in os.listdir(base_dir) if os.path.isdir(inst)]
    
    def preprocess_and_combine_all(self, n_jobs: int=-1):

        Parallel(n_jobs=n_jobs) \
        (delayed(self._preprocess_folder) \
        (instr1, instr2) for instr1, instr2 in itertools.combinations(self.instruments, 2))
        print("Parallel preprocessing done!")
            
    def _preprocess_folder(instr1, instr2):
    
        new_dir_name = '-'.join([instr1, instr2])
        new_dir_path = os.path.join(new_dir, new_dir_name)
        os.makedirs(new_dir_path, exist_ok=True)
        
        instr1_files = list(base_dir.joinpath(instr1).glob("**/*.wav"))
        instr2_files = list(base_dir.joinpath(instr2).glob("**/*.wav"))

        for file1 in instr1_files:
            for file2 in instr2_files:
                synced_file = sync_pitch_bpm(str(file1), str(file2), sr=22050)
                new_name = f'{file1.stem}-{file2.stem}.wav'
                sf.write(os.path.join(new_dir_path, new_name), synced_file, samplerate=22050)

    def _sync_pitch(self, base_file, file_to_sync, sr):

        f_base = librosa.pyin(base_file, fmin=65, fmax=2093, sr=sr)[0]
        f = librosa.pyin(file_to_sync, fmin=65, fmax=2093, sr=sr)[0]

        if not np.isnan(f).all():
            f_base_mean = np.nanmean(np.log(f_base))
            f_mean = np.nanmean(np.log(f))
            steps = np.round(12 * np.log2( np.exp(f_base_mean)/(np.exp(f_mean) )), 0)
        else:
            steps=0

        return pyrb.pitch_shift(y=base_file, sr=sr, n_steps=steps)

    def _sync_bpm(self, base_file, file_to_sync, sr):

        bpm_base = librosa.beat.beat_track(y=base_file, sr=sr)
        bpm = librosa.beat.beat_track(y=file_to_sync, sr=sr)

        return pyrb.time_stretch(y=file_to_sync, sr=sr, rate=bpm_base/bpm)

    def _sync_onset(self, base_file, file_to_sync, sr):

        f_base_start = librosa.onset.onset_detect(y=base_file, units="samples")[0]
        synced_start = librosa.onset.onset_detect(y=file_to_sync, units="samples")[0]
        
        if f_base_start > synced_start:
            diff = f_base_start - synced_start
            return np.pad(file_to_sync, (diff,), mode="constant", constant_values=0)
        
        else: 
            diff = synced_start - f_base_start
            return file_to_sync[diff:]

    def _sync_and_combine(self, base_file, file_to_sync, sr):

        base_file, _ = librosa.load(base_file, mono=True, sr=sr)
        file_to_convert, _ = librosa.load(file_to_sync, mono=True, sr=sr)
        
        bpm_synced = self._sync_bpm(base_file, file_to_convert, sr=sr)
        pitch_and_bpm_synced = self._sync_pitch(base_file, bpm_synced, sr)
        fully_synced = self._sync_onset(base_file, pitch_and_bpm_synced)
    
        base_file = librosa.util.normalize(base_file)
        fully_synced = librosa.util.normalize(fully_synced)

        return (base_file + np.resize(fully_synced, base_file.shape)) / 2

In [23]:
def sum(a, b):
    return a+b

In [8]:
list1 = [1,2,3,4,5,6]
list2 = [1,2,3,4,5,6]

In [20]:
iter = list(itertools.combinations(list1, 2))

In [25]:
Parallel(n_jobs=-1)(delayed(sum)(a,b) for a,b in itertools.combinations(list1, 2))
print("Parallel preprocessing done!")

Parallel preprocessing done!


In [26]:
from utils import CLASSES

In [29]:
combs = itertools.combinations(CLASSES, 2)

In [30]:
combs

<itertools.combinations at 0x7efe9c730bd0>

In [None]:
def _preprocess_folder(instr1, instr2):
    
    new_dir_name = '-'.join([instr1, instr2])
    new_dir_path = os.path.join(new_dir, new_dir_name)
    os.makedirs(new_dir_path, exist_ok=True)
    
    instr1_files = list(base_dir.joinpath(instr1).glob("**/*.wav"))
    instr2_files = list(base_dir.joinpath(instr2).glob("**/*.wav"))

    for file1 in instr1_files:
        for file2 in instr2_files:
            synced_file = sync_pitch_bpm(str(file1), str(file2), sr=22050)
            new_name = f'{file1.stem}-{file2.stem}.wav'
            sf.write(os.path.join(new_dir_path, new_name), synced_file, samplerate=22050)


In [5]:
from functools import partial

In [24]:
def _pitch_extractor(sr):
    fmin = librosa.note_to_hz("C2")
    fmax = librosa.note_to_hz('C7')
    return partial(librosa.pyin, sr=sr, fmin=fmin, fmax=fmax)

In [27]:
def _get_metadata_for_sound(path, sr):

    extract = _pitch_extractor(sr)
    signal, _ = librosa.load(path, sr=sr, mono=True)

    return extract(signal)

In [28]:
_get_metadata_for_sound("/home/kpintaric/LUMEN-Data-Science-IRMAS/data/raw/IRMAS_Training_Data/cel/[cel][cla]0001__1.wav", 16000)

(array([          nan,           nan,           nan,           nan,
                  nan,           nan,           nan, 1514.57052955,
        1514.57052955, 1514.57052955, 1514.57052955, 1523.34434737,
        1523.34434737, 1514.57052955, 1514.57052955, 1505.8472452 ,
        1505.8472452 , 1514.57052955, 1514.57052955, 1514.57052955,
        1514.57052955, 1505.8472452 , 1505.8472452 , 1505.8472452 ,
        1514.57052955, 1514.57052955, 1514.57052955, 1514.57052955,
        1532.1689914 , 1532.1689914 , 1514.57052955, 1514.57052955,
        1514.57052955, 1514.57052955, 1523.34434737, 1523.34434737,
        1523.34434737, 1514.57052955, 1514.57052955, 1514.57052955,
        1523.34434737, 1514.57052955, 1514.57052955, 1514.57052955,
        1514.57052955, 1514.57052955, 1514.57052955, 1523.34434737,
        1514.57052955, 1479.97769085, 1454.55255995, 1437.84559885,
        1437.84559885, 1437.84559885, 1446.17495367, 1462.97869721,
        1429.56421759, 1372.91479778, 1365.00739

In [45]:
arr = np.array(["prvi", "test", "karlo", "tatjana", "ivana"])
np.pad(arr, (0,10), mode="symmetric")

array(['prvi', 'test', 'karlo', 'tatjana', 'ivana', 'ivana', 'tatjana',
       'karlo', 'test', 'prvi', 'prvi', 'test', 'karlo', 'tatjana',
       'ivana'], dtype='<U7')

In [47]:
instr1_files = ["data/me", "test/data"]
instr2_files = ["my/data", "data/my"]

[Path(x) for file_paths in [instr1_files, instr2_files] for x in file_paths]

[PosixPath('data/me'),
 PosixPath('test/data'),
 PosixPath('my/data'),
 PosixPath('data/my')]

In [3]:
class DummyClass:

    def __init__(self, base_dir, new_dir, sr, 
                 sync_pitch: bool=True, sync_bpm: bool=True, sync_onset: bool=True, 
                 random: bool=True, order_by: str=None, metadata: str=None):
        
        self.base_dir = Path(base_dir)
        self.new_dir = Path(new_dir)
        self.sr = sr
        self.sync_bpm = sync_bpm
        self.sync_pitch = sync_pitch
        self.sync_onset = sync_onset
        self.random = random

    def generate_metadata(self, save_path: str=".", n_jobs: int=-1):

        sound_files = list(self.base_dir.glob("**/*.wav"))
        #return sound_files

        output = Parallel(n_jobs=n_jobs) \
                    (delayed(self._get_metadata_for_file) \
                    (str(path)) for path in tqdm(sound_files[:4]))
        
        return output

    def _get_metadata_for_file(self, path: str):
        
        split_path = path.split(os.path.sep)
        fname = split_path[-1]
        instr = split_path[-2]

        signal = self._load_file(str(path))
        pitch = self._get_pitch(signal)
        bpm = self._get_bpm(signal)
        onset = self._get_onset(signal)

        return fname, instr, pitch, bpm, onset

    def _load_file(self, path):
        signal, _ = librosa.load(path, sr=self.sr, mono=True)
        return signal

    def _get_onset(self, signal):
        onset = librosa.onset.onset_detect(y=signal, sr=self.sr, units="samples")[0]
        return onset

    def _get_bpm(self, signal):
        bpm, _ = librosa.beat.beat_track(y=signal, sr=self.sr)
        return bpm

    def _get_pitch(self, signal):
        fmin = librosa.note_to_hz("C2")
        fmax = librosa.note_to_hz('C7')

        pitch, _, _ = librosa.pyin(y=signal, sr=self.sr, fmin=fmin, fmax=fmax)
        
        if not np.isnan(pitch).all():
            mean_log_pitch = np.nanmean(np.log(pitch))
        else:
            mean_log_pitch = None
     
        return mean_log_pitch

In [4]:
base_dir = os.getcwd()


debug = DummyClass(data_dir, new_dir, sr=16000, random=False, order_by="bpm")

In [5]:
output = debug.generate_metadata()

100%|██████████| 4/4 [00:00<00:00, 27.82it/s]


In [6]:
import pandas as pd

In [7]:
df = pd.DataFrame(data = output, columns=["fname", "instr", "pitch", "bpm", "onset"])

In [8]:
df

Unnamed: 0,fname,instr,pitch,bpm,onset
0,122__[sax][nod][cla]1670__3.wav,sax,5.14692,78.125,1536
1,024__[sax][nod][cla]1689__1.wav,sax,6.104435,117.1875,1536
2,[sax][pop_roc]1596__2.wav,sax,4.566183,104.166667,1536
3,[sax][cla]1724__1.wav,sax,5.028955,58.59375,2048


In [14]:
fname, instr, pitch, bpm, onset = df.loc[df.fname=="Test"]

In [3]:
test = Path("data/test")

In [10]:
sample_path = "../data/raw/IRMAS_Training_Data/cel/008__[cel][nod][cla]0058__1.wav"
sample, sr = librosa.load(sample_path)
pitch_org = librosa.pyin(y=sample, sr=sr, fmin=50, fmax=2000)
pitch_org = np.nanmean(np.log1p(pitch_org))

stretched = librosa.effects.time_stretch(y=sample, rate=0.05)
stretched_pitch = librosa.pyin(stretched, sr=sr, fmin=50, fmax=2000)
stretched_pitch = np.nanmean(np.log1p(stretched_pitch))


print(f'Org pitch: {pitch_org}\nStretched pitch: {stretched_pitch}')

Org pitch: 1.7042399319347377
Stretched pitch: 1.6775099844552481


In [23]:
import IPython.display as ipd

ipd.Audio(sample, rate=sr)

NameError: name 'sample' is not defined

In [1]:
import pandas as pd

In [15]:
df = pd.read_csv("../metadata.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,fname,instr,pitch,bpm,onset
0,0,122__[sax][nod][cla]1670__3.wav,sax,5.155168,78.125,1536
1,1,024__[sax][nod][cla]1689__1.wav,sax,6.106671,117.1875,1536
2,2,[sax][pop_roc]1596__2.wav,sax,4.576782,104.166667,1536
3,3,[sax][cla]1724__1.wav,sax,5.035629,58.59375,2048
4,4,159__[sax][dru][cla]1669__2.wav,sax,6.282532,144.230769,1536


In [21]:
cols = ["bpm", "pitch"]

In [66]:
metad = df.loc[df.fname=="122__[sax][nod][cla]1670__3.wav", cols]

In [106]:
a, b = extract_from_df(metad, cols)

In [2]:
from preprocess import IRMASPreprocessor

In [44]:
data_dir = f'../data/raw/IRMAS_Training_Data'
new_dir = f'../data/processed/IRMAS_Training_Data'

In [45]:
preprocessor = IRMASPreprocessor(data_dir, new_dir, sr=16000, metadata="../metadata.csv")

In [49]:
preprocessor.metadata.sort_values(by="bpm")

Unnamed: 0,fname,instr,pitch,bpm,onset
2217,[gac][cla]0619__3.wav,gac,5.115040,48.076923,9728
1918,[gac][pop_roc]0654__1.wav,gac,4.264949,53.571429,1536
5635,[cel][cla]0105__1.wav,cel,4.895393,55.147059,4096
5989,[pia][cla]1341__3.wav,pia,4.476723,56.818182,1536
2620,[org][jaz_blu]1265__2.wav,org,4.532306,60.483871,7680
...,...,...,...,...,...
5150,[gel][pop_roc]0987__1.wav,gel,4.753090,208.333333,1536
6101,[pia][cla]1294__3.wav,pia,4.542643,208.333333,7680
1815,[gac][jaz_blu]0535__3.wav,gac,4.812477,234.375000,1536
5580,[gel][pop_roc]1023__3.wav,gel,4.508291,234.375000,2560


In [50]:
signal = preprocessor._load_file("/home/kpintaric/LUMEN-Data-Science-IRMAS/data/raw/IRMAS_Training_Data/gac/[gac][pop_roc]0567__2.wav")

In [51]:
librosa.beat.beat_track(y=signal, sr=16000)

(234.375, array([ 5, 13, 21, 29, 37, 45, 54, 62, 70]))

In [52]:
ipd.Audio(signal, rate=16000)

In [66]:
shifted = librosa.effects.pitch_shift(y=signal, sr=16000, n_steps=12)
ipd.Audio(shifted, rate=16000)

In [53]:
librosa.feature.rhythm.tempo(y=signal, sr=16000)[0]

234.375

In [60]:
import pyrubberband as pyrb

In [64]:
shifted = pyrb.pitch_shift(y=signal, sr=16000, n_steps=5)
ipd.Audio(shifted, rate=16000)

In [43]:
preprocessor.metadata.sort_values(by="pitch")[:100]

Unnamed: 0,fname,instr,pitch,bpm,onset
5587,108__[gel][dru][pop_roc]0955__3.wav,gel,-9999.000000,144.230769,1536
4171,200__[voi][dru][lat_sou]2456__1.wav,voi,-9999.000000,144.230769,1536
2596,[org][pop_roc]1050__3.wav,org,-9999.000000,98.684211,1536
5354,[gel][pop_roc]0903__2.wav,gel,-9999.000000,98.684211,1536
5017,[gel][jaz_blu]0927__3.wav,gel,-9999.000000,104.166667,1536
...,...,...,...,...,...
6502,[pia][jaz_blu]1524__1.wav,pia,4.204558,117.187500,1536
4577,172__[voi][dru][pop_roc]2470__1.wav,voi,4.205195,144.230769,1536
3055,[org][pop_roc]1095__3.wav,org,4.205364,133.928571,2048
1751,[vio][jaz_blu]2229__2.wav,vio,4.205752,104.166667,1536
