In [1]:
# TODO: investigate suspected fluidsynth glitches in the synthesized audio.

In [1]:
import itertools
import json
import os
import pickle
import random
import sys
from copy import deepcopy
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

# Extend the module search path before the project-local import below.
sys.path.append('../')

from onsets import input_features_extractor

['LICENSE', 'test', 'hvo_sequence', 'README.md', 'setup.py', '.gitignore', '.git', '.vscode', '.idea']


In [5]:
# Metadata filters applied when selecting sequences. Every field starts out as
# None (check_if_passes_filters skips None-valued entries); only master_id is
# constrained.
filters = dict.fromkeys(
    (
        "drummer",
        "session",
        "loop_id",
        "master_id",
        "style_primary",
        "bpm",
        "beat_type",
        "time_signature",
        "full_midi_filename",
        "full_audio_filename",
    )
)
filters["master_id"] = "drummer1/eval_session/10"

# Audio settings. hop_length is FRAME_INTERVAL expressed in samples
# (presumably FRAME_INTERVAL is in seconds and sr in Hz — confirm).
sr = 44100
FRAME_INTERVAL = 0.01
hop_length = int(round(sr * FRAME_INTERVAL))

# Keyword arguments forwarded to input_features_extractor by GrooveMidiDataset.
input_features_parameters = dict(
    sr=sr,
    n_fft=1024,
    win_length=1024,
    hop_length=hop_length,
    n_bins_per_octave=16,
    n_octaves=9,
    f_min=40,
    mean_filter_size=22,
    n_bars=2,
    time_signature_numerator=4,
    time_signature_denominator=4,
    beat_division_factors=[4],
)

In [6]:
def check_if_passes_filters(obj, filters):
    """Return True when ``obj`` satisfies every active filter.

    Args:
        obj: Any object exposing ``to_dict()`` (e.g. a pandas Series holding
            one metadata row).
        filters: Dict mapping a field name to the value it must equal.
            Entries whose value is None are ignored.

    Returns:
        bool: False as soon as one active filter differs from the
        corresponding field of ``obj``; True otherwise (including when no
        filter is active).

    Raises:
        KeyError: If an active filter names a field missing from
            ``obj.to_dict()``.
    """
    active = {key: wanted for key, wanted in filters.items() if wanted is not None}
    if not active:
        return True

    # to_dict() does not depend on the filter key, so build it once instead of
    # once per active filter.
    values = obj.to_dict()
    return not any(values[key] != wanted for key, wanted in active.items())

class GrooveMidiDataset(Dataset):
    """Pairs of audio-derived input features and HVO targets built from pickled sequences.

    On construction the dataset loads a pickled collection of HVO sequences
    and a metadata CSV, keeps the sequences that have a single time signature,
    are not all-zero and pass ``filters``, and for each kept sequence iterates
    over (soundfont, voice-combination) pairs. For every pair the sequence is
    split with ``reset_voices``, the input part is synthesized with the
    soundfont, and the features returned by ``input_features_extractor`` are
    stored together with the ``hvo`` array of the output part.

    Side effect: writes ``../exp_dataset/<dataset_name>/parameters.json``
    describing the configuration and the per-item bookkeeping lists.

    Args:
        source_path: Root directory of the preprocessed dataset.
        subset: Sub-directory of ``source_path`` holding the pickle and CSV.
        metadata_csv_filename: CSV file name inside ``source_path/subset``.
        hvo_pickle_filename: Pickle file name inside ``source_path/subset``.
        filters: Dict passed to ``check_if_passes_filters`` with each metadata row.
        input_features_parameters: Dict forwarded as keyword arguments to
            ``input_features_extractor``; its ``'sr'`` entry is also used for
            synthesis.
        sf_path: Directory whose entries are used as soundfont files.
        max_len: Number of rows each ``hvo`` array is padded/truncated to.
        voice_idx: Candidate voice indices from which combinations are drawn.
        n_voices_to_remove: Size of each voice combination.
        max_items: Number of (soundfont, voices) pairs drawn per kept sequence
            when sampling is triggered (see the NOTE in ``__init__``).
        dataset_name: Output folder name; defaults to ``"Dataset_<timestamp>"``.

    Attributes:
        hvo_sequences: Kept sequences (after padding/truncation).
        processed_inputs / processed_outputs: Per-item features and targets.
        hvo_index / voices_reduced / soundfonts: Per-item bookkeeping, aligned
            with ``processed_inputs`` (source-set index, voice combination,
            soundfont path).
    """

    def __init__(self,
                source_path = '../../preprocessed_dataset/datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs',
                subset = 'GrooveMIDI_processed_test',
                metadata_csv_filename='metadata.csv',
                hvo_pickle_filename='hvo_sequence_data.obj',
                filters=filters,
                input_features_parameters=input_features_parameters,
                sf_path = "../soundfonts/filtered_soundfonts/",
                max_len = 32,
                voice_idx = [0,1,5,7],
                n_voices_to_remove = 3,
                max_items = 1000,
                dataset_name = None
                ):

        assert (n_voices_to_remove <= len(voice_idx)) , "number of voices to remove can not be greater than length of voice_idx"

        # load dataset
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at dataset files you trust.
        with open(os.path.join(source_path, subset, hvo_pickle_filename), 'rb') as train_file:
            train_set = pickle.load(train_file)
        metadata = pd.read_csv(os.path.join(source_path, subset, metadata_csv_filename))

        # init lists to store hvo sequences and processed io
        self.hvo_sequences = []
        self.processed_inputs = []
        self.processed_outputs = []

        # init lists with per-item configurations
        self.hvo_index = []
        self.voices_reduced = []
        self.soundfonts = []

        # Maps a source-set index (as stored in hvo_index) to the position of
        # that sequence inside self.hvo_sequences, which only holds the
        # sequences that were kept.
        self._hvo_position = {}

        # list of soundfonts; os.listdir order is arbitrary, so sort it to get
        # a stable item order, and join paths properly so sf_path does not
        # need a trailing separator.
        sfs = [os.path.join(sf_path, sf) for sf in sorted(os.listdir(sf_path))]

        # combinations of soundfonts and voices; identical for every sequence,
        # so build them once.
        voice_idx_comb = list(itertools.combinations(voice_idx, n_voices_to_remove))
        all_sf_v_comb = list(itertools.product(sfs, voice_idx_comb))

        metadata_fields = ("drummer", "session", "master_id", "style_primary",
                           "style_secondary", "beat_type", "loop_id", "bpm")

        for hvo_idx, hvo_seq in enumerate(train_set):

            # ignore if time_signature change happens
            if len(hvo_seq.time_signatures) != 1:
                continue

            # ignore silent patterns
            if not np.any(hvo_seq.hvo.flatten()):
                continue

            row = metadata.loc[hvo_idx]
            if not check_if_passes_filters(row, filters):
                continue

            # add metadata to hvo_seq scores
            for field in metadata_fields:
                setattr(hvo_seq, field, row.at[field])

            # pad with zeros to match max_len
            pad_count = max(max_len - hvo_seq.hvo.shape[0], 0)
            hvo_seq.hvo = np.pad(hvo_seq.hvo, ((0, pad_count), (0, 0)), 'constant')
            hvo_seq.hvo = hvo_seq.hvo[:max_len, :]  # in case seq exceeds max len
            self._hvo_position[hvo_idx] = len(self.hvo_sequences)
            self.hvo_sequences.append(hvo_seq)

            # NOTE(review): the threshold is max_items / len(train_set) but
            # the draw takes k=max_items pairs (with replacement) for EACH
            # kept sequence, so the total can far exceed max_items. Behavior
            # kept as-is; confirm the intended budget.
            sf_v_comb = all_sf_v_comb
            if len(all_sf_v_comb) > max_items / len(train_set):
                sf_v_comb = random.choices(all_sf_v_comb, k=max_items)

            # every sf and voice combination
            for sf, v_idx in sf_v_comb:
                v_idx = list(v_idx)

                ## io processing
                # Use this item's voice combination. The previous code passed
                # the full voice_idx, so every item of a sequence got the same
                # split while voices_reduced recorded a different one.
                # NOTE(review): confirm against hvo_sequence.reset_voices.
                hvo_seq_in, hvo_seq_out = hvo_seq.reset_voices(voice_idx = v_idx)

                audio = hvo_seq_in.synthesize(sr = input_features_parameters['sr'], sf_path=sf)
                if len(audio) == 0:
                    continue  # if voices removed give zero hvo

                input_features = input_features_extractor(audio, qpm=float(hvo_seq.bpm),
                                                          **input_features_parameters)

                # Record bookkeeping only for items that are actually stored,
                # so these lists stay aligned with processed_inputs.
                self.processed_inputs.append(input_features)
                self.processed_outputs.append(hvo_seq_out.hvo)
                self.hvo_index.append(hvo_idx)
                self.voices_reduced.append(v_idx)
                self.soundfonts.append(sf)

        # store hvo index and soundfonts in json
        now = datetime.now()
        dt_string = now.strftime("%d_%m_%Y_at_%H_%M_hrs")

        # dataset name
        if dataset_name is None:
            dataset_name = "Dataset_" + dt_string

        # save parameters
        parameters_path = os.path.join('../exp_dataset', dataset_name)
        os.makedirs(parameters_path, exist_ok=True)

        parameters = {
                      "dataset_name" : dataset_name,
                      "timestamp" : dt_string,
                      "dataset_info": {
                          "source_path":source_path,
                          "subset":subset,
                          "metadata_csv_filename":metadata_csv_filename,
                          "hvo_pickle_filename": hvo_pickle_filename,
                          "sf_path": sf_path,
                          "filters" : filters,
                          "max_len": max_len,
                          "max_items":max_items
                      },
                      "input_features_parameters" : input_features_parameters,
                      "voice_idx": voice_idx,
                      "n_voices_to_remove": n_voices_to_remove,
                      "dictionaries": {
                          "hvo_index": self.hvo_index,
                          "voices_reduced":self.voices_reduced,
                          "soundfonts" : self.soundfonts
                      }

                     }

        parameters_json = os.path.join(parameters_path, 'parameters.json')
        with open(parameters_json, 'w') as f:
            json.dump(parameters, f)


    def get_hvo_sequence(self,idx):
        """Return the kept hvo sequence that item ``idx`` was generated from."""
        # hvo_index holds source-set indices; translate to the position inside
        # self.hvo_sequences, which skips filtered-out sequences.
        hvo_idx = self.hvo_index[idx]
        return self.hvo_sequences[self._hvo_position[hvo_idx]]

    def get_soundfont(self,idx):
        """Return the soundfont path used to synthesize item ``idx``."""
        return self.soundfonts[idx]

    def get_voices_idx(self,idx):
        """Return the voice combination recorded for item ``idx``."""
        return self.voices_reduced[idx]


    def __len__(self):
        """Number of stored (input, output) items."""
        return len(self.processed_inputs)

    def __getitem__(self,idx):
        """Return ``(input_features, output_hvo, idx)`` for item ``idx``."""
        return self.processed_inputs[idx], self.processed_outputs[idx], idx
    

In [7]:
%%time
# Build the dataset with all default arguments; %%time reports how long construction takes.
gmd = GrooveMidiDataset()


FileNotFoundError: [Errno 2] No such file or directory: '../test_soundfonts/good_soundfonts/'

In [1]:
# Debugging aid: show the kernel's working directory and import search path.
print(os.getcwd())  # was a bare non-final expression, so its value was never displayed
print(sys.path)
os.listdir('../../')  # last expression: rendered as the cell output

NameError: name 'os' is not defined

In [144]:
# Number of items in the dataset (use the len() builtin rather than calling the dunder directly).
len(gmd)

(15, 15000, 15000)

In [116]:
# Scratch experiment: count the (soundfont, voice-pair) combinations and check
# that random.choices returns exactly max_items of them.
# NOTE(review): this cell rebinds voice_idx, n_voices_to_remove and max_items
# at module level.
voice_idx = np.arange(0,10)
n_voices_to_remove = 2
voice_idx_comb = list(itertools.combinations(voice_idx,n_voices_to_remove))

# NOTE(review): `sfs` is not defined in any cell shown here; this relies on
# leftover kernel state and will fail on a fresh kernel.
sf_v_comb = list(itertools.product(sfs,voice_idx_comb))
print(len(sf_v_comb))
# NOTE(review): mid-notebook import; belongs in the imports cell at the top.
import random
max_items = 100
len(random.choices(sf_v_comb, k=max_items))

900


100

In [90]:
# Scratch check of the soundfont x voice pairing.
# NOTE(review): `sfs` is not defined in any cell shown here (leftover kernel state).
list(itertools.product(sfs,voice_idx))

[('../test_soundfonts/good_soundfonts/GoldDrums.sf2', (0, 1)),
 ('../test_soundfonts/good_soundfonts/GoldDrums.sf2', (0, 2)),
 ('../test_soundfonts/good_soundfonts/GoldDrums.sf2', (1, 2)),
 ('../test_soundfonts/good_soundfonts/80sAcDanceDrums.sf2', (0, 1)),
 ('../test_soundfonts/good_soundfonts/80sAcDanceDrums.sf2', (0, 2)),
 ('../test_soundfonts/good_soundfonts/80sAcDanceDrums.sf2', (1, 2)),
 ('../test_soundfonts/good_soundfonts/HipHop.sf2', (0, 1)),
 ('../test_soundfonts/good_soundfonts/HipHop.sf2', (0, 2)),
 ('../test_soundfonts/good_soundfonts/HipHop.sf2', (1, 2)),
 ('../test_soundfonts/good_soundfonts/HOUSE2.sf2', (0, 1)),
 ('../test_soundfonts/good_soundfonts/HOUSE2.sf2', (0, 2)),
 ('../test_soundfonts/good_soundfonts/HOUSE2.sf2', (1, 2)),
 ('../test_soundfonts/good_soundfonts/phunked.sf2', (0, 1)),
 ('../test_soundfonts/good_soundfonts/phunked.sf2', (0, 2)),
 ('../test_soundfonts/good_soundfonts/phunked.sf2', (1, 2)),
 ('../test_soundfonts/good_soundfonts/MelottiDrums.sf2', (0, 

In [None]:
# Timing note for a run that produced 15000 items (presumably the dataset build above):
# CPU times: user 25min 20s, sys: 1min 46s, total: 27min 7s
# Wall time: 27min 24s