In [79]:
import os
import random
from random import shuffle
from shutil import copy

import mido

In [77]:
# Directory that is scanned for the raw .wav/.mid files, and the dataset name
# used for the output folder created under ../data.
data_dir = '../data_raw/8dio_samples'
data_name = '8dio_samples'

# Fractions of the filtered MIDI files assigned to each split.
# NOTE(review): test_split is not read by the splitting code visible in this
# notebook; the test set is whatever remains after the train and val slices.
train_split = 0.8
val_split = 0.1
test_split = 0.1

In [50]:
def convert_file(file):
    """Return the name of the counterpart file in a WAV/MIDI pair.

    A name ending in ``.wav`` is mapped to the same stem with ``.mid``;
    any other name is mapped to the same stem with ``.wav``.

    The extension is taken with ``os.path.splitext`` so that only the final
    suffix is swapped: stems that themselves contain dots (``take.01.wav``)
    are preserved, and a name without any extension no longer raises
    ``IndexError`` (it simply gets ``.wav`` appended).

    Args:
        file: File name (or path) of one member of the pair.

    Returns:
        The file name (or path) of the other member of the pair.
    """
    name, ext = os.path.splitext(file)

    if ext == '.wav':
        return name + '.mid'
    return name + '.wav'

def match_files(filter_list):
    """Map every file name in ``filter_list`` through ``convert_file``.

    Args:
        filter_list: Iterable of file names.

    Returns:
        A new list, in the same order, holding the ``convert_file`` result
        for each entry.
    """
    return [convert_file(entry) for entry in filter_list]


In [38]:
# Collect the audio and MIDI file names found directly inside data_dir.
# Matching is done on the real extension (".wav" / ".mid") rather than a bare
# suffix, so a name that merely ends in the letters "wav" or "mid" is ignored.
# Both lists are sorted so their order does not depend on os.listdir().
dir_entries = os.listdir(data_dir)

wav_files = sorted(entry for entry in dir_entries if entry.endswith('.wav'))
mid_files = sorted(entry for entry in dir_entries if entry.endswith('.mid'))


# Load one MIDI file (the fourth in sorted order) and display its second
# track to see what the message stream looks like.
sample_midi_path = os.path.join(data_dir, mid_files[3])
mid = mido.MidiFile(sample_midi_path, clip=True)
mid.tracks[1]


MidiTrack([
  MetaMessage('track_name', name='yee_FX#11', time=0),
  Message('note_on', channel=0, note=66, velocity=122, time=411),
  Message('note_on', channel=0, note=54, velocity=125, time=0),
  Message('note_off', channel=0, note=66, velocity=0, time=1283),
  Message('note_off', channel=0, note=54, velocity=0, time=89),
  MetaMessage('end_of_track', time=0)])

In [41]:
def midi_filter_length(files, min_length=10, directory=None):
    """Keep only the MIDI files whose second track is long enough.

    Each file is opened with ``mido.MidiFile`` and kept when track index 1
    holds more than ``min_length`` messages. The count is ``len()`` of the
    track, so every entry in the track is counted.

    Files that have fewer than two tracks are skipped instead of raising
    ``IndexError``.

    Args:
        files: Iterable of MIDI file names, relative to ``directory``.
        min_length: A file is kept only if track 1 has strictly more
            messages than this.
        directory: Folder containing the files. Defaults to the notebook's
            module-level ``data_dir`` when omitted.

    Returns:
        List of the file names that passed the filter, in input order.
    """
    if directory is None:
        directory = data_dir

    filtered = []
    for file in files:
        mid = mido.MidiFile(os.path.join(directory, file), clip=True)
        # Guard the track lookup: a file with a single track has no index 1.
        if len(mid.tracks) > 1 and len(mid.tracks[1]) > min_length:
            filtered.append(file)

    return filtered

1416

In [42]:
# Keep only the MIDI files whose second track has more than 10 messages.
long_midi = midi_filter_length(mid_files, min_length=10)

In [73]:
# Fixed seed so the train/val/test assignment is identical on every run.
split_seed = 42

# Shuffle a copy with a dedicated, seeded generator: long_midi itself is left
# untouched, so re-running this cell always produces the same split.
shuffled_midi = list(long_midi)
random.Random(split_seed).shuffle(shuffled_midi)

# Slice boundaries: [0, train) -> train, [train, val) -> val, [val, end) -> test.
train_split_index = int(len(shuffled_midi) * train_split)
val_split_index = int(len(shuffled_midi) * (train_split + val_split))

train_data = shuffled_midi[0:train_split_index]
val_data = shuffled_midi[train_split_index:val_split_index]
test_data = shuffled_midi[val_split_index:]


In [87]:
# Copy every selected MIDI file and its matching WAV file into
#   <save_dir>/<midi|wav>/<train|val|test>/
save_dir = os.path.join('../data', data_name)

# makedirs(exist_ok=True) tolerates directories that already exist but still
# surfaces real failures (e.g. permissions), unlike a bare `except: pass`.
os.makedirs(save_dir, exist_ok=True)

data_dict = {'train': train_data, 'val': val_data, 'test': test_data}

for data_type, data_list in data_dict.items():
    for file_type in ['midi', 'wav']:
        folder = os.path.join(save_dir, file_type, data_type)
        os.makedirs(folder, exist_ok=True)

        for midi_name in data_list:
            # The split lists hold MIDI names; derive the WAV name when copying audio.
            file = convert_file(midi_name) if file_type == 'wav' else midi_name
            copy(os.path.join(data_dir, file), os.path.join(folder, file))
