In [None]:
#!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
#!pip install pypianoroll


In [None]:
import pypianoroll
import os
import numpy as np
import glob
import random
import os
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
device = 'cpu'

In [None]:
def create_parts(mid):
    """Collect the seven canonical instrument parts from a multitrack object.

    Track names are mapped onto the canonical part names ('Piano',
    'Piano Right', 'Piano Left', 'Violin1', 'Violin2', 'Viola', 'Cello')
    through an alias table. For each canonical part only the first matching
    track is kept; later tracks resolving to the same part are dropped.
    Parts that no track provides are filled with an all-zero array shaped
    like the last non-empty, non-ignored track.

    Parameters
    ----------
    mid : object
        Must expose ``tracks``, an iterable of objects with ``name``,
        ``pianoroll`` (an array with a ``shape``) and ``program`` attributes.
        Presumably a ``pypianoroll.Multitrack`` -- confirm against the caller.

    Returns
    -------
    parts : dict
        Canonical part name -> pianoroll array (never ``None``).
    programs : dict
        Canonical part name -> ``program`` of the track that supplied the
        part, or ``None`` when the part was filled with zeros.

    Raises
    ------
    ValueError
        If at least one part is missing and no non-ignored track has a
        non-empty pianoroll to size the zero placeholder from.
    """
    part_names = ('Piano', 'Piano Right', 'Piano Left',
                  'Violin1', 'Violin2', 'Viola', 'Cello')
    parts = dict.fromkeys(part_names)
    programs = dict.fromkeys(part_names)

    # Aliases found in the dataset -> canonical part name.
    to_normalize = {
        'Right': 'Piano Right',
        'Left': 'Piano Left',
        'Grand Piano': 'Piano',
        'Violino I': 'Violin1',
        'Violino II': 'Violin2',
        '1st Violin': 'Violin1',
        '2nd Violin': 'Violin2',
        'Solo Cello': 'Cello',
        'Violoncello': 'Cello',
        'Cello Pizz': 'Cello',
        'Six Suites for Solo Cello': 'Cello',
        'Violin 1': 'Violin1',
        'Violin 2': 'Violin2',
        'Piano RH': 'Piano Right',
        'Piano LH': 'Piano Left',
        'Piano RH (Playback)': 'Piano Right',
        'Piano LH (Playback)': 'Piano Left',
        'Violino I.': 'Violin1',
        'Viola.': 'Viola',
        'Violin 1 arco': 'Violin1',
        'Cello arco': 'Cello',
        'Cello pizz': 'Cello',
        'Violino II.': 'Violin2',
        'Violin2 Pizz': 'Violin2',
        'Violin 2 pizz': 'Violin2',
        'Violin 2 arco': 'Violin2',
        'Cello (Playback)': 'Cello',
        'Viola pizz': 'Viola',
        'Viola Arco': 'Viola',
        'Violincello': 'Cello',
        'Violoncello.': 'Cello',
        'Violin1 Sub': 'Violin1',
        'Viola arco': 'Viola',
        'Viola Pizz': 'Viola',
        'Violin1 Pizz': 'Violin1',
        'Viola Sub': 'Viola',
        'Violin Pizz': 'Violin1',
        'Violin (Playback)': 'Violin1',
        'Viola (Playback)': 'Viola',
        'Acoustic Grand Piano': 'Piano',
        'Pianoforte': 'Piano',
        'Violin': 'Violin1',
    }

    # Track names that are skipped entirely (they neither fill a part nor
    # serve as the shape template for the zero placeholder).
    to_ignore = {
        'Instrument17', 'Instrument18', 'Instrument19',
        '--------------------------------------', 'Johann Sebastian Bach  (1685-1750)'
    }

    # Pianoroll of the last non-empty, non-ignored track; used only for the
    # shape and dtype of the zero placeholder.
    template = None

    for track in mid.tracks:
        if track.name in to_ignore:
            continue

        # Names without an alias are looked up as-is, so a track that is
        # already called e.g. 'Viola' is accepted too.
        canonical = to_normalize.get(track.name, track.name)
        if canonical in parts and parts[canonical] is None:
            parts[canonical] = track.pianoroll
            programs[canonical] = track.program

        if track.pianoroll.shape[0] > 0:
            template = track.pianoroll

    missing = [name for name, roll in parts.items() if roll is None]
    if missing:
        if template is None:
            # Previously this surfaced as an UnboundLocalError on an
            # undefined local; fail with an explanation instead.
            raise ValueError(
                'cannot build placeholder parts for '
                f'{missing}: no non-ignored track with a non-empty pianoroll'
            )
        empty_array = np.zeros_like(template)
        for name in missing:
            # Independent copies so callers can mutate one part safely.
            parts[name] = empty_array.copy()

    return parts, programs

def create_lstm_files(new_music):
    """Cut next-step prediction samples out of the tracks and split them.

    For every piece in ``new_music`` the tracks are visited in order. On each
    track, windows of ``no_of_timesteps`` (32) consecutive steps are taken
    from a random start offset with a random stride in ``[5, 32)``; the step
    immediately after the window is its target. Windows that contain only
    zeros are discarded. At most ``samples_per_music`` (10) windows are
    collected per piece.

    Parameters
    ----------
    new_music : iterable
        Each element is an iterable of tracks; a track must support
        ``len()``, slicing along its first axis and ``.any()`` on a slice
        (numpy arrays do). All tracks must share the trailing dimensions so
        the samples can be stacked.

    Returns
    -------
    x_tr, x_val, y_tr, y_val
        Result of ``train_test_split(x, y, test_size=0.2, random_state=0)``
        where ``x`` stacks the windows and ``y`` the matching targets.

    Notes
    -----
    Uses the global ``random`` module state; seed it beforehand for
    reproducible sampling.
    """
    no_of_timesteps = 32
    samples_per_music = 10

    x = []
    y = []

    for music in new_music:
        samples = 0
        for track in music:
            if samples >= samples_per_music:
                # Quota for this piece is filled; remaining tracks are skipped.
                break

            # Leave room for `samples_per_music` windows after the start when
            # the track is long enough. Clamp to 1 so that short tracks start
            # at 0 instead of making randrange() raise on an empty range.
            start_bound = max(len(track) - samples_per_music * no_of_timesteps, 1)
            random_start = random.randrange(0, start_bound)
            random_step = random.randrange(5, no_of_timesteps)

            # Upper bound keeps track[i + no_of_timesteps] in range; for
            # tracks of <= 32 steps the range is simply empty.
            for i in range(random_start, len(track) - no_of_timesteps, random_step):
                if samples >= samples_per_music:
                    # Enforce the cap per window; checking only once per
                    # track let a single track exceed it without limit.
                    break
                input_ = track[i:i + no_of_timesteps]
                if input_.any():
                    x.append(input_)
                    y.append(track[i + no_of_timesteps])
                    samples += 1

    x = np.array(x)
    y = np.array(y)

    x_tr, x_val, y_tr, y_val = train_test_split(x, y, test_size=0.2, random_state=0)

    return x_tr, x_val, y_tr, y_val

In [None]:

# Build the LSTM training/validation arrays from the whitelisted MIDI files
# and store them as pickles under the preparation directory.
input_path = 'data'

raw_path = os.path.join(input_path, '00_raw')
preprocess_path = os.path.join(input_path, '01_preprocessed')
preparation_path = os.path.join(input_path, '02_preparation')

# The preprocessing step lists, by file name, which MIDI files to use.
preprocess_df = pd.read_csv(os.path.join(preprocess_path, 'preprocessed_dataset.csv'))
mid_files_to_use = preprocess_df.mid_name.to_list()
# Set copy for constant-time membership checks inside the loop.
mid_files_lookup = set(mid_files_to_use)

# Sorted so the order of pieces (and therefore the sampling below) does not
# depend on the filesystem's directory order.
all_composer_path = sorted(glob.glob(os.path.join(raw_path, '*')))

all_parts = []
all_programs = []

for composer_path in all_composer_path:
    composer_mid_files_path = sorted(glob.glob(os.path.join(composer_path, '*.mid')))
    for mid_file_path in composer_mid_files_path:
        try:
            # os.path.basename uses the platform's separator; splitting on a
            # backslash only isolates the file name on Windows, so on other
            # systems no file ever matched the whitelist.
            if os.path.basename(mid_file_path) in mid_files_lookup:
                mid = pypianoroll.read(mid_file_path)
                parts, programs = create_parts(mid)
                numpy_parts = np.array([parts['Piano'], parts['Piano Right'], parts['Piano Left'],
                                        parts['Violin1'], parts['Violin2'], parts['Viola'], parts['Cello']])
                all_parts.append(numpy_parts)
                all_programs.append(programs)
        except Exception as e:
            # Best effort: report the unreadable file and carry on.
            print(e)
            print('problem', mid_file_path)

if not all_parts:
    # Fail here with context rather than deep inside train_test_split.
    raise RuntimeError(
        f'no usable MIDI files found under {raw_path!r}; '
        'check the paths and the mid_name column of preprocessed_dataset.csv'
    )

# Window sampling draws from the global `random` state; seed it so a fresh
# run reproduces the same dataset (the split itself already uses
# random_state=0).
random.seed(0)
x_tr, x_val, y_tr, y_val = create_lstm_files(all_parts)
print(x_tr.shape)

# Binarise in place: every value above the threshold becomes 1.
for split in (x_tr, x_val, y_tr, y_val):
    split[split > 0.2] = 1.0

# Make sure the output directory exists before writing.
os.makedirs(preparation_path, exist_ok=True)

for split_name, split in (('x_tr', x_tr), ('x_val', x_val),
                          ('y_tr', y_tr), ('y_val', y_val)):
    file = os.path.join(preparation_path, f'{split_name}.pkl')
    with open(file, 'wb') as f:
        pickle.dump(split, f)
