In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import keras
from keras.utils import plot_model

import pydot as pyd
from keras.utils.vis_utils import plot_model, model_to_dot
keras.utils.vis_utils.pydot = pyd
import tensorflow as tf

import os, glob
import pandas as pd

In [None]:
# Dataset size selector: '' uses the full dataset directory, the other
# values select the reduced variants.
# AMOUNT = '-small'
# AMOUNT = '-medium'
AMOUNT = ''

# NOTE(review): absolute, machine-specific path; consider making it configurable.
root_path = '/home/tikim/code/midi-velocity-infer'
dataset_test_path = f'{root_path}/dataset/maestro-midi{AMOUNT}/test'

# Read the min/max statistics used to normalise features and labels.
# NOTE(review): pickle.load can execute arbitrary code; only load a
# dataset.pkl that comes from a trusted source.
with open('dataset.pkl', 'rb') as dataset_file:
    dataset = pkl.load(dataset_file)
train_time_diff_min = dataset['train_time_diff_min']
train_time_diff_max = dataset['train_time_diff_max']
note_num_min = dataset['note_num_min']
note_num_max = dataset['note_num_max']
length_min = dataset['length_min']
length_max = dataset['length_max']
velocity_min = dataset['velocity_min']
velocity_max = dataset['velocity_max']
dataset = None  # drop the reference to the loaded dictionary

current_dir = os.getcwd()

# Load every test CSV without changing the working directory, so a failing
# read can no longer leave the kernel stranded in the dataset directory.
# The paths are sorted so that csv_files_test[0] is the same file on every run.
extension = 'csv'
test_csv_paths = sorted(
    glob.glob(os.path.join(glob.escape(dataset_test_path), '*.{}'.format(extension)))
)
# Bare file names, as the original glob inside the directory produced them.
test_csv_filenames = [os.path.basename(path) for path in test_csv_paths]
csv_files_test = [
    pd.read_csv(path, index_col=None, header=0) for path in test_csv_paths
]


In [None]:
# CSV columns fed to the model as input features. make_dataset normalises
# them by position, so the order here matters.
columns_train = ['time_diff', 'note_num', 'length']
# CSV column used as the label.
columns_label = ['velocity']

def divide_list(l, n, overlapping_window=0):
    """Yield windows of ``n`` consecutive items from ``l``.

    Windows start every ``n - overlapping_window`` items. If the last full
    window does not reach the end of ``l``, one final shorter window is
    yielded that starts at the next stride position and runs to the end, so
    every item is covered. With ``overlapping_window=0`` this is plain
    chunking followed by the remainder.

    Args:
        l: Sliceable sequence with a length (list, NumPy array, ...).
        n: Window size, must be positive.
        overlapping_window: Number of items shared by consecutive windows,
            must satisfy ``0 <= overlapping_window < n``.

    Yields:
        Slices of ``l``; all have length ``n`` except possibly the last one.

    Raises:
        ValueError: If ``n`` or ``overlapping_window`` is out of range. As this
            is a generator, the error surfaces on the first iteration.
    """
    if n <= 0:
        raise ValueError(f'n must be positive, got {n}')
    if not 0 <= overlapping_window < n:
        raise ValueError(
            f'overlapping_window must be in [0, {n}), got {overlapping_window}'
        )

    step = n - overlapping_window
    total = len(l)
    covered = 0  # index one past the last item that has been yielded
    start = 0
    for start in range(0, total - n + 1, step):
        yield l[start:start + n]
        covered = start + n

    if covered < total:
        # Items remain after the last full window (or no full window fit at
        # all): emit them as a final, shorter window.
        tail_start = start + step if covered else 0
        yield l[tail_start:]

# Number of consecutive notes in one window.
SAMPLE_LENGTH = 4
def pad_data(data, feature_num):
    """Zero-pad the last window of ``data`` up to ``SAMPLE_LENGTH`` rows.

    Only the final element is inspected. ``data`` is modified in place and
    also returned.

    Args:
        data: List of 2-D arrays of shape ``(rows, feature_num)``.
        feature_num: Number of columns in every window.

    Returns:
        The same list object, with its last window padded with float32 zero
        rows when it was shorter than ``SAMPLE_LENGTH``. An empty list is
        returned unchanged.
    """
    if not data:
        # Nothing to pad; indexing data[-1] would raise IndexError.
        return data

    missing_rows = SAMPLE_LENGTH - len(data[-1])
    if missing_rows:
        zero_rows = np.zeros((missing_rows, feature_num), dtype=np.float32)
        data[-1] = np.concatenate((data[-1], zero_rows))
    return data

def make_dataset(csv_data, columns_train, columns_label):
    """Turn one CSV table into normalised, fixed-length model windows.

    The first three input columns are min-max normalised with the time
    difference, note number and length statistics loaded from dataset.pkl
    (in that order); the first label column is normalised with the velocity
    statistics. Rows are then cut into windows of ``SAMPLE_LENGTH`` and the
    last window is zero-padded.

    Args:
        csv_data: Table indexable by a list of column names (a DataFrame).
        columns_train: Input column names; the first three must be the time
            difference, note number and length columns, in that order.
        columns_label: Label column names; the first is the velocity.

    Returns:
        Tuple ``(inputs, labels)`` of float32 arrays with shapes
        ``(windows, SAMPLE_LENGTH, len(columns_train))`` and
        ``(windows, SAMPLE_LENGTH, len(columns_label))``. An empty table
        yields zero windows. Padded label rows are 0, which is the same
        value as a normalised minimum velocity.
    """
    def _to_windows(rows):
        # Cut a (rows, features) array into padded (windows, SAMPLE_LENGTH, features).
        feature_num = rows.shape[1]
        if len(rows) == 0:
            # pad_data cannot handle "no windows"; return the empty result directly.
            return np.empty((0, SAMPLE_LENGTH, feature_num), dtype=np.float32)
        windows = pad_data(list(divide_list(rows, SAMPLE_LENGTH)), feature_num)
        return np.array(windows, dtype=np.float32)

    # np.array copies, so the in-place normalisation below never touches csv_data.
    data_input_raw = np.array(csv_data[columns_train], dtype=np.float32)
    data_label_raw = np.array(csv_data[columns_label], dtype=np.float32)

    # normalize only the time difference
    data_input_raw[:, 0] = (data_input_raw[:, 0] - train_time_diff_min) / (train_time_diff_max - train_time_diff_min)
    # normalize only the note number
    data_input_raw[:, 1] = (data_input_raw[:, 1] - note_num_min) / (note_num_max - note_num_min)
    # normalize only the length
    data_input_raw[:, 2] = (data_input_raw[:, 2] - length_min) / (length_max - length_min)
    # normalize only the velocity
    data_label_raw[:, 0] = (data_label_raw[:, 0] - velocity_min) / (velocity_max - velocity_min)

    return _to_windows(data_input_raw), _to_windows(data_label_raw)

# Only the first loaded test file is evaluated in this notebook.
if not csv_files_test:
    # Fail with a pointer to the directory instead of a bare IndexError.
    raise FileNotFoundError(f'No test CSV files were loaded from {dataset_test_path}')
test_csv_file = csv_files_test[0]
dataset_input, dataset_label = make_dataset(test_csv_file, columns_train, columns_label)

In [None]:
from keras.losses import mse, cosine_similarity
def make_mse_cosine_loss(alpha):
    """Build a loss that blends cosine similarity and mean squared error.

    Args:
        alpha: Weight of the cosine-similarity term; the MSE term is
            weighted by ``1 - alpha``.

    Returns:
        A function ``mse_cosine_loss(y_true, y_pred)``. The inner name is the
        key under which the loss is registered when the model is loaded.
    """
    def mse_cosine_loss(y_true, y_pred):
        cosine_term = 1 * cosine_similarity(y_true, y_pred)
        mse_term = mse(y_true, y_pred)
        return alpha * cosine_term + (1 - alpha) * mse_term
    return mse_cosine_loss

# Weight of the cosine term used when building the loss passed to load_model.
ALPHA = 0.15
mse_cosine_loss = make_mse_cosine_loss(ALPHA)

def clipped_loss(y_true, y_pred):
    """Mean squared error after clipping the prediction to [0, 127].

    NOTE(review): make_dataset scales labels by the velocity min/max, so a
    0..127 clip only bites on unnormalised predictions; confirm the intended
    range before using this loss.
    """
    bounded_pred = tf.clip_by_value(y_pred, clip_value_min=0, clip_value_max=127)
    return tf.losses.mean_squared_error(y_true, bounded_pred)

# model = keras.models.load_model('model.h5', custom_objects={'clipped_loss': clipped_loss})
model = keras.models.load_model('model.h5', custom_objects={'mse_cosine_loss': mse_cosine_loss})

In [None]:
# Run the loaded model on the windowed test input and show the prediction
# shape next to the label shape so a mismatch is visible immediately.
dataset_test_result = model.predict(dataset_input)
predicted_shape, label_shape = dataset_test_result.shape, dataset_label.shape
print(predicted_shape, label_shape)

In [None]:
# Invert the label normalisation applied in make_dataset,
#   normalised = (velocity - velocity_min) / (velocity_max - velocity_min),
# so multiplying by velocity_max alone is only correct when velocity_min is 0.
velocity_range = velocity_max - velocity_min

# The last window is zero-padded up to SAMPLE_LENGTH, so the flattened arrays
# can hold more entries than the CSV has notes. Keep only the real notes so
# the padding does not skew the plots and error metrics below.
note_count = len(test_csv_file)
result = (dataset_test_result.reshape(-1) * velocity_range + velocity_min)[:note_count]
true = (dataset_label.reshape(-1) * velocity_range + velocity_min)[:note_count]

# Round predictions to whole velocity steps; the labels are left as computed.
result = result.round()

# result = dataset_test_result.reshape(-1).astype(int)
# np.clip(result, 0, 127, out=result)
# true = dataset_label.reshape(-1).astype(int)

In [None]:
# Overlay predicted and true velocities for the first 500 notes.
plt.figure(figsize=(20, 3))
plt.plot(result[:500], label='result', linestyle='--')
plt.plot(true[:500], label='true', linestyle='-')
plt.xlabel('note index')
plt.ylabel('velocity')
plt.title('Predicted vs. true velocity (first 500 notes)')
plt.legend()  # the labels above are only shown once a legend is drawn
plt.show()

In [None]:
# Mean absolute error between the rounded prediction (result) and true
mae = np.mean(np.abs(result - true))
print(mae)

In [None]:
# Signed prediction error per note (positive: the model predicted too low).
diff = true - result

# Histogram of the error distribution.
fig, ax = plt.subplots(figsize=(10, 3))
ax.hist(diff, bins=100)
plt.show()

In [None]:
import scipy.stats as stats
# Describe the prediction error by a normal distribution with the same mean
# and (population) standard deviation.
mean, std = np.mean(diff), np.std(diff)
print(mean, std)

# Evaluate the fitted density on a grid spanning three standard deviations
# on either side of the mean.
x = np.linspace(mean - 3 * std, mean + 3 * std, 100)
pdf = stats.norm(loc=mean, scale=std).pdf(x)

# Overlay the fitted density on the normalised error histogram.
fig, ax = plt.subplots()
ax.plot(x, pdf, label='Approximate Normal Distribution')
ax.hist(diff, bins=30, density=True, alpha=0.5, label='Original Distribution')
ax.legend()
ax.set_xlabel('x')
ax.set_ylabel('Probability Density')
ax.set_title('Approximation of Normal Distribution')
plt.show()


In [None]:
# Fixed seed so that Restart & Run All reproduces the same augmented output.
NOISE_SEED = 0
# Fraction of the measured error spread that is injected as noise.
NOISE_STD_SCALE = 0.2

# Add Gaussian noise centred on the mean prediction error, with a reduced spread.
rng = np.random.default_rng(NOISE_SEED)
random_numbers = rng.normal(mean, std * NOISE_STD_SCALE, size=len(result))
result_augmented = result + random_numbers
# clip the result_augmented
result_augmented = np.clip(result_augmented, 0, 127)

# plot the result_augmented and true values
plt.figure(figsize=(20, 3))
plt.plot(result_augmented[:500], label='result', linestyle='--')
plt.plot(true[:500], label='true', linestyle='-')
plt.xlabel('note index')
plt.ylabel('velocity')
plt.title('Augmented prediction vs. true velocity (first 500 notes)')
plt.legend()  # the labels above are only shown once a legend is drawn
plt.show()

In [None]:
# Mean absolute error of the noise-augmented prediction against true.
absolute_error_augmented = np.abs(result_augmented - true)
mae = absolute_error_augmented.mean()
print(mae)

In [None]:
# Preparing data for csv
def generate_csv(csv_file, filename, columns_input, result):
    """Write the input columns plus predicted velocities to a CSV file.

    Args:
        csv_file: Table indexable by a list of column names (a DataFrame).
        filename: Path of the CSV file to write.
        columns_input: Columns copied from ``csv_file``, in output order.
            Their values are cast to ``int``.
            NOTE(review): the cast truncates fractional values; confirm the
            source columns are integral.
        result: Predicted velocities of any shape. The array is flattened and
            anything beyond the number of rows (e.g. window padding) is
            dropped.

    Raises:
        ValueError: If ``result`` holds fewer values than ``csv_file`` has rows.
    """
    data_demo_input = np.array(csv_file[columns_input], dtype=int)
    row_count = len(data_demo_input)

    # Flatten first, so the truncation below acts on individual velocities
    # rather than on the first axis of a windowed prediction array.
    velocities = np.asarray(result).reshape(-1)
    if len(velocities) < row_count:
        raise ValueError(
            f'result has {len(velocities)} values but the CSV has {row_count} rows'
        )
    data_demo_velocity = np.array(np.round(velocities[:row_count]), dtype=int)

    # Name the output columns after columns_input instead of a fixed list.
    dataframe = pd.DataFrame(data_demo_input, columns=list(columns_input))
    dataframe['velocity'] = data_demo_velocity
    dataframe.to_csv(filename, index=False)

# Columns copied unchanged from the test CSV into both result files.
columns_full_input = ['time', 'time_diff', 'note_num', 'length']

# One CSV per prediction variant: the plain and the noise-augmented result.
for csv_name, velocities in (
    ('result.csv', result),
    ('result_augmented.csv', result_augmented),
):
    generate_csv(test_csv_file, csv_name, columns_full_input, velocities)


In [None]:
from pathlib import Path
from subprocess import Popen, PIPE
import os

## util/csv2midi ??_predicted.csv ??.midi
def csv2midi(csv_filename, midi_original_filename, csv2midi_filename='../util/csv2midi'):
    """Run the external csv2midi tool and report its output.

    Args:
        csv_filename: CSV file passed as the tool's first argument.
        midi_original_filename: MIDI file passed as the tool's second argument.
        csv2midi_filename: Path of the executable to run.

    Returns:
        The process exit code. A non-zero code is printed, not raised, so the
        caller decides how to react.
    """
    command = [csv2midi_filename, csv_filename, midi_original_filename]
    # stderr is captured as well: without it error text from the tool never
    # reaches this function and `err` is always None.
    with Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True) as process:
        output, err = process.communicate()
    # communicate() has already waited for the process to finish.
    exit_code = process.returncode

    print(output)
    if err:
        print(err)
    print(f'csv2midi exit code: {exit_code}')
    return exit_code

original_midi_filename = 'midi.midi'

# Convert each result CSV and move the produced file to its own name before
# the next conversion runs.
# NOTE(review): this presumes csv2midi writes 'midi_predicted.midi' for the
# input 'midi.midi'; confirm against the tool.
for result_csv_filename, renamed_midi_filename in (
    ('result.csv', 'midi_predicted_vanilla.midi'),
    ('result_augmented.csv', 'midi_predicted_augmented.midi'),
):
    csv2midi(result_csv_filename, original_midi_filename)
    # os.replace overwrites an existing target on every platform, so the cell
    # can be re-run; os.rename raises on Windows when the target exists.
    os.replace('midi_predicted.midi', renamed_midi_filename)