In [None]:
# BVH, Feb 2022.

import os
import pathlib
import sys
# Make the project-local packages (including the bundled pyfluidsynth) importable.
# Both the working directory and its parent are registered as candidate roots.
for base_dir in (os.getcwd(), str(pathlib.Path(os.getcwd()).parent)):
    print(base_dir)
    sys.path.append(base_dir)
    sys.path.extend(
        os.path.join(base_dir, subdir)
        for subdir in (
            'experimental/',
            'third_party/',
            'third_party/pyfluidsynth',
            'third_party/pyfluidsynth/test',
        )
    )

from __init__ import *

# Library imports.
import fluidsynth
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.io
import scipy.signal
import time

In [None]:
def plot_waveform(x, Fs, zoom=1):
    '''
    Plots a waveform against time, optionally zoomed in around its midpoint.
    :param x (N) array: Audio samples; the vertical axis is fixed to [-1, 1].
    :param Fs (int): Sample rate [Hz].
    :param zoom (number): If > 1, only the central 1 / zoom fraction of x is shown.
    NOTE: When zooming, the time axis is re-based so that the excerpt starts at 0 s.
    '''
    _, ax = plt.subplots()

    if zoom > 1:
        # Keep a window of len(x) / zoom samples centered on the middle of the signal.
        center = len(x) / 2
        half_width = len(x) / (zoom * 2)
        x = x[int(center - half_width):int(center + half_width)]

    time_axis = np.arange(len(x)) / Fs
    ax.plot(time_axis, x)
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Signal')
    ax.set_xlim(0, len(x) / Fs)
    ax.set_ylim(-1, 1)
    ax.grid(True)
    plt.show()

def plot_stft(x, Fs):
    '''
    Plots the STFT magnitude of a signal as a time / frequency heatmap.
    :param x (N) array: Audio samples.
    :param Fs (int): Sample rate [Hz].
    NOTE: The STFT uses 256-sample segments, the color scale is clipped to [0, 0.04],
    and the (linear) frequency axis is limited to Fs / 6, i.e. a third of Nyquist.
    '''
    freqs_hz, frame_times, spectrum = scipy.signal.stft(x, Fs, nperseg=256)
    magnitude = np.abs(spectrum)

    _, ax = plt.subplots()
    # Frequencies are converted from Hz to kHz for display.
    ax.pcolormesh(frame_times, freqs_hz / 1000.0, magnitude, vmin=0, vmax=0.04, shading='gouraud')
    ax.set_title('STFT Magnitude')
    ax.set_ylabel('Frequency [kHz]')
    ax.set_xlabel('Time [s]')
    ax.set_xlim(0, len(x) / Fs)
    ax.set_ylim(0, Fs / 6000.0)
    ax.grid(True)
    plt.show()
    
def play(x, Fs):
    '''
    Embeds an audio player for the given samples in the notebook output.
    :param x: Audio samples, handed unchanged to IPython.display.Audio.
    :param Fs (int): Sample rate [Hz].
    '''
    player = ipd.Audio(x, rate=Fs)
    ipd.display(player)

In [None]:
def random_tp_matrix(duration_ms, timecell_ms):
    '''
    Generates a random melody consisting of 2 to 8 sustained notes.
    :param duration_ms (int): Total duration of the generated melody.
    :param timecell_ms (int): Duration of one timecell (i.e. element in the horizontal axis) [ms].
    :return tp_matrix (P, T) numpy array: Timecell / frequency binary description of melody.
    NOTE: P is always 128 and T = duration_ms // timecell_ms. Note boundaries are drawn from
    timecells 1 .. T - 3, so at least 4 timecells are needed (np.random.randint raises
    ValueError otherwise), and the first cell and the last two cells always stay silent.
    Pitches are drawn uniformly from _PITCH_MIN .. _PITCH_MAX (inclusive).
    '''
    num_pitch_rows = 128
    num_timecells = duration_ms // timecell_ms
    melody = np.zeros((num_pitch_rows, num_timecells), dtype=np.uint8)

    # Draw order (note count, then per note: both boundaries, then the pitch) is kept
    # fixed so that seeded runs remain reproducible.
    for _ in range(np.random.randint(2, 9)):
        first, second = np.random.randint(1, num_timecells - 2, 2)
        onset = min(first, second)
        offset = max(first, second) + 1  # Exclusive, so every note lasts at least one cell.
        pitch = np.random.randint(_PITCH_MIN, _PITCH_MAX + 1)
        melody[pitch, onset:offset] = 1

    return melody

In [None]:
def visualize_tp_matrix(tp_matrix, Fs, timecell_ms):
    '''
    Displays a time / pitch matrix as a piano roll with a logarithmic frequency axis.
    :param tp_matrix (P, T) numpy array: Timecell / frequency binary description of melody.
    :param Fs (int): Sample rate [Hz]. Not used by the plot itself.
    :param timecell_ms (int): Duration of one timecell (i.e. element in the horizontal axis) [ms].
    NOTE: Only the rows _PITCH_MIN .. _PITCH_MAX (inclusive) are drawn; P must be 128.
    '''
    import matplotlib.ticker as mticker

    def midi_to_hz(pitch):
        # NOTE: In MIDI, pitch index 69 = note A4 = frequency 440 Hz.
        return 440 * 2 ** ((pitch - 69) / 12)

    (num_rows, num_timecells) = tp_matrix.shape
    assert num_rows == 128

    freq_min = midi_to_hz(_PITCH_MIN)
    freq_max = midi_to_hz(_PITCH_MAX)
    print('freq_min:', freq_min, ' freq_max:', freq_max)
    total_ms = num_timecells * timecell_ms

    _, ax = plt.subplots()

    # Horizontal coordinates are the centers of the timecells, in seconds.
    half_cell_ms = timecell_ms / 2.0
    cell_centers_s = np.linspace(half_cell_ms, total_ms - half_cell_ms, num_timecells) / 1000.0
    # One frequency per semitone in ascending order; together with the log-scaled
    # axis this places high pitched notes at the top of the figure.
    semitone_freqs_hz = np.logspace(np.log10(freq_min), np.log10(freq_max), _PITCH_MAX - _PITCH_MIN + 1)
    time_grid, freq_grid = np.meshgrid(cell_centers_s, semitone_freqs_hz)

    visible_rows = tp_matrix[_PITCH_MIN:_PITCH_MAX + 1]
    ax.pcolormesh(time_grid, freq_grid, visible_rows, shading='nearest')
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Frequency [Hz]')
    ax.set_yscale('log')
    ax.yaxis.set_minor_formatter(mticker.ScalarFormatter())
    ax.set_xlim(0, total_ms / 1000)
    # Pad the frequency range by half a semitone on each side so that the lowest
    # and highest rows are drawn at full height.
    ax.set_ylim(freq_min * 2 ** (-0.5 / 12), freq_max  * 2 ** (0.5 / 12))
    ax.grid(True)
    plt.show()

In [None]:
def tp_matrix_to_event_seq(tp_matrix, timecell_ms):
    '''
    Converts a dense time/pitch matrix into a sequence of events.
    :param tp_matrix (P, T) numpy array: Timecell / frequency binary description of melody.
    :param timecell_ms (int): Duration of one timecell (i.e. element in the horizontal axis) [ms].
    :return event_seq (list of (str, int) tuples): List of DAW commands encoded as performance events.
    NOTE: Events are ('NOTE_ON', pitch), ('NOTE_OFF', pitch) and ('TIME_SHIFT', duration_ms).
    Within one timecell, note events are emitted in ascending pitch order, and consecutive
    timecells without note changes are merged into a single TIME_SHIFT.
    NOTE: Notes that are still active in the last timecell are closed with a NOTE_OFF after
    the final TIME_SHIFT, so that every NOTE_ON has a matching NOTE_OFF.
    '''
    # NOTE: In MIDI, pitch index 69 = note A4 = frequency 440 Hz.
    (P, T) = tp_matrix.shape
    assert P == 128

    event_seq = []
    active = np.zeros(P, dtype=bool)  # Which pitches are currently sounding.

    # Iterate temporally from start to end first.
    for t in range(T):
        column = tp_matrix[:, t].astype(bool)

        # Only pitches whose state differs from the previous timecell produce events.
        for p in np.flatnonzero(column != active):
            p = int(p)  # Keep plain Python ints in the output.
            event_seq.append(('NOTE_ON' if column[p] else 'NOTE_OFF', p))
        active = column

        # Pass some time (merge durations into a single command whenever possible).
        if event_seq and event_seq[-1][0] == 'TIME_SHIFT':
            event_seq[-1] = ('TIME_SHIFT', event_seq[-1][1] + timecell_ms)
        else:
            event_seq.append(('TIME_SHIFT', timecell_ms))

    # Release whatever is still sounding when the matrix ends.
    for p in np.flatnonzero(active):
        event_seq.append(('NOTE_OFF', int(p)))

    return event_seq

In [None]:
def event_seq_to_tp_matrix(event_seq):
    '''
    Placeholder, not implemented yet: currently ignores its input and returns None.
    :param event_seq (list of (str, int) tuples): List of DAW commands encoded as performance events.
    :return None.
    TODO: Presumably intended to rebuild a (P, T) time/pitch matrix from the events - confirm.
    '''
    return None

In [None]:
def event_seq_to_waveform(soundfont_path, event_seq, Fs):
    '''
    Uses pyfluidsynth to synthesize the given sequence of events into audio samples.
    :param soundfont_path (str): Path to SoundFont instrument file.
    :param event_seq (list of (str, int) tuples): List of DAW commands encoded as performance events.
    :param Fs (int): Sample rate [Hz].
    :return samples int16 numpy array (empty if event_seq contains no TIME_SHIFT).
    :raises RuntimeError: If the SoundFont file could not be loaded.
    NOTE: The number of rendered frames is determined by the total sum of TIME_SHIFT values in
    event_seq. It is derived from the cumulative elapsed time, so per-event rounding does not add up.
    NOTE(review): With pyfluidsynth's default stereo output, get_samples() presumably returns
    interleaved left / right samples (2 values per frame) - confirm before treating the result as mono.
    '''
    # QUICKSTART:
    # program_select(track, soundfontid, banknum, presetnum)
    # noteon(track, midinum, velocity)
    # noteoff(track, midinum)
    # get_samples(len)
    velocity = 75  # Fixed loudness for every note.

    # Render at the requested rate; without this pyfluidsynth uses its own default
    # (44100 Hz), which would not match the sample counts computed from Fs below.
    fl = fluidsynth.Synth(samplerate=float(Fs))
    samples_list = []

    try:
        # Instantiate synthesizer with a single soundfont instrument.
        sfid = fl.sfload(soundfont_path)
        if sfid == -1:
            raise RuntimeError('Failed to load SoundFont: ' + str(soundfont_path))
        fl.program_select(0, sfid, 0, 0)

        elapsed_ms = 0       # Total TIME_SHIFT duration seen so far.
        frames_rendered = 0  # Frames already requested from the synthesizer.

        # Run through list of events and encode them into sound.
        for event in event_seq:
            command = event[0]  # NOTE_ON / NOTE_OFF / TIME_SHIFT.
            parameter = int(event[1])  # duration [ms] / note [pitch].

            if command == 'NOTE_ON':
                fl.noteon(0, parameter, velocity)

            elif command == 'NOTE_OFF':
                fl.noteoff(0, parameter)

            elif command == 'TIME_SHIFT':
                # Render up to the frame index that corresponds to the new elapsed time.
                elapsed_ms += parameter
                frames_due = int(elapsed_ms * Fs // 1000) - frames_rendered
                if frames_due > 0:
                    cur_samples = fl.get_samples(frames_due)
                    samples_list.append(cur_samples.astype(np.int16))
                    frames_rendered += frames_due
    finally:
        # Always release the native synthesizer, even if something above failed.
        fl.delete()

    if not samples_list:
        return np.zeros(0, dtype=np.int16)
    return np.concatenate(samples_list, axis=0)

In [None]:
def stft_mag_to_waveform(f, t, Zxx):
    '''
    Unfinished stub; presumably meant to turn STFT data back into a waveform - TODO confirm.
    :param f, t, Zxx: Named like the outputs of scipy.signal.stft; none is used in the visible body.
    NOTE(review): scipy.signal.istft is called without its required Zxx argument, so as written
    this call raises TypeError; nothing is returned either.
    '''
    scipy.signal.istft()