## Synthesize MIDI File
### Cleaned-up Version

Imports

In [1]:
%%capture
%load_ext autoreload
%autoreload 2
import os
import copy
import argparse
import tensorflow as tf
import soundfile as sf
import scipy
from soundfile import write
from ddsp.training import trainers, train_util
from ddsp_piano.default_model import get_model, build_model
from ddsp_piano.utils.io_utils import load_midi_as_conditioning
from tensorflow.python.client import device_lib
from plots import bar_plot, subplot_input, subplot_output, plot_context
from PS_helper_Functions import save_as_wav, create_midi_and_synthesize
import numpy as np
from IPython.display import Audio




2024-12-12 19:39:12.954289: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Modification Functions:

In [2]:
def modify_context_matrix(context):
    """Pass-through hook for the context matrix.

    Registered under the key "modify_context_matrix" in the model's
    `modification_functions`. Edit the body to experiment; as written it
    hands `context` back untouched.

    Args:
        context: the context matrix handed to this hook.

    Returns:
        The same `context` object, unmodified.
    """
    # Disabled experiment: wipe the context by multiplying it with zero.
    # print('modify context vector')
    # print(context.shape)
    # context *= 0
    # print('context modified')
    return context

In [3]:
def modify_conditioning_matrix_at_context_input(conditioning):
    """Pass-through hook for the conditioning matrix at the context network.

    Registered under the key "modify_conditioning_matrix_at_context_input"
    in the model's `modification_functions`. As written it returns its
    argument untouched.

    Args:
        conditioning: the conditioning matrix handed to this hook.

    Returns:
        The same `conditioning` object, unmodified.
    """
    # Disabled experiment: replace every non-zero entry of
    # conditioning[0,:,:,0] by (value mod 12) + 24.
    # print("modify conditioning @ context")
    # conditioning[0,:,:,0][conditioning[0,:,:,0] != 0] = (np.mod(conditioning[0,:,:,0], 12) + 24)[conditioning[0,:,:,0] != 0]
    return conditioning

In [4]:
def modify_conditioning_matrix_at_monophonic_input(conditioning):
    """Pass-through hook for the conditioning matrix at the monophonic network.

    Registered under the key "modify_conditioning_matrix_at_monophonic_input"
    in the model's `modification_functions`. As written it returns its
    argument untouched.

    Args:
        conditioning: the conditioning matrix handed to this hook.

    Returns:
        The same `conditioning` object, unmodified.
    """
    # Disabled experiment: replace every non-zero entry of
    # conditioning[:,:,0] by (value mod 12) + 24.
    # print("modify conditioning @ monophonic")
    # conditioning[:,:,0][conditioning[:,:,0] != 0] = (np.mod(conditioning[:,:,0], 12) + 24)[conditioning[:,:,0] != 0]
    # # print(conditioning[0,249:300,0])
    return conditioning

In [5]:
def modify_z_vector(z):
    """Pass-through hook for the z-vector.

    Registered under the key "modify_z_vector" in the model's
    `modification_functions`. As written it returns its argument untouched.

    Args:
        z: the z-vector handed to this hook.

    Returns:
        The same `z` object, unmodified.
    """
    # Disabled experiment: scale the channels listed in the inner `[]`
    # (out of indices 0..15) by 50000.
    # print("modify z")
    # for i in range(16):
    #     if i in []:
    #         z[:,:,i]*=50000
    return z

In [6]:
def modify_global_inharmonicity(global_inharmonicity):
    """Pass-through hook for the global inharmonicity.

    Registered under the key "modify_global_inharmonicity" in the model's
    `modification_functions`. As written it returns its argument untouched.

    Args:
        global_inharmonicity: the value handed to this hook.

    Returns:
        The same `global_inharmonicity` object, unmodified.
    """
    # Disabled experiment: overwrite every entry with 500.
    # print("modify global inharmonicity")
    # global_inharmonicity[:] = 500
    return global_inharmonicity

In [7]:
def modify_global_detuning(global_detuning):
    """Pass-through hook for the global detuning.

    Registered under the key "modify_global_detuning" in the model's
    `modification_functions`. As written it returns its argument untouched.

    Args:
        global_detuning: the value handed to this hook.

    Returns:
        The same `global_detuning` object, unmodified.
    """
    # Disabled experiment: overwrite every entry with 2000000.
    # print("modify global detuning")
    # global_detuning[:] = 2000000
    return global_detuning

In [29]:
def modify_ir(ir):
    """Pass-through hook for the impulse response (IR).

    Registered under the key "modify_ir" in the model's
    `modification_functions`. As written it returns its argument untouched.

    Args:
        ir: the impulse response handed to this hook.

    Returns:
        The same `ir` object, unmodified.
    """
    # Disabled experiments, kept for reference:
    # print("modify ir")
    # print(ir.shape)
    #
    # (a) swap in 24000 samples of a recorded IR, starting at sample 4052
    # _ , IRwav = scipy.io.wavfile.read('path/to/E001_R001_M01.wav')
    # sp = 4052
    # ir = IRwav[sp:(sp+24000)]
    #
    # (b) clear the IR and place a few isolated impulses
    # ir[0,0,:] = 0
    # ir[0,0,3] = 3
    # for i in [1,200,400,600,800, 16000]:
    #     ir[0,0,i] = 3.0
    return ir

In [9]:
def update_modfication_function(target_model=None):
    """Re-register the current `modify_*` hooks on a model.

    The hook functions are looked up when this function is called, so after
    re-running the cell that defines one of the `modify_*` functions, calling
    this attaches the new definition to the model.

    Args:
        target_model: object that receives the `modification_functions`
            attribute. When omitted, the notebook-level `model` is used,
            which is what the original zero-argument version always did.

    Returns:
        None. The only effect is the attribute assignment on the model.
    """
    if target_model is None:
        # Backward-compatible default: fall back to the notebook global.
        target_model = model
    target_model.modification_functions = {
        "modify_context_matrix": modify_context_matrix,
        "modify_global_detuning": modify_global_detuning,
        "modify_global_inharmonicity": modify_global_inharmonicity,
        "modify_conditioning_matrix_at_context_input": modify_conditioning_matrix_at_context_input,
        "modify_conditioning_matrix_at_monophonic_input": modify_conditioning_matrix_at_monophonic_input,
        "modify_z_vector": modify_z_vector,
        "modify_ir": modify_ir,
    }

In [27]:
# Duration, in seconds, of the synthesized files.
# The model has to be rebuilt after every change of this value.

modelduration = duration = 10

Building Model

In [28]:
%%capture
class dummyargs:
    """Plain attribute container holding the settings used to build the model.

    Presumably stands in for a parsed command-line namespace (the notebook
    imports `argparse` but never uses it) - confirm against the script this
    notebook was derived from.

    Attributes are set from the constructor arguments:
        midi_file: value of `in_file`.
        out_file: value of `out_file`.
        duration: value of `duration`.
        piano_type: value of `piano_type` (default 3).
        ckpt: value of `ckpt` (default "ddsp_piano/model_weights/ckpt-0").
    """

    def __init__(self, in_file, out_file, duration,
                 piano_type=3, ckpt="ddsp_piano/model_weights/ckpt-0"):
        """Store the given settings as attributes.

        Args:
            in_file: path of the input MIDI file.
            out_file: path of the output file.
            duration: duration value forwarded to MIDI loading.
            piano_type: piano model identifier; defaults to the previously
                hard-coded value 3.
            ckpt: checkpoint path; defaults to the previously hard-coded
                path.
        """
        self.midi_file = in_file
        self.out_file = out_file
        self.duration = duration
        self.piano_type = piano_type
        self.ckpt = ckpt

# Settings for the build: input MIDI file, output file name and duration.
args = dummyargs("glissandoslow.mid", "GlissTest.wav", duration)

# Load MIDI data
print("Loading midi file...")
inputs = load_midi_as_conditioning(args.midi_file, duration=args.duration)
# Add piano model conditioning
inputs['piano_model'] = tf.convert_to_tensor([[args.piano_type]])

# Model construction
print(f"Midi file loaded (with duration {inputs['duration']} s).\
        \nNow building the piano synthesizer...")
strategy = train_util.get_strategy()
with strategy.scope():
    model = get_model(inference=True, duration=inputs['duration'])
    # Register the modify_* hooks through the shared helper instead of a
    # second hand-written copy of the same dict, so the two cannot drift
    # apart. This stays before build_model, as in the original ordering.
    update_modfication_function()
    # Keep the notebook-level name bound, as the original cell did.
    modification_functions = model.modification_functions
    model = build_model(model,
                        batch_size=1,
                        duration=inputs['duration'])
    # Restore model weights
    print("Model built, now retrieving model weights...")
    trainer = trainers.Trainer(model, strategy=strategy)
    trainer.restore(args.ckpt)



Some misc functions

In [14]:
def save_as_wav(outputs, filename="out.wav", samplerate=16000):
    """Peak-normalize the synthesized audio and write it to an audio file.

    NOTE: this definition shadows the `save_as_wav` imported from
    `PS_helper_Functions` in the import cell.

    Args:
        outputs: mapping whose 'audio_synth' entry is indexable, with the
            first element exposing `.numpy()` (the first batch item is
            written).
        filename: destination path handed to `soundfile.write`.
        samplerate: sample rate in Hz handed to `soundfile.write`; defaults
            to the previously hard-coded 16000.

    Returns:
        None. Writes the file and prints a confirmation line.
    """
    data = outputs['audio_synth'][0].numpy()
    # Guard the empty case: `.max()` raises on a zero-size array.
    peak = np.abs(data).max() if data.size else 0
    if peak > 0:
        # Out-of-place division: leaves the array returned by `.numpy()`
        # untouched and also works if that array is read-only.
        data = data / peak
    # else: silent (all-zero) audio is written as-is; dividing by a zero
    # peak would fill the file with NaNs.
    write(filename, data=data, samplerate=samplerate)
    print(f"Audio saved at {filename}")

In [15]:
def load_midi_and_synthesize(midiinput, model, duration, piano_type, save_output=False, output_file = "out.pkl", callargs=None):
    """Load a MIDI file as conditioning and run the model on it.

    Args:
        midiinput: MIDI file passed to `load_midi_as_conditioning`.
        model: callable model; invoked as `model(inputs)`.
        duration: duration value forwarded to `load_midi_as_conditioning`.
        piano_type: piano identifier, stored as a [[piano_type]] tensor under
            inputs['piano_model'].
        save_output: when True, serialize the model output to `output_file`.
        output_file: destination path for the serialized output.
        callargs: when not None, assigned to `model.callargs` before the
            model is called.

    Returns:
        Whatever `model(inputs)` returns.
    """
    # Refresh the modify_* hooks so edits made in the notebook take effect.
    # NOTE(review): the helper is called without arguments, so it refreshes
    # the notebook-level `model`; pass that same object as `model` here.
    update_modfication_function()
    inputs = load_midi_as_conditioning(midiinput, duration=duration)
    inputs['piano_model'] = tf.convert_to_tensor([[piano_type]])
    if callargs is not None:
        model.callargs = callargs
    output = model(inputs)
    if save_output:
        # `dill` was referenced here but is never imported by the notebook,
        # which raised NameError. Import it lazily and fall back to the
        # standard-library pickle when dill is not installed.
        try:
            import dill as serializer
        except ImportError:
            import pickle as serializer
        with open(output_file, "wb") as file:
            serializer.dump(output, file)
    return output

# Playground:
The function *load_midi_and_synthesize* takes four required arguments:
- the MIDI file, e.g. "KK_Schumann_exp.mid"
- the built model, e.g. `model`
- the duration in seconds of the sample to synthesize, e.g. 15
- the piano ID, i.e. which piano model should be used, e.g. 7

In [138]:
# Synthesize 15 s of each piece with piano ID 7.
# NOTE(review): the duration cell sets 10 s and states that the model must be
# rebuilt when the duration changes - confirm the model was built for 15 s.
KK_Schumann_exp = load_midi_and_synthesize(
    midiinput="KK_Schumann_exp.mid",
    model=model,
    duration=15,
    piano_type=7,
)
JBS_Fuge_ref_context = load_midi_and_synthesize(
    midiinput="JBS_Fuge.mid",
    model=model,
    duration=15,
    piano_type=7,
)

In [758]:
# Note description for a glissando: one single-note beat per note number
# 20..99, each with velocity 90.
notedict = {"beats": [[note] for note in range(20, 100)]}
notedict.update(
    velocities=[[90]] * len(notedict["beats"]),
    pedal=0,
    duration=.05,
    pause=0.0,
    silence=10,
)
gliss = create_midi_and_synthesize(
    notedict,
    model,
    duration,
    7,
    notenumber=True,
    f_vibrato=0,
    amplitude_vibrato=1,
    factor_velocity=1,
    callargs=None,
    save_output=False,
    output_file="out.pkl",
)

In [None]:
# Note description for three named notes (C2, G3, D4), each with velocity
# 100, synthesized with vibrato settings f_vibrato=10, amplitude_vibrato=50.
notedict = dict(
    beats=[["C2"], ["G3"], ["D4"]],
    velocities=[[100], [100], [100]],
    pedal=0,
    duration=2,
    pause=1.,
    silence=10,
)
vibrato5 = create_midi_and_synthesize(
    notedict,
    model,
    duration,
    7,
    notenumber=False,
    f_vibrato=10,
    amplitude_vibrato=50,
    factor_velocity=1,
    callargs=None,
    save_output=False,
    output_file="out.pkl",
)

In [23]:
# Play the first batch item of the synthesized audio at 16 kHz.
# NOTE(review): `Friml` is not defined in any visible cell of this notebook.
Audio(data=Friml["audio_synth"][0].numpy(), rate=16000)

In [32]:

# Write the synthesized audio to a WAV file.
# NOTE(review): `Resonance_lower_C_m7` is not defined in any visible cell.
save_as_wav(Resonance_lower_C_m7, "Resonance_lower_C_m7.wav")


Audio saved at Resonance_lower_C_m7.wav
