In [None]:
USE_PRIVATE_DISTRO = True

DRIVE_BASE_DIR = '/content/drive/MyDrive/SMC 10/DDSP-10/'

if USE_PRIVATE_DISTRO:
    print("[INFO] Using private distro. Be careful.")
    from google.colab import drive
    drive.mount('/content/drive')
    !pip install -qU /content/drive/MyDrive/SMC\ 10/DDSP-10/dist/ddsp-1.2.0.tar.gz
else:
    !pip install -qU ddsp

In [None]:
# Name of the saved autoencoder to load; it is appended to
# DRIVE_BASE_DIR + 'models/' when the model is restored. The trailing
# `#@param` annotation exposes this value as a Colab form field.
modelname = 'vae_params_std10_ar_fixed' #@param {type: "string"}


# Import libraries

In [None]:
import keras
from keras import layers
from keras import backend
from keras import losses

import tensorflow as tf

import numpy as np

from ddsp import synths
from ddsp.colab import colab_utils
from ddsp import core

import seaborn as sns
from matplotlib import pyplot as plt
# Global plot styling: seaborn's "whitegrid" style for every figure, and
# 'retina' as the inline backend's figure format.
sns.set(style="whitegrid")
%config InlineBackend.figure_format='retina'

# Load autoencoder

In [None]:
# Restore the trained autoencoder from Drive, then expose its two halves as
# standalone models so that latent codes can be produced and decoded separately.
autoencoder = keras.models.load_model(f'{DRIVE_BASE_DIR}models/{modelname}')

# layers[1] is used as the encoder sub-model and layers[2] as the decoder
# (presumably layers[0] is the input layer -- confirm against the saved model).
_encoder_layer, _decoder_layer = autoencoder.layers[1], autoencoder.layers[2]

encoder_nn = keras.Model(inputs=autoencoder.inputs,
                         outputs=_encoder_layer.outputs,
                         name='encoder')
decoder_nn = keras.Model(inputs=_decoder_layer.inputs,
                         outputs=_decoder_layer.outputs,
                         name='decoder')

# An encoder with exactly three outputs is treated as a VAE by the rest of
# the notebook; any other output count is handled as a plain autoencoder.
USE_VAE = (len(_encoder_layer.outputs) == 3)

# Generate synthetic dataset

In [None]:
# Rendering settings for the FM synthesizer. N_SAMPLES is the total number of
# audio samples requested per render (DURATION is presumably in seconds).
SAMPLE_RATE = 48000
DURATION = 20

N_SAMPLES = int(SAMPLE_RATE * DURATION)

fm_synth = synths.FrequencyModulation(sample_rate=SAMPLE_RATE,
                                      n_samples=N_SAMPLES)

# One row per algorithm: on/off flags for the four operators. These flags are
# used directly as the operators' amplitude values when patches are generated.
algorithms = (
    [[1, 0, 0, 0] for _ in range(5)]
    + [[1, 1, 0, 0] for _ in range(5)]
    + [[1, 1, 1, 0] for _ in range(3)]
    + [[1, 1, 1, 1]]
)

# One row per algorithm (same order as `algorithms`): the modulator codes that
# may take a non-zero amount for that algorithm. Codes match the keys of
# `mods` below (presumably "43" means operator 4 modulating operator 3 --
# confirm against the synth implementation).
connections = [
    # algorithms with one active operator
    [43, 32, 21],
    [42, 32, 21],
    [42, 31, 21],
    [43, 42, 31, 21],
    [41, 31, 21],
    # algorithms with two active operators
    [42, 31],
    [42, 32],
    [42, 32, 31],
    [43, 32],
    [43, 32, 31],
    # algorithms with three active operators
    [43],
    [43, 42],
    [43, 42, 41],
    # all four operators active: no modulation
    [],
]

# Scratch table of modulator amounts, keyed by modulator code; every entry
# starts at zero and is overwritten for each generated patch.
mods = dict.fromkeys(('21', '31', '32', '41', '42', '43'), 0)

In [None]:
# Size of the synthetic dataset, plus the sizes of an 80/20 train/test split.
DATA_SIZE = 10000
TRAIN_SIZE = 8*DATA_SIZE//10
TEST_SIZE = DATA_SIZE - TRAIN_SIZE

# patch_x collects one 18-value feature vector per patch:
#   [amp, index, env] for each of the 4 operators, then 6 modulator amounts.
# patch_y collects the index of the algorithm each patch was drawn from.
patch_x = []
patch_y = []

for sample in range(DATA_SIZE):

  # Pick the algorithm first; it decides which operators and modulators are live.
  algo_idx = np.random.randint(len(algorithms))
  active_links = connections[algo_idx]

  features = []
  for op, enabled in enumerate(algorithms[algo_idx]):
    # Amplitude is simply the operator's on/off flag as a float (0.0 or 1.0).
    features.append(float(enabled))

    # Index: the first operator is pinned to 1/9; the others draw k/9 with
    # k in 1..9 (the normalised form of the 0.5 .. 4.5 range).
    raw_index = np.random.randint(1, 10) if op > 0 else 1.0
    features.append(raw_index / 9.0)

    # Envelope: one uniform draw from [0, 1).
    features.extend(np.random.random(1).tolist())

  # Modulators: an amount in {0, 1/11, ..., 1} when the algorithm allows the
  # connection, otherwise exactly 0.
  for key in mods:
    mods[key] = np.random.randint(0, 12) / 11.0 if int(key) in active_links else 0

  features.extend(mods[key] for key in ('21', '31', '32', '41', '42', '43'))

  patch_x.append(features)
  patch_y.append(algo_idx)

patch_x = np.array(patch_x)
patch_y = np.array(patch_y)

# Encode dataset

In [None]:
# Project the whole dataset into latent space, then reconstruct it.
#
# In the VAE case the encoder produces three outputs and only the third one is
# used as the latent code. The output is indexed rather than unpacked into `_`
# placeholders: binding `_` in a notebook shadows IPython's "last output"
# variable for the rest of the session.
encoder_output = encoder_nn.predict(patch_x)
encoded_data = encoder_output[2] if USE_VAE else encoder_output

decoded_data = decoder_nn.predict(encoded_data)

# Plot results

In [None]:
# End points of a straight path through the 2-D latent space. The VAE and the
# plain autoencoder use different end points.
if USE_VAE:
  (x_from, y_from), (x_to, y_to) = (-1, 1), (1, -1)
else:
  (x_from, y_from), (x_to, y_to) = (5, 5), (15, 10)

# 100 evenly spaced latent points along the path, decoded back into patches.
samples = np.linspace([x_from, y_from], [x_to, y_to], 100)
predicted = decoder_nn.predict(samples)

In [None]:
# Figure 1: the encoded dataset in latent space, coloured by algorithm index,
# with the sampled straight-line path drawn on top.
plt.figure(figsize=(12, 6))
plt.gca().set_aspect('equal', adjustable='box')
plt.set_cmap('jet')
plt.scatter(encoded_data[:,0], encoded_data[:,1], c=patch_y, s=1, alpha=0.6, zorder=1)
# The path is drawn twice (wide white underlay, thin black line) so that it
# stays visible over the scatter; the end points get white markers.
plt.plot([x_from,x_to],[y_from,y_to], c='white', linewidth=6, alpha=0.75, zorder=2)
plt.plot([x_from,x_to],[y_from,y_to], c='black', linewidth=2, zorder=3)
plt.scatter([x_from,x_to],[y_from,y_to], c='white', s=100, edgecolors='black', linewidth=2, zorder=4)

plt.colorbar()
plt.show()

# Figure 2: how each decoded patch parameter evolves along the path.
# The figure handle gets its own name (`fig`) so the loop variables below do
# not overwrite it.
fig, ax = plt.subplots(2, 2, figsize=(8, 6), sharex=True)

# Decoded patch layout: [amp, index, env] for each of 4 operators, followed by
# the 6 modulator amounts in the order 21, 31, 32, 41, 42, 43.
op_labels = ['op1', 'op2', 'op3', 'op4']
mod_labels = ['21', '31', '32', '41', '42', '43']
panels = [
    (ax[0][0], 'Amp',        [op*3     for op in range(4)], op_labels),
    (ax[0][1], 'Index',      [op*3 + 1 for op in range(4)], op_labels),
    (ax[1][0], 'Env',        [op*3 + 2 for op in range(4)], op_labels),
    (ax[1][1], 'Modulators', [12 + m   for m in range(6)],  mod_labels),
]

for axis, title, columns, labels in panels:
  axis.set_title(title)
  for column, label in zip(columns, labels):
    axis.plot(predicted[:, column], label=label)
  axis.legend(fontsize='small')
plt.show()

# Synthesize audio

In [None]:
def expand(t):
  """Return `t` with two new size-1 axes prepended, i.e. shape (...) -> (1, 1, ...)."""
  return tf.expand_dims(tf.expand_dims(t, axis=0), axis=0)

def patch_to_tensor(f0_hz, patch):
  """Convert one normalised patch vector into the control tensors passed to the synth.

  Args:
    f0_hz: Fundamental frequency value; converted to a float32 tensor.
    patch: Sequence laid out as [amp, index, env] for each of the four
      operators (elements 0..11) followed by the modulator amounts
      (elements 12 onward).

  Returns:
    Tuple (f0, op1, op2, op3, op4, modulators) of float32 tensors, each with
    two leading size-1 axes added by `expand`. For a scalar `f0_hz` and an
    18-element patch the shapes are (1, 1, 1), four times (1, 1, 3), and
    (1, 1, 6).
  """
  def as_control(values):
    # float32 tensor with the two leading axes added by `expand`.
    return expand(tf.convert_to_tensor(values, dtype=tf.float32))

  patch = np.array(patch)

  # Undo the dataset normalisation: index was stored as k/9, so *4.5 gives
  # k/2 (0.5 .. 4.5); amp and env are used as-is.
  operator_scale = [1, 4.5, 1]

  f0 = expand(tf.expand_dims(tf.convert_to_tensor(f0_hz, dtype=tf.float32), axis=0))
  operators = [as_control(patch[start:start + 3] * operator_scale)
               for start in (0, 3, 6, 9)]
  # Modulator amounts were stored as m/11, so *11 restores m (0 .. 11).
  modulators = as_control(patch[12:] * 11)

  return (f0, *operators, modulators)

In [None]:
# Direct interpolation between patches
#
# Draws random patches from the dataset and lays them out as a sequence of
# control frames along axis 1: the first patch once, then each of the next
# nine patches held for three consecutive frames. The frame sequence is handed
# to the synth in one call (the synth presumably interpolates between frames
# when rendering N_SAMPLES of audio -- confirm against the synth).

n_targets = 9    # patches visited after the starting patch
hold_frames = 3  # consecutive control frames each visited patch occupies

# One random draw per patch, in the same order as a sequential loop would make them.
chosen = [patch_x[np.random.randint(len(patch_x))] for _ in range(1 + n_targets)]
converted = [patch_to_tensor(440, patch) for patch in chosen]
frames = converted[:1] + [c for c in converted[1:] for _ in range(hold_frames)]

# Regroup per control (f0, op1, ...) and join each along the frame axis with a
# single concat instead of growing the tensors inside a loop.
f0, op1, op2, op3, op4, modulators = (
    tf.concat(list(track), axis=1) for track in zip(*frames))

audio = fm_synth(f0, op1, op2, op3, op4, modulators)

# Peak-normalise to 1/1.5 of full scale. The absolute peak is used so that a
# dominant negative excursion cannot end up outside [-1, 1], and silence is
# left untouched rather than divided by zero.
peak = tf.reduce_max(tf.abs(audio[0,:])).numpy()
if peak > 0:
  audio /= peak*1.5
colab_utils.play(audio, sample_rate=SAMPLE_RATE, autoplay=True)

colab_utils.specplot(audio)

In [None]:
# Going in circles around Z
#
# Samples `num_steps` points evenly spaced on a circle of radius 2 centred on
# the latent-space origin, decodes each point into a patch and renders the
# resulting sequence of control frames with the FM synth.

num_steps = 4

rads = np.linspace(0,2*np.pi,num_steps, endpoint=False)
x = np.cos(rads)*2.0
y = np.sin(rads)*2.0

# Decode every point in one batched predict call (one row per latent point)
# instead of invoking the decoder once per step.
latent_points = np.stack([x, y], axis=1)
decoded_patches = decoder_nn.predict(latent_points)

# One tuple of control tensors per step, regrouped per control (f0, op1, ...)
# and joined along the frame axis with a single concat each.
frames = [patch_to_tensor(440, patch) for patch in decoded_patches]
f0, op1, op2, op3, op4, modulators = (
    tf.concat(list(track), axis=1) for track in zip(*frames))

audio = fm_synth(f0, op1, op2, op3, op4, modulators)

# Peak-normalise to 1/1.5 of full scale. The absolute peak is used so that a
# dominant negative excursion cannot end up outside [-1, 1], and silence is
# left untouched rather than divided by zero.
peak = tf.reduce_max(tf.abs(audio[0,:])).numpy()
if peak > 0:
  audio /= peak*1.5
colab_utils.play(audio, sample_rate=SAMPLE_RATE, autoplay=True)

colab_utils.specplot(audio)