In [8]:
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import scipy.io.wavfile as wvf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout, LeakyReLU, Reshape, \
    Conv2DTranspose, Lambda

For our discriminator model, we need to be able to classify input data as real samples or fake samples. 

In [9]:
def define_discriminator(shape=(1024, 36, 1)):
    """Build and compile the discriminator, a binary real-vs-fake classifier.

    Two stride-2 convolutions process the input, each followed by a LeakyReLU
    and 40% dropout; the flattened features feed a single sigmoid unit.

    Args:
        shape: Shape of one input sample, passed to the first layer as
            ``input_shape``.

    Returns:
        A ``Sequential`` model compiled with Adam (learning rate 0.0002,
        beta_1 0.5), binary cross-entropy loss and an accuracy metric.
    """
    model = Sequential()
    # NOTE(review): each Conv2D already applies tanh and is then followed by a
    # LeakyReLU, so two activations are stacked -- confirm this is intended.
    model.add(Conv2D(64, (3, 3), strides=(2, 2), padding="same", activation="tanh",
                     input_shape=shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(Conv2D(128, (3, 3), strides=(2, 2), padding="same", activation="tanh"))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(1, activation="sigmoid"))
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and no longer accepted by newer Keras releases.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [32]:
def define_generator(latent_dimension):
    """Build the (uncompiled) generator mapping latent vectors to samples.

    A dense layer expands the latent vector into a 256 x 9 grid with 128
    channels; two stride-2 transposed convolutions upsample it to 1024 x 36,
    and a final sigmoid convolution reduces it to a single channel.

    Args:
        latent_dimension: Length of the latent input vector.

    Returns:
        A ``Sequential`` model whose per-sample output has shape
        ``(1024, 36, 1)``; the last convolution uses a sigmoid activation.
    """
    n_nodes = 128 * 256 * 9
    model = Sequential()
    model.add(Dense(n_nodes, input_dim=latent_dimension))
    model.add(LeakyReLU(alpha=0.2))
    # Seed the grid in the same (long, short) orientation as the output.
    # Seeding it as (9, 256) and reshaping (36, 1024, 1) -> (1024, 36, 1) at
    # the end would reinterpret memory rather than transpose, scrambling the
    # spatial layout the convolutions produced.
    model.add(Reshape((256, 9, 128)))
    # (256, 9, 128) -> (512, 18, 128)
    model.add(Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"))
    model.add(LeakyReLU(alpha=0.2))
    # (512, 18, 128) -> (1024, 36, 128)
    model.add(Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"))
    model.add(LeakyReLU(alpha=0.2))
    # (1024, 36, 128) -> (1024, 36, 1)
    model.add(Conv2D(1, (16, 8), activation='sigmoid', padding='same'))
    # Identity reshape kept so the expected output shape stays explicit and a
    # size mismatch fails when the model is built.
    model.add(Reshape((1024, 36, 1)))
    return model


In [1]:
# latent_dim = 100
# model = define_generator(latent_dim)
# model.summary()


In [11]:
def define_gan(g_model, d_model):
    """Stack the generator and discriminator into one compiled model.

    Args:
        g_model: Generator model; added as the first stage.
        d_model: Discriminator model; added as the second stage. Its
            ``trainable`` attribute is set to ``False`` on the passed-in
            object, so the caller's model is modified.

    Returns:
        A ``Sequential`` model (generator followed by discriminator) compiled
        with binary cross-entropy loss and Adam (learning rate 0.0002,
        beta_1 0.5).
    """
    # Freeze the discriminator inside the combined model.
    d_model.trainable = False
    model = Sequential()
    model.add(g_model)
    model.add(d_model)
    # `learning_rate` is the supported keyword; the `lr` alias is deprecated
    # and no longer accepted by newer Keras releases.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

In [None]:
# Sanity check: build the generator, the discriminator and the combined GAN
# for a 100-dimensional latent space, then print the combined model's summary.
latent_dim = 100
gen_model = define_generator(latent_dim)
disc_model = define_discriminator()
model = define_gan(gen_model, disc_model)
model.summary()


In [19]:
# load and prepare the real kick-drum training samples
def load_real_samples(sample_dir="sample_kick", n_files=201, sr=44100):
    """Load the hard-kick WAV files and return one mel feature vector per file.

    Files are read from ``<sample_dir>/VEH1 Hard Kick - NNN.wav`` with NNN
    running from 001 up to ``n_files`` (zero-padded to three digits).

    Args:
        sample_dir: Directory that holds the WAV files.
        n_files: Number of consecutively numbered files to load.
        sr: Sample rate passed to ``librosa.load``.

    Returns:
        ``np.ndarray`` with one row per file; each row is the file's mel
        spectrogram averaged over axis 0 of its transpose.
    """
    feature = []
    for i in range(1, n_files + 1):
        path = "{}/VEH1 Hard Kick - {}.wav".format(sample_dir, str(i).zfill(3))
        d, loaded_sr = librosa.load(path, sr=sr, res_type='kaiser_fast')
        # NOTE(review): the mean presumably collapses the time axis, leaving a
        # single vector per file. That does not match the discriminator's
        # default (1024, 36, 1) input shape -- confirm the intended features.
        mels = np.mean(librosa.feature.melspectrogram(y=d, sr=loaded_sr).T, axis=0)
        feature.append(mels)

    trainX = np.array(feature)
    return trainX

- generate wav files 
- convert to spectrogram for analysis
- update generation based on discriminator

In [21]:
def generate_real_samples(dataset, n_samples):
    """Draw a random batch of real samples, labelled 1.

    Args:
        dataset: Array of real samples, indexed along its first axis.
        n_samples: Number of samples to draw (with replacement).

    Returns:
        Tuple ``(X, y)``: ``X`` holds the selected rows of ``dataset`` and
        ``y`` is an ``(n_samples, 1)`` array of ones.
    """
    # Use the numpy namespace explicitly: bare `randint` / `ones` are not
    # imported by the notebook's import cell.
    # Random row indices in [0, len(dataset)); duplicates are possible.
    ix = np.random.randint(0, dataset.shape[0], n_samples)
    X = dataset[ix]
    # 'real' class labels (1)
    y = np.ones((n_samples, 1))
    return X, y

In [14]:
def generate_latent_points(latent_dim, n_samples):
    """Sample standard-normal latent vectors to feed the generator.

    Args:
        latent_dim: Length of each latent vector.
        n_samples: Number of latent vectors to draw.

    Returns:
        ``np.ndarray`` of shape ``(n_samples, latent_dim)``.
    """
    print("generating latent points")
    # Use the numpy namespace explicitly: bare `randn` is not imported by the
    # notebook's import cell.
    x_input = np.random.randn(latent_dim * n_samples)
    # One row per sample.
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input

- input: point in latent space
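- output: 20000-dimensional wav array with values in (??) — **TODO**: resolve the value range and update this note; the current `define_generator` ends in a sigmoid and reshapes its output to `(1024, 36, 1)`, not a 20000-sample waveform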

In [20]:
def generate_fake_samples(g_model, latent_dim, n_samples):
    """Generate a batch of fake samples with the generator, labelled 0.

    Args:
        g_model: Generator model exposing ``predict``.
        latent_dim: Length of each latent vector fed to the generator.
        n_samples: Number of fake samples to produce.

    Returns:
        Tuple ``(X, y)``: ``X`` is the generator's prediction for
        ``n_samples`` random latent points and ``y`` is an
        ``(n_samples, 1)`` array of zeros.
    """
    print("generating fake samples")
    X = g_model.predict(generate_latent_points(latent_dim, n_samples))
    # 'fake' class labels (0). Use the numpy namespace explicitly: bare
    # `zeros` is not imported by the notebook's import cell.
    y = np.zeros((n_samples, 1))
    return X, y

def array_to_spectrogram(array):
    """Convert a batch of audio signals into averaged mel feature vectors.

    Args:
        array: Sequence of audio signals; each one is passed to
            ``librosa.feature.melspectrogram`` with ``sr=44100``.

    Returns:
        ``np.ndarray`` with one row per input signal; each row is that
        signal's mel spectrogram averaged over axis 0 of its transpose.
    """
    feature = []
    # Iterate over the argument itself rather than a global `X`, which the
    # function never received.
    for signal in array:
        mels = np.mean(librosa.feature.melspectrogram(y=signal, sr=44100).T, axis=0)
        feature.append(mels)
    return np.array(feature)

In [22]:
# size of the latent space
latent_dim = 100
# create the discriminator (compiled on its own inside define_discriminator)
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan (this sets d_model.trainable = False on the object above)
gan_model = define_gan(g_model, d_model)
# load the real kick samples (mel features computed from the WAV files)
dataset = load_real_samples()
# train model
# NOTE(review): `train` is not defined in the visible part of this notebook --
# confirm it is defined in a cell that runs before this one.
train(g_model, d_model, gan_model, dataset, latent_dim)

ValueError: Input 0 of layer sequential_1 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: [None, 20000, 1]