#Project: HummingFace | GAN

Léo Dupire & Mateus Aragão

##Data

In [None]:
%cd ./Desktop/HummingFace

/Users/leodupire/Desktop/HummingFace


In [None]:
#@title Imports
import os
import math
import numpy as np
from numpy.random import randn
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from scipy import signal
from scipy.io import wavfile
from pydub import AudioSegment
from IPython.display import Audio, display

###Spectrogram Functions (explore MFCCs)

In [None]:
# Convert raw audio into spectrogram
def spectrogramify(instance, phase, nperseg=1000):
  """Read one NSynth recording from disk and compute its STFT.

  Args:
    instance: record name, i.e. the file name without the ".wav" extension.
    phase: dataset split used to build the folder name (train, val, or test).
    nperseg: segment length forwarded to scipy.signal.stft.

  Returns:
    Tuple (sample_rate, frequencies, times, spectrogram); the last three
    items are the values returned by scipy.signal.stft.
  """
  # `!s` applies str() to each part, exactly like the explicit str() calls would
  wav_path = f"./nsynth-{phase!s}/audio/{instance!s}.wav"
  sample_rate, samples = wavfile.read(wav_path)
  stft_result = signal.stft(samples, fs=sample_rate, nperseg=nperseg)

  return (sample_rate, *stft_result)


# Display spectrogram
def show_spectro(times, frequencies, spectrogram):
  """Draw the magnitude of a spectrogram as a colour mesh.

  Args:
    times: values for the horizontal axis.
    frequencies: values for the vertical axis.
    spectrogram: 2-D array (possibly complex); its absolute value is plotted.

  The figure is 5 x 3 inches and the vertical axis is limited to [0, 2000].
  """
  # Size the figure up front instead of resizing it after creation
  plt.figure(figsize=(5, 3))

  magnitude = np.abs(spectrogram)
  plt.pcolormesh(times, frequencies, magnitude)
  plt.xlabel('Time [sec]')
  plt.ylabel('Frequency [Hz]')
  plt.ylim([0, 2000])
  plt.show()


# Generate .wav file from spectrogram
def spectro_to_wav(spectrogram, sample_rate = 16000):
  """Invert an STFT spectrogram and write the waveform to "output.wav".

  Args:
    spectrogram: complex STFT matrix, as produced by scipy.signal.stft.
    sample_rate: sampling frequency in Hz, used both for the inverse STFT
      and for the WAV header.

  Returns:
    True once the file has been written.
  """
  _, gen = signal.istft(spectrogram, sample_rate)
  # A bare astype(np.int16) truncates toward zero and wraps around on overflow.
  # Round to the nearest integer and saturate at the 16-bit limits instead.
  int16_range = np.iinfo(np.int16)
  pcm = np.clip(np.rint(gen), int16_range.min, int16_range.max).astype(np.int16)
  wavfile.write("output.wav", sample_rate, pcm)
  return True


# Display/play audio from .wav file
def spectro_to_audio(spectrogram, sample_rate = 16000):
  """Render a spectrogram to "output.wav" and embed an auto-playing player.

  Args:
    spectrogram: complex STFT matrix handed to spectro_to_wav.
    sample_rate: sampling frequency in Hz handed to spectro_to_wav.
  """
  # spectro_to_wav always writes to this fixed file name
  spectro_to_wav(spectrogram, sample_rate)
  player = Audio("output.wav", autoplay=True)
  display(player)


# Convert imaginary-valued spectrogram into depth-2 matrix (depth #1: real, depth #2: imaginary)
def decompose_spect(y):
  """Split a complex-valued spectrogram (or batch of them) into two real arrays.

  Args:
    y: array-like of complex values, of any number of dimensions.

  Returns:
    Tuple (y_real, y_imag) of arrays with the same shape as `y`, holding the
    real and imaginary parts respectively. Both are fresh copies, so
    modifying them does not touch `y`.
  """
  spect = np.asarray(y)
  # .real / .imag are vectorised, so no Python-level loop over rows is needed;
  # np.array(...) copies so the results never alias the input.
  return np.array(spect.real), np.array(spect.imag)


# Convert depth-2 (real, imaginary) matrix back to depth-1 matrix with values = real + imaginary*i (i = sqrt(-1))
def recompose_spect(y_real, y_imag):
  """Rebuild a complex array from separate real and imaginary parts.

  Args:
    y_real: array holding the real parts.
    y_imag: array holding the imaginary parts.

  Returns:
    Array whose elements are y_real + y_imag * i (i = sqrt(-1)).
  """
  # Start from the purely imaginary part, then add the real part in place
  combined = 1j*y_imag
  combined += y_real
  return np.array(combined)

###Load Data

In [None]:
# Sample rate; same value as the default `sample_rate` of spectro_to_wav / spectro_to_audio
sample_rate = 16000
# 128 evenly spaced values covering [0, 1]
times = np.linspace(0, 1, 128)
# 128 evenly spaced values covering [0, 2032]
# NOTE(review): presumably plot axes (seconds / Hz), but `times` and `freqs` are never
# read again in this notebook; the plotting cells use `y['times']` / `y['freq']` instead.
freqs = np.linspace(0, 2032, 128)

In [None]:
# Training split: two .npy arrays and one CSV table.
# NOTE(review): presumably the real / imaginary parts of precomputed spectrograms
# plus their metadata; shapes are not visible in this notebook - confirm.
train_real = np.load("./train_real.npy")
train_imag = np.load("./train_imag.npy")
# NOTE(review): `X` is rebound to generated samples in a later cell, so this table is lost.
X = pd.read_csv("./train_df.csv")

# Test split, same layout as the training files above.
test_real = np.load("./test_real.npy")
test_imag = np.load("./test_imag.npy")
test_X = pd.read_csv("./test_df.csv")

##GAN

This GAN did not perform well. It outputs noise, much like the Conditional GAN.

https://machinelearningmastery.com/upsampling-and-transpose-convolution-layers-for-generative-adversarial-networks/

In [None]:
#@title Imports
import tensorflow as tf
# from tensorflow.keras import datasets, layers, models
from keras.models import Sequential
from keras.layers import Dense, Reshape, UpSampling2D, Conv2D, Conv2DTranspose, LeakyReLU, Flatten, Dropout
from keras import Input, Model, metrics, optimizers
from keras.utils.vis_utils import plot_model
from keras.optimizers import Adam
from keras.models import load_model

###Build & Compile Model

####Discriminator

https://machinelearningmastery.com/how-to-develop-a-generative-adversarial-network-for-an-mnist-handwritten-digits-from-scratch-in-keras/

In [None]:
# define the standalone discriminator model
def define_discriminator(in_shape=(501,129,2)):
  """Build and compile the standalone discriminator.

  Two strided 3x3 convolutions (64 filters each, LeakyReLU 0.2, dropout 0.4)
  followed by a single sigmoid unit that scores an input as real or fake.

  Args:
    in_shape: shape of one input sample; the default has 2 channels.

  Returns:
    A compiled keras Sequential model (binary cross-entropy, Adam, accuracy).
  """
  model = Sequential()
  model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same', input_shape=in_shape))
  model.add(LeakyReLU(alpha=0.2))
  model.add(Dropout(0.4))
  model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same'))
  model.add(LeakyReLU(alpha=0.2))
  model.add(Dropout(0.4))
  model.add(Flatten())
  model.add(Dense(1, activation='sigmoid'))
  # compile model
  # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers
  opt = Adam(learning_rate=0.0002, beta_1=0.5)
  model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
  return model
 
# define model
# NOTE(review): the global name `model` is reused for the generator in later cells
model = define_discriminator()
# summarize the model
model.summary()
# plot the model (the diagram is written to discriminator_plot.png)
plot_model(model, to_file='discriminator_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# select real samples
def generate_real_samples(dataset, n_samples):
  """Draw a random batch of real samples, labelled 1.

  Args:
    dataset: array indexed by sample along its first axis.
    n_samples: number of samples to draw (with replacement).

  Returns:
    Tuple (X, y): the selected samples and an (n_samples, 1) array of ones.
  """
  # Indices are drawn independently, so the same sample may appear twice
  picks = np.random.randint(0, dataset.shape[0], n_samples)
  real_labels = np.ones((n_samples, 1))
  return dataset[picks], real_labels

In [None]:
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(g_model, latent_dim, n_samples):
  """Generate a batch of fake samples with the generator, labelled 0.

  Args:
    g_model: generator exposing a `predict` method.
    latent_dim: size of each latent vector.
    n_samples: number of samples to generate.

  Returns:
    Tuple (X, y): the generator output for `n_samples` latent points and an
    (n_samples, 1) array of zeros.
  """
  latent_batch = generate_latent_points(latent_dim, n_samples)
  generated = g_model.predict(latent_batch)
  return generated, np.zeros((n_samples, 1))

#####Train Discriminator

In [None]:
# train the discriminator model
def train_discriminator(model, dataset, n_iter=100, n_batch=256, g_model=None, latent_dim=None):
  """Train the discriminator on its own, alternating real and fake half-batches.

  Args:
    model: compiled discriminator; `train_on_batch` must return (loss, accuracy).
    dataset: array of real samples, indexed by sample along the first axis.
    n_iter: number of training iterations.
    n_batch: nominal batch size; each update uses half of it.
    g_model: optional generator. When given, fake samples come from
      generate_fake_samples(g_model, latent_dim, half_batch).
    latent_dim: latent vector size; required when `g_model` is given.

  Raises:
    ValueError: if `g_model` is given without `latent_dim`.

  When no generator is supplied, the fake half-batch is uniform random noise
  in [0, 1) with the same per-sample shape as `dataset`. (The previous code
  called generate_fake_samples with a single argument, which does not match
  its three-argument signature and raised a TypeError.)
  """
  if g_model is not None and latent_dim is None:
    raise ValueError("latent_dim is required when g_model is given")

  half_batch = int(n_batch / 2)
  # manually enumerate iterations
  for i in range(n_iter):
    # update discriminator on randomly selected 'real' samples
    X_real, y_real = generate_real_samples(dataset, half_batch)
    _, real_acc = model.train_on_batch(X_real, y_real)
    # build the 'fake' half-batch
    if g_model is None:
      X_fake = np.random.rand(half_batch, *dataset.shape[1:])
      y_fake = np.zeros((half_batch, 1))
    else:
      X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
    # update discriminator on fake samples
    _, fake_acc = model.train_on_batch(X_fake, y_fake)
    # summarize performance
    print('>%d real=%.0f%% fake=%.0f%%' % (i+1, real_acc*100, fake_acc*100))

####Generator

"LeakyReLU with a default slope of 0.2, reported as a best practice when training GAN models."

In [None]:
# define the standalone generator model
def define_generator(latent_dim):
  """Build the (uncompiled) generator network.

  A dense layer maps a latent vector to a 5x5x64 feature map, which is then
  upsampled by UpSampling2D / Conv2DTranspose stages. The final transposed
  convolution has 2 filters and no activation, so the output has 2 channels
  of unbounded values.

  Spatial sizes, following the layer parameters below:
  5x5 -> 10x10 -> 20x20 -> 40x40 -> 125x125 -> 501x129, i.e. the output is
  (501, 129, 2), which matches the default input shape of define_discriminator.

  Args:
    latent_dim: length of the latent input vector.

  Returns:
    A keras Sequential model; it is not compiled here.
  """
  model = Sequential()
  # foundation for a 5x5 feature map with 64 channels
  n_nodes = 64 * 5 * 5
  # use the caller's latent size (was hard-coded to 15, silently ignoring the argument)
  model.add(Dense(n_nodes, input_dim=latent_dim))
  model.add(LeakyReLU(alpha=0.2))
  model.add(Reshape((5, 5, 64)))
  # Upsample to 10x10
  model.add(UpSampling2D())
  model.add(Conv2D(64, (3,3), padding='same'))
  model.add(LeakyReLU(alpha=0.2))
  # Upsample to 20x20, then 40x40
  model.add(Conv2DTranspose(64, (3,3), strides=(2,2), padding='same'))
  model.add(LeakyReLU(alpha=0.2))
  model.add(UpSampling2D())
  # Upsample to 125x125 (default 'valid' padding)
  model.add(Conv2DTranspose(64, (8,8), strides=(3,3)))
  model.add(LeakyReLU(alpha=0.2))
  # Stretch to 501x129 and reduce to 2 output channels
  model.add(Conv2DTranspose(2, (5,5), strides=(4,1)))

  return model

####Latent Space Operations

In [None]:
# define the size of the latent space
latent_dim = 15
# define the generator model (rebinds the global `model`, previously the discriminator)
model = define_generator(latent_dim)
# summarize the model
model.summary()
# plot the model (the diagram is written to generator_plot.png)
plot_model(model, to_file='generator_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
  """Sample a batch of latent vectors from a standard normal distribution.

  Args:
    latent_dim: length of each latent vector.
    n_samples: number of vectors to draw.

  Returns:
    Array of shape (n_samples, latent_dim).
  """
  # Drawing straight into the 2-D shape consumes the random stream in the
  # same order as drawing a flat vector and reshaping it.
  return np.random.randn(n_samples, latent_dim)

In [None]:
# size of the latent space
latent_dim = 15
# define the generator model (freshly initialised, not trained)
model = define_generator(latent_dim)
# generate samples
n_samples = 5
# NOTE(review): this rebinds `X`, which previously held the training table from train_df.csv
X, _ = generate_fake_samples(model, latent_dim, n_samples)

In [None]:
# Channel 0 is used as the real part and channel 1 as the imaginary part
X_gen = recompose_spect(X[:, :, :, 0], X[:, :, :, 1]) # Turn into complex valued array

In [None]:
# plot the generated samples
# NOTE(review): no global `y` with 'times' / 'freq' entries is defined anywhere in this
# notebook, so this cell raises NameError on a fresh kernel - confirm where `y` comes from.
for i in range(n_samples):
  show_spectro(y['times'][0], y['freq'][0], X_gen[i])

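No audible sound: the generated values are too small in magnitude, so they presumably collapse to (near) zero when `spectro_to_wav` converts the waveform to 16-bit integers.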

In [None]:
# Writes output.wav from the first generated sample and plays it
spectro_to_audio(X_gen[0])

###GAN

In [None]:
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
  """Stack generator and discriminator into the composite model that trains the generator.

  Args:
    g_model: generator model.
    d_model: discriminator model. NOTE: its `trainable` flag is set to False
      on the object passed in, so the caller's model is modified.

  Returns:
    A compiled keras Sequential model (binary cross-entropy, Adam) that maps
    latent vectors to the discriminator's score.
  """
  # make weights in the discriminator not trainable
  d_model.trainable = False
  # connect them: generator first, discriminator on top
  model = Sequential()
  model.add(g_model)
  model.add(d_model)
  # compile model
  # `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers
  opt = Adam(learning_rate=0.0002, beta_1=0.5)
  model.compile(loss='binary_crossentropy', optimizer=opt)
  return model

In [None]:
# size of the latent space
latent_dim = 15
# create the discriminator
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan (this also sets d_model.trainable = False)
gan_model = define_gan(g_model, d_model)
# summarize gan model
gan_model.summary()
# plot gan model (the diagram is written to gan_plot.png)
plot_model(gan_model, to_file='gan_plot.png', show_shapes=True, show_layer_names=True)

####Train GAN

In [None]:
# train the composite model
def train_gan(gan_model, latent_dim, n_epochs=100, n_batch=256):
  """Update the generator through the composite model only.

  Args:
    gan_model: composite generator + discriminator model.
    latent_dim: length of each latent vector.
    n_epochs: number of single-batch updates to run.
    n_batch: number of latent points per update.
  """
  for _ in range(n_epochs):
    # fresh latent points for every update
    noise = generate_latent_points(latent_dim, n_batch)
    # inverted labels: the fakes are presented as 'real' (1)
    targets = np.ones((n_batch, 1))
    # update the generator via the discriminator's error
    gan_model.train_on_batch(noise, targets)

In [None]:
# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=50, n_batch=256):
  """Alternate discriminator and generator updates over the dataset.

  Args:
    g_model: generator model.
    d_model: compiled discriminator; `train_on_batch` must return (loss, accuracy).
    gan_model: composite model used to update the generator.
    dataset: array of real samples, indexed by sample along the first axis.
    latent_dim: length of each latent vector.
    n_epochs: number of passes; each pass runs dataset.shape[0] // n_batch steps.
    n_batch: batch size; the discriminator sees half real and half fake samples.

  Every 10th epoch summarize_performance is called with the zero-based epoch index.
  """
  bat_per_epo = int(dataset.shape[0] / n_batch)
  half_batch = int(n_batch / 2)
  # manually enumerate epochs
  for i in range(n_epochs):
    # enumerate batches over the training set
    for j in range(bat_per_epo):
      # get randomly selected 'real' samples
      X_real, y_real = generate_real_samples(dataset, half_batch)
      # generate 'fake' examples
      X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
      # create training set for the discriminator
      # Align the real batch with the generator output instead of the former
      # hard-coded (128, 501, 129, 1), which only fit n_batch=256 with a single
      # channel. A dataset stored without a trailing channel axis still gets one;
      # a genuine size mismatch raises ValueError here.
      X = np.vstack((X_real.reshape(X_fake.shape), X_fake))
      y = np.vstack((y_real, y_fake))
      # update discriminator model weights
      d_loss, _ = d_model.train_on_batch(X, y)
      # prepare points in latent space as input for the generator
      X_gan = generate_latent_points(latent_dim, n_batch)
      # create inverted labels for the fake samples
      y_gan = np.ones((n_batch, 1))
      # update the generator via the discriminator's error
      g_loss = gan_model.train_on_batch(X_gan, y_gan)
      # summarize loss on this batch
      print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))

    # evaluate the model performance, sometimes
    if (i+1) % 10 == 0:
      summarize_performance(i, g_model, d_model, dataset, latent_dim)

####Evaluation

In [None]:
# evaluate the discriminator, plot generated images, save generator model
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=100):
  """Report discriminator accuracy, plot generated samples, and checkpoint the generator.

  Args:
    epoch: zero-based epoch index; the checkpoint file name uses epoch + 1.
    g_model: generator model (used to create fakes and saved to disk).
    d_model: discriminator; `evaluate` must return (loss, accuracy).
    dataset: array of real samples.
    latent_dim: length of each latent vector.
    n_samples: number of real and of fake samples to evaluate.
  """
  # accuracy on a random batch of real examples
  real_batch, real_labels = generate_real_samples(dataset, n_samples)
  _, acc_real = d_model.evaluate(real_batch, real_labels, verbose=0)
  # accuracy on a freshly generated batch of fake examples
  fake_batch, fake_labels = generate_fake_samples(g_model, latent_dim, n_samples)
  _, acc_fake = d_model.evaluate(fake_batch, fake_labels, verbose=0)
  # summarize discriminator performance
  print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
  # plot the fakes that were just evaluated
  save_plot(fake_batch, epoch)
  # save the generator model to file
  checkpoint_name = 'generator_model_%03d.h5' % (epoch + 1)
  g_model.save(checkpoint_name)

In [None]:
# display spectrogram plots of generated samples (nothing is written to disk)
def save_plot(examples, epoch, n=10):
  """Show up to n*n generated samples with show_spectro.

  Args:
    examples: batch of generated samples, indexed as examples[i, :, :, 0];
      only channel 0 of each sample is plotted.
    epoch: epoch index; currently unused, kept for interface compatibility.
    n: at most n * n samples are plotted.

  NOTE(review): the plot axes come from a global `y` with 'times' and 'freq'
  entries, which is not defined anywhere in this notebook - it must exist in
  the kernel before this function is called.
  """
  # Never index past the end of the batch (used to raise IndexError when
  # fewer than n * n examples were supplied)
  n_plots = min(n * n, len(examples))
  for i in range(n_plots):
    show_spectro(y['times'][0], y['freq'][0], examples[i, :, :, 0])

In [None]:
# size of the latent space
latent_dim = 15
# create the discriminator
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan
gan_model = define_gan(g_model, d_model)
# load image data
# NOTE(review): `y_real` / `y_imag` are not defined anywhere in this notebook (the loaded
# arrays are named train_real / train_imag) - this raises NameError on a fresh kernel.
# NOTE(review): np.array([a, b]) puts the real/imag axis first; reshape() only reinterprets
# the flat buffer and does not move that axis to the end, so the two channels of a sample
# will not hold matching real/imag values - np.stack(..., axis=-1) is probably intended. Confirm.
dataset = np.array([y_real, y_imag]).reshape(1000, 501, 129, 2)

In [None]:
# train model (uses train()'s default number of epochs and batch size)
train(g_model, d_model, gan_model, dataset, latent_dim)

####GAN Output

In [None]:
# load model
# NOTE(review): summarize_performance saves checkpoints as 'generator_model_%03d.h5'
# (e.g. 'generator_model_020.h5'); this file name does not follow that pattern - confirm it exists.
model = load_model('generator_model_0_020.h5')
# sample 10 latent points of dimension 15 (the latent size used throughout this notebook)
latent_points = generate_latent_points(15, 10)
# generate images
preds = model.predict(latent_points)

In [None]:
# Channel 0 is used as the real part and channel 1 as the imaginary part
X_gen = recompose_spect(preds[:, :, :, 0], preds[:, :, :, 1]) # Turn into complex valued array

# plot the generated samples
# NOTE(review): relies on a global `y` with 'times' / 'freq' entries that is not defined
# anywhere in this notebook - confirm where it comes from.
for i in range(X_gen.shape[0]):
  show_spectro(y['times'][0], y['freq'][0], X_gen[i])

In [None]:
# Writes output.wav from the first generated sample and plays it
spectro_to_audio(X_gen[0])