In [None]:
# Reload imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import sys
sys.path.insert(0, '../')
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import numpy as np
import tensorflow as tf
import scipy.io

from gantools import data
from gantools import utils
from gantools import plot
from gantools.model import WGAN, InpaintingGAN
from gantools.data.Dataset import Dataset
from gantools.gansystem import GANsystem
from gantools.data import fmap
import functools
import matplotlib.pyplot as plt
from copy import deepcopy

# Parameters

In [None]:
# Integer divisor of the sampling rate: params['net']['fs'] is set to 16000 // downscale.
downscale = 1

# Define parameters for the WGAN

In [None]:
# Root directory that receives the summary and checkpoint folders of this run.
global_path = '../saved_results'

# Experiment name; name + '_summary/' and name + '_checkpoints/' are created under global_path.
name = 'maestro_160_64_multiple_dis'

## Parameters

In [None]:
from gantools import blocks
# Batch-norm flag that is replicated into every 'batch_norm' list of this cell.
bn = False
# Widths along axis 2 of a batch: the first and last entries select the two borders
# that are fed to the generator; the middle entry is presumably the gap to inpaint.
# The three widths sum to 384, the second entry of params['net']['shape'].
signal_split = [160, 64, 160]
# Base number of filters; the 'nfilter' lists are written as multiples of it.
md = 32

# Discriminator settings: five stride-2 layers with 5x5 kernels whose filter
# count doubles at every layer, starting from md.
params_discriminator = {
    'stride': [2] * 5,
    'nfilter': [md * (2 ** level) for level in range(5)],
    'shape': [[5, 5] for _ in range(5)],
    'batch_norm': [bn] * 5,
    'full': [],
    'minibatch_reg': False,
    'summary': True,
    'data_size': 2,
    'apply_phaseshuffle': True,
    'spectral_norm': True,
    'activation': blocks.lrelu,
}

# Generator settings: one fully connected layer followed by five stride-2 layers.
params_generator = dict()
params_generator['stride'] = [2, 2, 2, 2, 2]
params_generator['latent_dim'] = 100
params_generator['nfilter'] = [8*md, 4*md, 2*md, md, 1]
params_generator['shape'] = [[5, 5], [5, 5], [5, 5], [5, 5], [5, 5]]
# Four flags for five layers.
# NOTE(review): presumably the last layer takes no batch-norm flag - confirm against gantools.
params_generator['batch_norm'] = [bn, bn, bn, bn]
params_generator['full'] = [256*md]
params_generator['summary'] = True
params_generator['non_lin'] = tf.nn.tanh
params_generator['activation'] = tf.nn.relu
params_generator['data_size'] = 2
params_generator['spectral_norm'] = True
# 8 x 2 doubled by five stride-2 layers gives 256 x 64, which matches the first
# entry of params['net']['shape'] and the middle entry of signal_split.
params_generator['in_conv_shape'] = [8, 2]
# Settings nested under 'borders' (presumably the network that encodes the two borders).
params_generator['borders'] = dict()
# Integer division on purpose: md/2 evaluates to the float 16.0 under Python 3,
# and a number of filters has to be an int.
params_generator['borders']['nfilter'] = [md, 2*md, md, md//2]
params_generator['borders']['batch_norm'] = [bn, bn, bn, bn]
params_generator['borders']['shape'] = [[5, 5], [5, 5], [5, 5], [5, 5]]
params_generator['borders']['stride'] = [2, 2, 3, 4]
params_generator['borders']['data_size'] = 2
# 'width_full' is disabled: it does not work because of flipping - border 2 needs to be
# flipped with tf.reverse(l, axis=[1]). Open question, ask Nathanael.
params_generator['borders']['width_full'] = None
params_generator['borders']['activation'] = tf.nn.relu


# Optimization parameters inspired from 'Self-Attention Generative Adversarial Networks',
# whose recipe is:
# - Spectral normalization GEN DISC
# - Batch norm GEN
# - TTUR ('GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium')
# - ADAM  beta1=0 beta2=0.9, disc lr 0.0004, gen lr 0.0001
# - Hinge loss
# Parameters are similar to the ones in those papers...
# - 'PROGRESSIVE GROWING OF GANS FOR IMPROVED QUALITY, STABILITY, AND VARIATION'
# - 'LARGE SCALE GAN TRAINING FOR HIGH FIDELITY NATURAL IMAGE SYNTHESIS'
# - 'CGANS WITH PROJECTION DISCRIMINATOR'
#
# NOTE: the values assigned here are not that recipe verbatim - both optimizers
# use beta1=0.5 and the same learning rate of 1e-4.
params_optimization = {
    'batch_size': 64 * 2,
    'epoch': 600,
    'n_critic': 5,
    'generator': {
        'optimizer': 'adam',
        'kwargs': {'beta1': 0.5, 'beta2': 0.9},
        'learning_rate': 1e-4,
    },
    'discriminator': {
        'optimizer': 'adam',
        'kwargs': {'beta1': 0.5, 'beta2': 0.9},
        'learning_rate': 1e-4,
    },
}



# All parameters: model settings under 'net', optimizer settings under
# 'optimization', logging / checkpoint options at the top level.
params = {
    'net': {  # All the parameters for the model
        'generator': params_generator,
        'discriminator': params_discriminator,
        'prior_distribution': 'gaussian',
        'shape': [256, 128 * 3, 1],  # Shape of the image
        'inpainting': {'split': signal_split},
        'gamma_gp': 10,  # Gradient penalty
        'fs': 16000 // downscale,
        'loss_type': 'wasserstein',
    },
    'optimization': params_optimization,
    'summary_every': 250,  # Tensorboard summaries every ** iterations
    'print_every': 50,  # Console summaries every ** iterations
    'save_every': 1000,  # Save the model every ** iterations
    'summary_dir': os.path.join(global_path, name + '_summary/'),
    'save_dir': os.path.join(global_path, name + '_checkpoints/'),
    'Nstats': 500,
}


In [None]:
# NOTE(review): test_resume is a gantools helper; it presumably restores previously
# saved parameters when its first argument is True - confirm in gantools.utils.
resume, params = utils.test_resume(True, params)
# If a model is reloaded and some parameters have to be changed, then it should be done here.
# For example, setting the number of epochs to 5000 would be:
# params['optimization']['epoch'] = 5000


# Build the model

In [None]:
# NOTE: the class name is imported with the spelling 'Discrimnator'; keep it unless gantools is renamed too.
from gantools.model import MultipleDiscrimnatorInpaintingGAN

# Combine the model class with the parameter dictionary; wgan.generate is used further down.
wgan = GANsystem(MultipleDiscrimnatorInpaintingGAN, params)

In [None]:
def read_tfrecord(serialized_example, shape=(256, 384)):
    """Decode one serialized TFRecord example into a float32 spectrogram tensor.

    Args:
        serialized_example: scalar string tensor holding one serialized example
            whose 'train/window' feature contains the raw bytes of a float32 array.
        shape: shape the decoded values are reshaped to. Defaults to (256, 384),
            the shape this notebook originally hard-coded.

    Returns:
        A tf.float32 tensor of shape `shape`.
    """
    feature_description = {
        'train/window': tf.io.FixedLenFeature((), tf.string)}
    example = tf.io.parse_single_example(serialized_example, feature_description)
    # tf.io.decode_raw replaces the deprecated top-level alias tf.decode_raw and is
    # consistent with the other tf.io.* calls of this function.
    flat_window = tf.io.decode_raw(example['train/window'], tf.float32)
    spectrogram = tf.reshape(flat_window, list(shape))

    return spectrogram

num_epochs = 10

# Input pipeline: read the records, shuffle, repeat for num_epochs, decode each
# record with read_tfrecord, then batch and prefetch.
dataset = (
    tf.data.TFRecordDataset("../data/yiruma_train_inpainting_w384_h32_27261.tfrecords")
    .shuffle(buffer_size=10000)
    .repeat(num_epochs)
    .map(map_func=read_tfrecord)  # , num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(batch_size=128)
    .prefetch(buffer_size=1)  # this should be the last transformation
)
# NOTE(review): N is attached as a plain attribute, presumably the dataset size
# expected by gantools - confirm the value (the file name ends in 27261).
dataset.N = 2837745

In [None]:
# Number of latent vectors to draw and their dimensionality.
# nlatent has the same value as params_generator['latent_dim'] (100).
nsamples = 64
nlatent = 100

def clip_dist2(nsamples, nlatent, m=2.5):
    """Draw standard-normal samples truncated to the interval [-m, m].

    Entries that fall outside the interval are redrawn (rejection sampling)
    until every entry satisfies |z| <= m.

    Args:
        nsamples: number of rows of the returned array.
        nlatent: number of columns of the returned array.
        m: half-width of the accepted interval; must be strictly positive.

    Returns:
        numpy.ndarray of shape (nsamples, nlatent) with float64 entries in [-m, m].

    Raises:
        ValueError: if m is not strictly positive; the redraw loop would
            otherwise effectively never terminate.
    """
    if not m > 0:
        raise ValueError("m must be strictly positive, got {!r}".format(m))
    z = np.random.randn(nsamples, nlatent)
    rejected = np.abs(z) > m
    while rejected.any():
        # Redraw only the rejected entries instead of a whole new array.
        z[rejected] = np.random.randn(np.count_nonzero(rejected))
        rejected = np.abs(z) > m
    return z

# Batch of truncated latent vectors; passed as z to wgan.generate further down.
d2 = clip_dist2(nsamples, nlatent)
# Sanity check: both values should lie within [-2.5, 2.5], the default m of clip_dist2.
np.max(d2), np.min(d2)

# Generate new samples
To obtain meaningful statistics, be sure to generate enough samples:
* 2000 : 32 x 32
* 500 : 64 x 64
* 200 : 128 x 128


In [None]:
# get_next() returns the tensor that yields the next batch; the name 'iterator'
# is kept so that other cells referring to it keep working.
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
# Evaluate a single batch in a short-lived session; the context manager closes
# the session afterwards instead of leaving it open for the kernel's lifetime.
with tf.Session() as data_session:
    real_signals = data_session.run(iterator)

In [None]:
N = 64*2 # Number of samples
#real_signals = dataset.get_samples(N=N)
# Borders given to the generator: the first signal_split[0] and the last
# signal_split[2] entries along axis 2 of the real batch.
border1 = real_signals[:, :, :signal_split[0]]
border2 = real_signals[:, :, -signal_split[2]:]
# Stack both borders along a new trailing axis (axis 3).
borders = np.stack([border1, border2], axis=3)
# NOTE(review): N is 128, but only 64 borders and the 64 latent vectors of d2 are
# passed - confirm how wgan.generate reconciles N with the length of its inputs.
gen_sample = np.squeeze(wgan.generate(N=N, borders=borders[:64], z=d2))

In [None]:
# Generated samples; nx / ny presumably set the image grid of plot.draw_images - confirm in gantools.plot.
plt.figure(figsize=(15,15))
plot.draw_images(gen_sample,nx=4,ny=4);
plt.title("Inpainted samples");

In [None]:
# Real samples of the batch read from the dataset, drawn with the same settings as the generated ones.
plt.figure(figsize=(15,15))
plot.draw_images(real_signals,nx=4,ny=4);
plt.title("Original samples");

In [None]:
# Element-wise difference between the first 64 real samples and the generated samples.
plt.figure(figsize=(15,15))
plot.draw_images(real_signals[:64]-gen_sample,nx=4,ny=4);
plt.title("Diffs");

In [None]:
## Phase recovery
# Turns the generated magnitude spectrograms into audio: estimate the phase
# gradients, integrate them with pghi, then invert the STFT.

from data.ourLTFATStft import LTFATStft
import ltfatpy
from phase_recovery.numba_pghi import pghi
from data.modGabPhaseGrad import modgabphasegrad
ltfatpy.gabphasegrad = modgabphasegrad # Monkey-patch: the original function is not implemented for one-sided STFTs in ltfatpy


generated_signals = np.exp(5*(gen_sample-1)) # Undo preprocessing: x -> exp(5 * (x - 1))
generated_signals = np.concatenate([generated_signals, np.zeros_like(gen_sample)[:, 0:1, :]], axis=1) # Append one all-zero slice along axis 1 (the frequency axis, per the original comment)

fft_hop_size = 128 # Change the fft params if the dataset was generated with others
fft_window_length = 512
# Length in samples of every reconstructed signal (16384*3 = 49152 = 384 * fft_hop_size).
L = 16384*3
# NOTE(review): clipBelow is not used anywhere in this cell.
clipBelow = -10

anStftWrapper = LTFATStft()

# Compute Tgrad and Fgrad from the generated spectrograms
tgrads = np.zeros_like(generated_signals)
fgrads = np.zeros_like(generated_signals)
gs = {'name': 'gauss', 'M': fft_window_length}
for index, magSpectrogram in enumerate(generated_signals):
    tgrads[index], fgrads[index] = ltfatpy.gabphasegrad('abs', magSpectrogram, gs, fft_hop_size)

# One row of L samples per generated spectrogram.
reconstructed_audios = np.zeros([len(generated_signals), L])
for index, magSpectrogram in enumerate(generated_signals):
    # NOTE(review): the all-zero slice appended above makes np.log return -inf for
    # those entries - confirm that pghi and the reconstruction handle it.
    logMagSpectrogram = np.log(magSpectrogram.astype(np.float64))
    phase = pghi(logMagSpectrogram, tgrads[index], fgrads[index], fft_hop_size, fft_window_length, L, tol=10)
    reconstructed_audios[index] = anStftWrapper.reconstructSignalFromLoggedSpectogram(logMagSpectrogram, phase, windowLength=fft_window_length, hopSize=fft_hop_size)

print("reconstructed audios!")

In [None]:
from IPython.display import display, Audio

# One audio player per reconstructed signal, with a playback rate of 16000 Hz.
# The loop variable stays bound in the notebook namespace after the loop ends.
for generated_audio_signal in reconstructed_audios:
    display(Audio(generated_audio_signal, rate=16000))

In [None]:
# Join all reconstructed signals into one track, each followed by 6000 samples of
# silence. The pieces are collected in a list and concatenated once, instead of
# copying the growing array with np.append on every iteration.
silence_gap = np.zeros(6000)
audio_pieces = [np.array([])]  # seed keeps the result an empty float array when there is no signal
for generated_audio_signal in reconstructed_audios:
    # np.ravel mirrors the flattening that np.append applied to its arguments.
    audio_pieces.append(np.ravel(generated_audio_signal))
    audio_pieces.append(silence_gap)
complete_audio = np.concatenate(audio_pieces)


In [None]:
# Single player for the concatenated track, with a playback rate of 16000 Hz.
display(Audio(complete_audio, rate=16000))

In [None]:
import librosa  # no longer needed by this cell; kept in case other cells rely on the name
from scipy.io import wavfile

# librosa.output.write_wav was deprecated in librosa 0.7 and removed in 0.8. It
# validated the buffer and then delegated to scipy.io.wavfile.write, so the file is
# written through scipy directly; the samples keep the dtype of complete_audio.
if not np.all(np.isfinite(complete_audio)):
    raise ValueError("complete_audio contains NaN or infinite samples")
wavfile.write('valid_160_64_inpainting_130k.wav', 16000, complete_audio)

In [None]:
# Length in samples of one reconstructed signal, read from the array itself rather
# than from a loop variable left bound by another cell.
print(reconstructed_audios.shape[1])

In [None]:
# Rescale both arrays with x -> 5 * (x - 1) before exporting them.
# NOTE: the names are rebound in place, so running this cell a second time applies
# the rescaling twice; re-run the cells that create the arrays first.
real_signals = 5 * (real_signals - 1)
gen_sample = 5 * (gen_sample - 1)

# Print max, min and mean of the real samples, then of the generated ones.
for rescaled in (real_signals, gen_sample):
    for value in (rescaled.max(), rescaled.min(), rescaled.mean()):
        print(value)
import scipy.io

# Store both arrays in one MATLAB file under the keys "original" and "inpainted".
scipy.io.savemat(
    'test_valid_160_64_inpainting_130k.mat',
    {"original": real_signals, "inpainted": gen_sample},
)

Display a few fake samples