In [None]:
# Reload imported modules automatically before each cell is executed, so that
# edits to library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# import sys
# sys.path.insert(0, '../')
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"


import numpy as np
import tensorflow as tf

from gantools import data
from gantools import utils
from gantools import plot
from gantools.model import WGAN, LapWGAN, UpscalePatchWGAN
from gantools.gansystem import GANsystem
from gantools.data import fmap
from gantools import evaluation
import functools
import matplotlib.pyplot as plt
from copy import deepcopy

# Parameters

In [None]:
# Downscaling factor: passed to the dataset loader as `scaling` and used to
# derive the sampling rate (16000 // downscale) in the cells further down.
downscale = 1


# Data handling

Load the data

In [None]:
# Load the piano audio dataset with augmentation switched on.
# `spix` is presumably the length of each returned signal (it matches the
# first entry of params['net']['shape'] defined later) — confirm in gantools.
dataset = data.load.load_audio_dataset(
    scaling=downscale,
    type='piano',
    spix=1024*16,
    augmentation=True,
)

In [None]:
# Show the dataset's `N` attribute (presumably the number of samples — confirm).
dataset.N

In [None]:
# The dataset can also be consumed through an iterator: request one with
# dataset.iter(10) (10 is presumably the batch size), print the shape of the
# first element it yields, and drop the iterator again.
batch_iter = dataset.iter(10)
print(next(batch_iter).shape)
del batch_iter

In [None]:
# Get all the data as one flat array; it is only needed for the histogram
# in the next code cell and is deleted right after.
X = dataset.get_all_data().flatten()

Display the histogram of the audio sample values (counts shown on a logarithmic scale)

In [None]:
# Histogram of every value in X using 100 bins, drawn with a logarithmic
# count axis; the value range is printed alongside.
plt.hist(X, bins=100)
plt.yscale('log')
print('min: {}'.format(np.min(X)))
print('max: {}'.format(np.max(X)))

In [None]:
# Free some memory: the flattened copy of the dataset is not used after the
# histogram above.
del X

Let us plot 16 samples

In [None]:
# Draw 16 samples from the dataset and plot them with nx=4, ny=4.
# The trailing semicolons suppress the textual repr of the returned objects.
plot.audio.plot_signals(dataset.get_samples(N=16),nx=4,ny=4);
plt.suptitle("Real samples");

In [None]:
# Draw 16 samples and play the first one; the sampling rate follows the
# downscaling factor chosen in the parameters cell.
plot.audio.play_sound(dataset.get_samples(16)[0,:], fs=16000//downscale)

# Define parameters for the WGAN

In [None]:
# Identifiers from which the summary and checkpoint directories are built.
global_path = '../saved_results'
time_str = 'piano_16k_wavegan'

# Run name: model prefix followed by the experiment tag.
name = 'WGAN_{}'.format(time_str)

## Parameters

In [None]:
from gantools import blocks

# Batch-normalisation switch shared by both networks, and the base filter
# count that every layer width below is a multiple of.
bn = False
md = 64

# Discriminator: five layers with stride 4 and filter shape [25]; the number
# of filters grows from md to 16*md and the 'full' list is left empty.
params_discriminator = {
    'stride': [4] * 5,
    'nfilter': [md, 2*md, 4*md, 8*md, 16*md],
    'shape': [[25], [25], [25], [25], [25]],
    'batch_norm': [bn] * 5,
    'full': [],
    'minibatch_reg': False,
    'summary': True,
    'data_size': 1,
    'apply_phaseshuffle': True,
    'spectral_norm': True,
    'activation': blocks.lrelu,
}

# Generator: one 'full' entry of 256*md followed by five layers with stride 4
# whose filter counts shrink from 8*md down to 1. Note that 'batch_norm' has
# four entries, one fewer than 'nfilter'.
params_generator = {
    'stride': [4] * 5,
    'latent_dim': 100,
    'nfilter': [8*md, 4*md, 2*md, md, 1],
    'shape': [[25], [25], [25], [25], [25]],
    'batch_norm': [bn] * 4,
    'full': [256*md],
    'summary': True,
    'non_lin': tf.nn.tanh,
    'activation': tf.nn.relu,
    'data_size': 1,
    'spectral_norm': True,
    'in_conv_shape': [16],
}

# Optimisation: both networks use Adam with identical settings.
params_optimization = {
    'batch_size': 64,
    'epoch': 10000,
    'n_critic': 5,
    'generator': {
        'optimizer': 'adam',
        'kwargs': {'beta1': 0.5, 'beta2': 0.9},
        'learning_rate': 1e-4,
    },
    'discriminator': {
        'optimizer': 'adam',
        'kwargs': {'beta1': 0.5, 'beta2': 0.9},
        'learning_rate': 1e-4,
    },
}

# all parameters
params = {
    # All the parameters for the model
    'net': {
        'generator': params_generator,
        'discriminator': params_discriminator,
        'prior_distribution': 'gaussian',
        # Shape of one signal (1024*16 matches `spix` used when loading the dataset)
        'shape': [1024*16, 1],
        'gamma_gp': 10,  # Gradient penalty
        'fs': 16000//downscale,
        'loss_type': 'wasserstein',
    },
    'optimization': params_optimization,
    'summary_every': 100,  # Tensorboard summaries every ** iterations
    'print_every': 50,  # Console summaries every ** iterations
    'save_every': 1000,  # Save the model every ** iterations
    'summary_dir': os.path.join(global_path, name + '_summary/'),
    'save_dir': os.path.join(global_path, name + '_checkpoints/'),
    'Nstats': 500,
}



In [None]:
# utils.test_resume returns a resume flag together with a params dict
# (presumably the saved one when an earlier run exists — confirm in
# gantools.utils).
resume, params = utils.test_resume(True, params)
# Set the epoch count explicitly on whatever params came back.
params['optimization']['epoch'] = 10000


# Build the model

In [None]:
# Wrap the WGAN model class in a GANsystem configured with `params`.
wgan = GANsystem(WGAN, params)

# Train the model

In [None]:
# Train on the audio dataset, passing on the resume flag obtained from
# utils.test_resume.
wgan.train(dataset, resume=resume)

# Generate new samples
To have meaningful statistics, be sure to generate enough samples
* 2000 : 32 x 32
* 500 : 64 x 64
* 200 : 128 x 128


In [None]:
N = 16 # Number of samples
# Draw N real signals from the dataset and generate N fake ones;
# np.squeeze removes any length-1 axes from the generator output.
real_signals = dataset.get_samples(N=N)
fake_signals = np.squeeze(wgan.generate(N=N))

In [None]:
# NOTE(review): leftover scratch cell — it only evaluates the literal 1.
1

Display a few fake samples

In [None]:
# Plot the generated signals with nx=4, ny=4.
# `gen_sample` is never defined in this notebook (NameError on a fresh
# kernel); the generated batch is stored in `fake_signals`.
plot.audio.plot_signals(fake_signals, nx=4, ny=4);
plt.suptitle("Fake samples");

In [None]:
# Plot the real signals drawn alongside the fake ones, using the same
# nx=4, ny=4 layout for comparison.
plot.audio.plot_signals(real_signals,nx=4,ny=4);
plt.suptitle("Real samples");

In [None]:
import ltfatpy
from ltfatpy import plotdgtreal
def plot_sgram(signal, a = 256, M = 512, g='itersine', dynrange=80, **kwargs):
    """Plot a spectrogram of ``signal`` from its real-valued Gabor transform.

    Parameters
    ----------
    signal : signal handed to ``dgtreal``
    a : second Gabor parameter handed to ``dgtreal`` and ``plotdgtreal``
        (the time shift / hop size in ltfatpy's convention)
    M : third Gabor parameter handed to ``dgtreal`` and ``plotdgtreal``
        (the number of channels in ltfatpy's convention)
    g : window specification handed to ``dgtreal``
    dynrange : forwarded to ``plotdgtreal`` as ``dynrange``
    **kwargs : forwarded unchanged to ``plotdgtreal`` (e.g. ``fs``)

    Returns
    -------
    Whatever ``plotdgtreal`` returns.
    """
    # dgtreal returns several values; only the first one (the coefficients)
    # is needed for plotting.
    transform_outputs = ltfatpy.gabor.dgtreal.dgtreal(signal, g, a, M)
    coefficients = transform_outputs[0]
    return plotdgtreal(coefficients, a, M, dynrange=dynrange, **kwargs)

In [None]:
# For the first four pairs, play the real signal followed by the fake one,
# each preceded by a printed label.
for i in range(4):
    print('Real')
    plot.audio.play_sound(real_signals[i,:], fs=16000//downscale)    
    print('Fake')
    plot.audio.play_sound(fake_signals[i,:], fs=16000//downscale)

In [None]:
# Side-by-side spectrograms of the first four fake and real signals.
# The titles used to read 'Inpainted'/'Original', which does not describe
# this notebook: the left panel shows generated signals, the right real ones.
for i in range(4):
    plt.figure(figsize=(15, 4))
    plt.subplot(121)
    # Signals are cast to float64 before the Gabor transform.
    plot_sgram(fake_signals[i].astype(np.float64), fs=16000//downscale);
    plt.title('Fake')
    plt.subplot(122)
    plot_sgram(real_signals[i].astype(np.float64), fs=16000//downscale);
    plt.title('Real')

# Evaluation of the sample quality

In [None]:
# Play the first generated signal. `gen_sample` is never defined in this
# notebook (NameError on a fresh kernel); the generated batch is stored in
# `fake_signals`.
plot.audio.play_sound(fake_signals[0,:], fs=16000//downscale)