In [5]:
from __future__ import absolute_import, division, print_function, unicode_literals
from utils import get_data


# 헬퍼 라이브러리들
import numpy as np
import os, librosa, time
import IPython.display as ipd
from tensorflow.keras.layers import *
from tensorflow.keras import Model, Sequential
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Import TensorFlow
import tensorflow as tf

In [6]:
generate_path = './generated_noise/'
feature = 'seq'
audio_path = '/root/datasets/ai_challenge/NOISEX/all/'
resample_sr = 16000
length = 4
train_data, train_label, label_list = get_data(feature=feature,resample_sr=resample_sr,length=length,audio_path=audio_path)

1/45
2/45
3/45
4/45
5/45
6/45
7/45
8/45
9/45
10/45
11/45
12/45
13/45
14/45
15/45
16/45
17/45
18/45
19/45
20/45
21/45
22/45
23/45
24/45
25/45
26/45
27/45
28/45
29/45
30/45
31/45
32/45
33/45
34/45
35/45
36/45
37/45
38/45
39/45
40/45
41/45
42/45
43/45
44/45
45/45
data preprocessing complete, data feature is seq


In [7]:
strategy = tf.distribute.MirroredStrategy()
print ('GPU number: {}'.format(strategy.num_replicas_in_sync))

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
GPU number: 1


In [8]:
BUFFER_SIZE = len(train_data)
class_num = len(label_list)
BATCH_SIZE_PER_REPLICA = 32
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
shape = train_data[0].shape
EPOCHS = 100
noise_dim = 30
DATA_SHAPE = train_data[0].shape

with strategy.scope():
    train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_label)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE) 
    train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)

def build_generator(output_shape=shape, class_num=class_num, stddev=0.2, z_dim=noise_dim):
    noise = Input(shape=(z_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(class_num, z_dim)(label))

    model_input = Concatenate()([noise, label_embedding])
    
    x = Dense(400, activation='relu')(model_input)
    if tf.rank(x) == 2:
        x = tf.expand_dims(x, axis=1)
    x = LSTM(100, return_sequences=True)(x)
    
    if feature == 'seq':
        x = Dense(output_shape[0], activation='tanh')(x)
        x = Flatten()(x)
        output = Reshape(output_shape)(x)
    else:
        x = Dense(output_shape[0]*output_shape[1], activation='tanh')(x)
        output = Reshape(output_shape)(x)

    return Model([noise, label], output)

def build_discriminator(input_shape=shape, class_num=class_num, stddev=0.2):
    noise_input = Input(shape=input_shape)
    reshaped_noise = Flatten()(noise_input)

    noise = Input(shape=input_shape)
    label = Input(shape=(1,), dtype='int32')

    label_embedding = Flatten()(Embedding(class_num, np.prod(input_shape))(label))
    flat_noise = Flatten()(noise)

    x = Multiply()([flat_noise, label_embedding])
    if tf.rank(x) == 2:
        x = tf.expand_dims(x, axis=-1)
#     x = tf.transpose(x,[0,2,1])
#     x = AveragePooling1D()(x)
#     x = LSTM(512, return_sequences=True)(x)
    x = LSTM(100)(x)
#     x = Flatten()(x)
#     x = LSTM(64)(x)
#     x = tf.expand_dims(x, axis=1)
#     x = LSTM(64)(x)
    output = Dense(2, activation='softmax')(x)
    

    return Model([noise, label], output)

In [13]:
# 체크포인트들을 저장하기 위해서 체크포인트 디렉토리를 생성합니다.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

with strategy.scope():
    # reduction을 `none`으로 설정합니다. 그래서 우리는 축소를 나중에 하고,
    # 또는 loss_fn = tf.keras.losses.sparse_categorical_crossentropy를 사용해도 됩니다.
    gen_cross_entropy = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    dis_cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    
    
    def generator_loss(fake_output):
        return tf.nn.compute_average_loss(gen_cross_entropy(tf.ones_like(fake_output), fake_output), global_batch_size=GLOBAL_BATCH_SIZE)

    def discriminator_loss(real_output, fake_output):
        #real_output = (batch, 2)
        real_loss = tf.nn.compute_average_loss(dis_cross_entropy(tf.ones(real_output.shape[0],1), real_output), global_batch_size=GLOBAL_BATCH_SIZE)
        fake_loss = tf.nn.compute_average_loss(dis_cross_entropy(tf.zeros((fake_output.shape[0],1)), fake_output), global_batch_size=GLOBAL_BATCH_SIZE)
        return tf.math.divide_no_nan(tf.math.add(real_loss,fake_loss),2)

with strategy.scope():
    gen_loss = tf.keras.metrics.Mean(name='gen_loss')
    dis_loss = tf.keras.metrics.Mean(name='dis_loss')

    dis_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
      name='dis_accuracy')
    
    # 모델과 옵티마이저는 `strategy.scope`에서 만들어져야 합니다.
    generator = build_generator()
    discriminator = build_discriminator()
#     generator_optimizer = tf.keras.optimizers.Adam(0.001)
#     discriminator_optimizer = tf.keras.optimizers.Adam(0.001)
    generator_optimizer = tf.keras.optimizers.SGD(0.03)
    discriminator_optimizer = tf.keras.optimizers.SGD(0.03)

    checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                    discriminator_optimizer=discriminator_optimizer,
                                    generator=generator,
                                    discriminator=discriminator)
    
    def train_step(noise, label):
        random_noise = tf.random.normal([noise.shape[0], noise_dim],dtype=tf.float32)
        noise = tf.cast(noise,dtype=tf.float32)
        label = tf.expand_dims(label, axis=-1)
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_noise = generator([random_noise, label], training=True)

            real_output = discriminator([noise, label], training=True)
            fake_output = discriminator([generated_noise, label], training=True)

            g_loss = generator_loss(fake_output)
            d_loss = discriminator_loss(real_output, fake_output)
            gen_loss.update_state(g_loss)
            dis_loss.update_state(d_loss)

        gradients_of_generator = gen_tape.gradient(g_loss, generator.trainable_variables)
        gradients_of_discriminator = disc_tape.gradient(d_loss, discriminator.trainable_variables)
        
        generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
        discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))


        dis_accuracy.update_state(tf.ones((real_output[0],1)), real_output)
        dis_accuracy.update_state(tf.zeros((fake_output[0],1)), fake_output)
        return g_loss


In [14]:
discriminator.summary()
# g = build_generator()
# d = build_discriminator()
# random_noise = tf.random.normal([2, noise_dim],dtype=tf.float32)
# s = g([random_noise, tf.expand_dims(tf.constant([[1],[1]]), axis=-1)])
# k = d([tf.constant([train_data[0], train_data[1]]), tf.expand_dims(tf.constant([[1],[1]]), axis=-1)], training=False)
# b = d([np.expand_dims(s , axis=-1), tf.expand_dims(tf.constant([1]), axis=-1)], training=False)
# print(tf.ones_like(k[0]))

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_9 (InputLayer)            [(None, 64000, 1)]   0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 64000)     960000      input_10[0][0]                   
__________________________________________________________________________________________________
flatten_9 (Flatten)             (None, 64000)        0           input_9[0][0]                    
____________________________________________________________________________________________

In [15]:
generator.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 30)        450         input_7[0][0]                    
__________________________________________________________________________________________________
input_6 (InputLayer)            [(None, 30)]         0                                            
__________________________________________________________________________________________________
flatten_5 (Flatten)             (None, 30)           0           embedding_2[0][0]                
____________________________________________________________________________________________

In [16]:
with strategy.scope():
    # `experimental_run_v2`는 주어진 계산을 복사하고,
    # 분산된 입력으로 계산을 수행합니다.
        
    def distributed_train_step(noise_batch, label_batch):
        per_replica_losses = strategy.run(train_step, args=(noise_batch, label_batch))
        return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
    
    for epoch in range(EPOCHS):
        # 훈련 루프
        start = time.time()
        g_loss, step = 0., 0
        for noise_batch, label_batch in train_dist_dataset:
            distributed_train_step(noise_batch, label_batch)
            step += 1


        if epoch % 2 == 0:
            checkpoint.save(checkpoint_prefix)
        print (f'{epoch}: d_loss: {dis_loss.result()}, d_accuracy: {dis_accuracy.result()*100}%, g_loss: {gen_loss.result()}, time: {time.time() - start}')


        gen_loss.reset_states()
        dis_accuracy.reset_states()
        dis_loss.reset_states()

INFO:tensorflow:Error reported to Coordinator: Shapes of all inputs must match: values[0].shape = [2] != values[1].shape = [] [Op:Pack]
Traceback (most recent call last):
  File "/root/anaconda3/envs/ten2.2/lib/python3.7/site-packages/tensorflow/python/training/coordinator.py", line 297, in stop_on_exception
    yield
  File "/root/anaconda3/envs/ten2.2/lib/python3.7/site-packages/tensorflow/python/distribute/mirrored_strategy.py", line 998, in run
    self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
  File "/root/anaconda3/envs/ten2.2/lib/python3.7/site-packages/tensorflow/python/autograph/impl/api.py", line 282, in wrapper
    return func(*args, **kwargs)
  File "<ipython-input-13-040d8f6a30a6>", line 63, in train_step
    dis_accuracy.update_state(tf.ones((real_output[0],1)), real_output)
  File "/root/anaconda3/envs/ten2.2/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2976, in ones
    shape = ops.convert_to_tensor(shape, dtype=dtypes.int

InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [2] != values[1].shape = [] [Op:Pack]

In [None]:
# eval_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
#       name='eval_accuracy')

# new_model = create_model()
# new_optimizer = tf.keras.optimizers.Adam()

# test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)
# @tf.function
# def eval_step(images, labels):
#     predictions = new_model(images, training=False)
#     eval_accuracy(labels, predictions)
    
# checkpoint = tf.train.Checkpoint(optimizer=new_optimizer, model=new_model)
# checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

# for images, labels in test_dataset:
#     eval_step(images, labels)

# print ('전략을 사용하지 않고, 저장된 모델을 복원한 후의 정확도: {}'.format(
#     eval_accuracy.result()*100))

gen = build_generator()
new_optimizer = tf.keras.optimizers.Adam()
checkpoint = tf.train.Checkpoint(generator_optimizer=new_optimizer, generator=gen)
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
def sample_noises(generator, epoch):
    noise = np.random.normal(0, 1, (class_num,noise_dim))
    sampled_labels = np.arange(0, class_num).reshape(-1,1)

    gen_sound = generator.predict([noise, sampled_labels])
    sampled_labels = np.arange(0, class_num).reshape(-1)
    for i, j in enumerate(sampled_labels):
        data = None
        if feature == 'stft':
            data = librosa.istft(gen_sound[i])
        elif feature == 'mfcc':
            data = librosa.feature.inverse.mfcc_to_audio(gen_sound[i].T, resample_sr)
        elif feature == 'seq':
            data = gen_sound[i]
        else:
            raise ValueError('wrong feature')

        librosa.output.write_wav(os.path.join(generate_path, f'{epoch}_{label_list[j]}.wav'), data, resample_sr, norm=True)
        print(f'{epoch}_{label_list[j]}.wav')
        
sample_noises(generator, 17)

In [None]:
### ipd.Audio(generate_path+'17_destroyerengine.wav')