In [1]:
!mkdir tmp

mkdir: cannot create directory ‘tmp’: File exists


In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from shutil import copyfile
import scipy.io.wavfile as wav
import struct
import time
import sys
from collections import namedtuple

import json
import os
import librosa
   
    
def training_step(model, optimizer, loss_fn, original, delta, rescale, target):
    with tf.GradientTape(persistent=True) as tape:
        tape.watch([delta])
#         print(delta.shape, rescale.shape)
        new_delta = tf.clip_by_value(delta, -2000, 2000)*rescale
        new_input = new_delta + original
        noise = tf.random.normal(new_input.shape, stddev=2)
        new_input = tf.clip_by_value(new_input+noise, -2**15, 2**15-1)
#         print('input:', new_input.shape)
        stfts = tf.signal.stft(new_input, frame_length=480, frame_step=160, fft_length=480)
        spectrograms = tf.abs(stfts)
#         print('stft:', stfts.shape)
        # Warp the linear scale spectrograms into the mel-scale.
        num_spectrogram_bins = stfts.shape[-1]
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 20.0, 7600.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
          num_mel_bins, num_spectrogram_bins, 16000, lower_edge_hertz,
          upper_edge_hertz)
#         print(linear_to_mel_weight_matrix.shape)
        mel_spectrograms = tf.tensordot(
          spectrograms, linear_to_mel_weight_matrix, 1)
#         print(mel_spectrograms.shape)
        mel_spectrograms.set_shape(spectrograms.shape[:-1].concatenate(
          linear_to_mel_weight_matrix.shape[-1:]))
#         print(mel_spectrograms.shape)
        # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
        log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

        # Compute MFCCs from log_mel_spectrograms and take the first 13.
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(
          log_mel_spectrograms)[..., :40]
        mfccs = tf.transpose(mfccs, perm=[0,2,1])
        mfccs = tf.expand_dims(mfccs, axis=-1)

        pred = model(mfccs)

        loss = loss_fn(target, pred)
    dd = tape.gradient(loss, [delta])
#             print('\n\n\nhere\n\n\n', grad, var)
    optimizer.apply_gradients(zip(dd,[delta]))
    return pred, loss


def attack(audio, files, target, model, root = 'tmp', optimizer = tf.optimizers.Adam(learning_rate=10), 
           loss_fn=tf.nn.sparse_softmax_cross_entropy_with_logits):
    batch_size = len(audio)
    delta = tf.Variable(np.zeros((batch_size, 16000), dtype=np.float32))
    rescale = tf.Variable(np.ones((batch_size, 1), dtype = np.float32))
    original = tf.convert_to_tensor(np.array(audio, dtype=np.float32))
    target = tf.convert_to_tensor(target)
    new_input = (original + delta).numpy()
    
    # Here we'll keep track of the best solution we've found so far
    final_deltas = [None]*batch_size



    # We'll make a bunch of iterations of gradient descent here
    #now = time.time()
    MAX = 10000
    first_hits = np.zeros((batch_size,))
    best_hits = np.zeros((batch_size,))
    start = time.time()
    for i in range(MAX):

        pred, loss = training_step(model, optimizer, loss_fn, original, delta, rescale, target)


        # Print out some debug information every 10 iterations.
        if i%100 == 0:
#             print(pred.shape, pred)
            print(time.time() - start)
            print(i, loss, np.argmax(pred, axis=1))


        # Actually do the optimization step
        for ii in range(batch_size):
            if (i%100 == 0 and np.argmax(pred[ii]) == target[ii]) \
               or (i == MAX-1 and final_deltas[ii] is None):
                temp_rescale = rescale.numpy()
                # Get the current constant
                if temp_rescale[ii][0]*2000 > np.max(np.abs(delta[ii])):
                    print("It's way over", np.max(np.abs(delta[ii]))/2000.0)
                    temp_rescale[ii][0] = np.max(np.abs(delta[ii]))/2000.0

                temp_rescale[ii][0] *= .9
                rescale.assign(temp_rescale)
                # Adjust the best solution found so far
                new_input[ii] = (original[ii] + delta[ii]).numpy()
                final_deltas[ii] = new_input[ii]

                print("Worked i=%d loss=%f bound=%f"%(ii, loss[ii], 2000*rescale[ii][0]))

                if (first_hits[ii] == 0):
                    print("First hit for audio {} at iteration {}".format(ii, i))
                    first_hits[ii]=i
                else:
                    best_hits[ii]=i

                # Just for debugging, save the adversarial example
                # to /tmp so we can see it if we want
                wav.write(f'{root}/{files[ii]}', 16000,
                          np.array(np.clip(np.round(new_input[ii]),
                                           -2**15, 2**15-1),dtype=np.int16))

    return final_deltas, first_hits, best_hits  

2023-02-27 16:38:43.031120: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-27 16:38:43.460582: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-27 16:38:43.499797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-02-27 16:38:43.499825: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore 

In [4]:
# files = ["down/31f01a8d_nohash_4.wav", 'down/0b77ee66_nohash_1.wav', 
#          'left/0ff728b5_nohash_2.wav', 'left/f9273a21_nohash_0.wav',
#          'right/d278d8ef_nohash_1.wav', 'right/1e02ffc5_nohash_1.wav',
#          'yes/df280250_nohash_0.wav', 'yes/7846fd85_nohash_4.wav',
#          'no/baf01c1f_nohash_1.wav', 'no/e9bc5cc2_nohash_1.wav']

# audios = []
# for x in files:
#     fs, audio = wav.read('/kaggle/input/speech-commands-v2/' + x)
#     audios.append(audio)
# target = np.array([4]*len(audios))
# one_hot_target = tf.keras.utils.to_categorical(target, num_classes=10)
#model=tf.keras.models.load_model('/ML_KWS_TF2/work/CNN/CNN1/training/best/')
model = tf.keras.models.load_model('/kaggle/input/kwt-tf-mfcc/KWS_transformer')
#model.load_weights('/ML_KWS_TF2/work/CNN/CNN1/training/best/')
model.load_weights('/kaggle/input/kwt-tf-mfcc/best_weights')

OSError: No file or directory found at /ML_KWS_TF2/work/CNN/CNN1/training/best/

In [4]:
# deltas, firsts, bests = attack(audios, target, model)

In [5]:
# diff = new-audio
# source_DB = 20 * np.log10(np.max(np.abs(audio)))
# end_DB = 20 * np.log10(np.max(np.abs(new)))
# distortion = 20 * np.log10(np.max(np.abs(diff))) - source_DB
# source_DB, end_DB, distortion

In [6]:
# end_DB - source_DB

In [7]:
# 20*np.log10*(np.max(np.abs(diff)))

In [14]:
from itertools import chain
import random

commands = ['on', 'off', 'yes', 'no', 'stop', 'go', 'left', 'right', 'up', 'down']
data_dir = '/kaggle/input/speech-commands-v2/'
filenames = []
for i in range(len(commands)):
    filenames.append(tf.io.gfile.glob(str(data_dir) + commands[i] + "/*.wav"))

filenames = list(chain.from_iterable(filenames))
random.shuffle(filenames)

In [15]:
for i in range(2, 10):
    os.mkdir('tmp/' + commands[i])
    batch_size = 90
    audios = []
    files = []
    j = -1
    while len(audios) < batch_size:
        j += 1
        fs, audio = wav.read(filenames[j])
        if filenames[j].split('/')[-2] == commands[i]:
            continue
        if audio.shape[0] == 16000:
            audios.append(audio)
            files.append('_'.join(filenames[j].split('/')[-2:]))
    target = np.array([i]*len(audios))
    deltas, firsts, bests = attack(audios, files, target, 
                                   model, root=f'tmp/{commands[i]}')

FileNotFoundError: [Errno 2] No such file or directory: 'tmp/yes'

In [10]:
# 10 18
# 32 21
# 96 30


In [11]:
# import os
# os.chdir(r'/kaggle/working')

# !zip -r attack.zip tmp

# from IPython.display import FileLink

# FileLink(r'attack.zip')

In [12]:
# files