In [None]:
# --- Environment setup -------------------------------------------------------
# The environment variables below are set before `import tensorflow` further
# down in this cell.
import os
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Level '3' is TensorFlow's most restrictive native log filter.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Enumerate GPUs by PCI bus id and expose only device index 1 to this process.
# NOTE(review): the GPU index is hard-coded; adjust for machines with a
# different GPU layout.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import pandas as pd
import numpy as np
from gtda.time_series import SlidingWindow
import matplotlib.pyplot as plt
import tensorflow as tf
# NOTE(review): `tensorflow.python.*` is a private TensorFlow namespace;
# confirm this import still resolves on the TensorFlow version in use.
from tensorflow.python.keras.backend import set_session
# --- TF1-style session configuration ----------------------------------------
# Build a compat.v1 session whose GPU memory grows on demand and which logs
# device placement, then register it as the Keras backend session.
# NOTE(review): whether this session configuration affects TF2 eager execution
# of the model below is not visible from this notebook — confirm.
config = tf.compat.v1.ConfigProto() 
config.gpu_options.allow_growth = True  
config.log_device_placement = True  
sess2 = tf.compat.v1.Session(config=config)
set_session(sess2) 
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras import layers
from tensorflow.keras.losses import MSE
from tensorflow.keras.models import Model, load_model
from kapre.time_frequency import Melspectrogram, Spectrogram
import librosa
# Project-local helper modules used by the cells that follow.
import SpeechDownloader
import SpeechGenerator
import SpeechModels
from tqdm import tqdm

## Import Dataset

In [None]:
# Prepare the Google Speech Commands data (version 2, '35word' task). The call
# returns the per-split info dictionary and the number of categories.
gscInfo, nCategs = SpeechDownloader.PrepareGoogleSpeechCmd(version=2, task='35word')

# Sampling rate and input length, both 16000.
sr = 16000
iLen = 16000

# Shuffled generators for the training and validation splits.
trainGen = SpeechGenerator.SpeechGen(
    gscInfo['train']['files'],
    gscInfo['train']['labels'],
    shuffle=True,
)
valGen = SpeechGenerator.SpeechGen(
    gscInfo['val']['files'],
    gscInfo['val']['labels'],
    shuffle=True,
)

# Unshuffled test generators whose batch size equals the number of files in
# the split, so a single batch holds the whole split.
testGen = SpeechGenerator.SpeechGen(
    gscInfo['test']['files'],
    gscInfo['test']['labels'],
    shuffle=False,
    batch_size=len(gscInfo['test']['files']),
)
testRGen = SpeechGenerator.SpeechGen(
    gscInfo['testREAL']['files'],
    gscInfo['testREAL']['labels'],
    shuffle=False,
    batch_size=len(gscInfo['testREAL']['files']),
)

# Pull batch 5 of the validation generator and batch 0 of the test generator.
audios, classes = valGen[5]
x_test, y_test = testGen[0]

## Load Model

In [None]:
# Build the network from the project's SpeechModels module, restore the saved
# weights from disk, compile it and print its layer summary.
model = SpeechModels.AttRNNSpeechModel(
    nCategs,
    samplingrate=sr,
    inputLength=None,
)
model.load_weights('model-attRNN.h5')
model.compile(
    optimizer='adam',
    loss=['sparse_categorical_crossentropy'],
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

## Attack

In [None]:
def pgd_attack(model, iterations, image, label, alpha, eps):
    """Craft an adversarial example with projected gradient descent (PGD).

    Starting from a uniformly random point inside the L-infinity ball of
    radius ``eps`` around ``image``, the input is repeatedly moved by
    ``alpha`` in the direction of the sign of the loss gradient and then
    clipped back into ``[image - eps, image + eps]``.

    Args:
        model: Callable mapping an input batch to class probabilities; it is
            invoked as ``model(x)``.
        iterations: Number of gradient-sign steps to take.
        image: Clean input batch (array or tensor); it is cast to float32.
        label: Target distribution passed as ``y_true`` to categorical
            cross-entropy (the caller in this notebook passes a one-hot row).
        alpha: Step size of each gradient-sign update.
        eps: Radius of the allowed perturbation around ``image``.

    Returns:
        A float32 tensor with the same shape as ``image`` holding the
        adversarial input.
    """
    # Cast the clean input once so that the random start, the updates and the
    # clipping bounds all share the same float32 dtype.
    clean = tf.cast(image, dtype=tf.dtypes.float32)
    lower_bound = clean - eps
    upper_bound = clean + eps

    # The loss object is stateless, so it is built once outside the loop.
    loss_fn = tf.keras.losses.CategoricalCrossentropy()

    # Random start inside the eps-ball.
    adversarial = clean + tf.random.uniform(
        tf.shape(clean), minval=-eps, maxval=eps, dtype=tf.dtypes.float32)

    for _ in range(iterations):
        with tf.GradientTape() as tape:
            # Watching the tensor directly avoids allocating a new
            # tf.Variable on every iteration.
            tape.watch(adversarial)
            predictions = model(adversarial)
            loss = loss_fn(label, predictions)
        # Differentiate after leaving the tape context so the gradient
        # computation itself is not recorded.
        grads = tape.gradient(loss, adversarial)
        adversarial = adversarial + alpha * tf.sign(grads)
        # Project back onto the eps-ball around the clean input.
        adversarial = tf.clip_by_value(adversarial, lower_bound, upper_bound)
    return adversarial

In [None]:
# Sweep configuration: every (alpha, eps) combination is evaluated with a
# fixed number of PGD iterations.
eps = [0.1,0.3,0.5,0.7,0.9,1.0,2.0]
iterations = 5
alpha = [0.1,0.3,0.5,0.7,0.9,1.0]
# Denominator of the reported rate. It is derived from the data actually
# iterated over instead of a hard-coded sample count, so the rate stays
# correct for any test-set size.
take_size = len(x_test)
accu_num = []
eps_list = []
alpha_list = []

# Loop-invariant work, computed once instead of once per sample and per
# (alpha, eps) pair:
#   * number of classes used as the one-hot depth,
#   * one-hot encoded labels for the whole test set,
#   * the model's predictions on the clean (unperturbed) inputs.
n_classes = int(np.max(y_test)) + 1
one_hot_labels = np.array(tf.one_hot(y_test, n_classes))
clean_preds = model.predict(x_test.reshape(len(x_test), -1)).argmax(axis=1)

for alpha_value in alpha:
    for eps_value in eps:
        # Number of samples whose adversarial prediction still equals the
        # prediction on the clean input (agreement with the clean
        # prediction, not with the ground-truth label).
        countadv = 0
        for i in tqdm(range(len(x_test)), desc="alpha=%s eps=%s" % (alpha_value, eps_value)):
            audio = x_test[i, :].reshape(1, -1)
            label = one_hot_labels[i].reshape(1, n_classes)
            adversary = pgd_attack(model, iterations, audio, label,
                                   alpha=alpha_value, eps=eps_value)
            adversaryPred = model.predict(adversary)[0].argmax()
            if clean_preds[i] == adversaryPred:
                countadv += 1

        print("Adversarial accuracy : ", countadv / take_size)
        accu_num.append(countadv / take_size)
        eps_list.append(eps_value)
        alpha_list.append(alpha_value)

In [None]:
# Rates recorded by the sweep cell, one per (alpha, eps) combination in sweep order.
accu_num

In [None]:
# eps value used for each entry of accu_num (same order).
eps_list

In [None]:
# alpha value used for each entry of accu_num (same order).
alpha_list