# Preamble

In [1]:
# json
import json

# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
from torch import Tensor

import matplotlib.pyplot as plt
import librosa

import librosa.display
import IPython.display as ipd

import numpy as np
import pickle
import random
import torch
import librosa
from torch_specinv import griffin_lim
from torch_specinv.metrics import spectral_convergence as SC

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn import metrics
import youtube_dl
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

import cvxpy as cp

In [2]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda=True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

CUDA Available:  True


In [3]:
FIXED_SAMPLE_RATE = 48000

# Load data

In [4]:
pickle_path = "/nfs/students/summer-term-2020/project-4/data/dataset1/dataset_resampled/"
training = pickle.load(open(pickle_path + "training.p","rb"))
validation = pickle.load(open(pickle_path + "validation.p","rb"))

In [5]:
max_length = 0

for sample in training:
    max_length = max(max_length, sample['data'][0].shape[0])

for sample in validation:
    max_length = max(max_length, sample['data'][0].shape[0])
    
print(max_length)

481489


In [6]:
def prepareData(data):
    zero_padded_data = torch.zeros(max_length)
    zero_padded_data[:data.shape[0]] = torch.from_numpy(data)
    return zero_padded_data

In [7]:
training_dataset = [(prepareData(sample['data'][0]), 1)
                        if sample['binary_class']=='positive' else (prepareData(sample['data'][0]), 0) 
                        for sample in training]
validation_dataset = [(prepareData(sample['data'][0]), 1)
                        if sample['binary_class']=='positive' else (prepareData(sample['data'][0]), 0) 
                        for sample in validation]

In [8]:
path_tum_sound = '/nfs/students/summer-term-2020/project-4/yan/tum.wav'
tum_sound,sr = librosa.load(path_tum_sound, sr=FIXED_SAMPLE_RATE)
padding = int((max_length - len(tum_sound))/2)
zero_padded_data = torch.zeros(max_length)
zero_padded_data[padding:padding+tum_sound.shape[0]] = torch.from_numpy(tum_sound)
tum_sound = zero_padded_data

# Model aspects

## Definition

In [9]:
class HybridCNN(nn.Module):
    def __init__(self, hidden_dim, numChunksList):
        super(HybridCNN, self).__init__()
        
        self.hidden_dim = hidden_dim 
        
        self.numChunksList = numChunksList # full sequence, half/half, 2sec split
            
        # initial normalization
        self.bn0 = nn.BatchNorm1d(1)
        
        ## init M5-mod with global pooling
        self.conv1 = nn.Conv1d(1, 128, 80, 4)
        self.bn1 = nn.BatchNorm1d(128)
        self.pool1 = nn.MaxPool1d(4)
        self.drop1 = nn.Dropout(p=0.2)
        
        self.conv2 = nn.Conv1d(128, 256, 3)
        self.bn2 = nn.BatchNorm1d(256)
        self.pool2 = nn.MaxPool1d(4)
        
        self.conv3 = nn.Conv1d(256, 512, 3)
        self.bn3 = nn.BatchNorm1d(512)
        self.pool3 = nn.MaxPool1d(4)
        
        self.conv4 = nn.Conv1d(512, 1024, 3)
        self.bn4 = nn.BatchNorm1d(1024)
        self.pool4 = nn.MaxPool1d(4)
        
        self.fc1 = nn.Linear(1024, self.hidden_dim)

        # init final FC
        self.fcN = nn.Linear(sum(self.numChunksList) * hidden_dim, 2)
    
    def forwardM5(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.drop1(self.pool1(x))
        
        x = self.conv2(x)
        x = F.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = F.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = F.relu(self.bn4(x))
        x = self.pool4(x)
        x = F.avg_pool1d(x, kernel_size=x.size()[2:]) # global average pooling
        x = x.squeeze(2)
        x = self.fc1(x)
        return x
    
    def forward(self, x0):
        batch_size = x0.shape[0]
        
        ## first normalize batch
        x0 = x0.unsqueeze(1)
        x0 = self.bn0(x0)
        
        cnnResult = []
        for numChunks in self.numChunksList:
            chunks = torch.chunk(x0, numChunks, dim=2)
            
            for chunk in chunks:
                x = self.forwardM5(chunk) ## do one convolution
                x = x.unsqueeze(1)
                cnnResult.append(x)
        
        x = torch.cat(cnnResult, dim=2)
        x = x.squeeze(1)

        # final linear layer
        x = self.fcN(F.relu(x))

        return F.log_softmax(x,dim=1)

In [10]:
def validate(model, epoch=0):
    valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    
    model.eval()

    y_true = []
    y_pred = []
    running_loss = 0.0
    
    with torch.no_grad():
        for i,data in enumerate(valid_loader,0):
            samples, labels = [x.cuda() for x in data]
            outputs = model(samples)

            y_pred.append(torch.max(outputs.data, 1)[1].item())
            y_true.append(labels.item())
            
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
    print('[%d] valid-loss: %.3f' % (epoch + 1, running_loss / (i+1)))
            
    return reportScore(y_true, y_pred), running_loss / (i+1)

In [11]:
def reportScore(y_true, y_pred):
    print("\tAccuracy:\t" + str(metrics.accuracy_score(y_true,y_pred)))
    print("\tPrecision:\t" + str(metrics.precision_score(y_true,y_pred)))
    print("\tRecall:   \t" + str(metrics.recall_score(y_true,y_pred)))
    print("\tF1-score:\t" + str(metrics.f1_score(y_true,y_pred)))

    #tn, fp, fn, tp = metrics.confusion_matrix(y_true,y_pred).ravel()
    return metrics.accuracy_score(y_true,y_pred)

## Loading model

In [12]:
model_state_dict_path = "/nfs/students/summer-term-2020/project-4/yan/models/best_model_state_dict.pt"
model = HybridCNN(hidden_dim=100, numChunksList=[5,2,1])
model.load_state_dict(torch.load(model_state_dict_path))
model.eval() # important! we don't optimize the model anymore

HybridCNN(
  (bn0): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1): Conv1d(1, 128, kernel_size=(80,), stride=(4,))
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (drop1): Dropout(p=0.2, inplace=False)
  (conv2): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(256, 512, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(512, 1024, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (po

# Attacks

## General framework

In [13]:
def testAttack(model, data_loader, attack, epsilon, num_iter=1, early_stopping=-1):
    # https://pytorch.org/tutorials/beginner/fgsm_tutorial.html
    model.eval().cuda()
    correct = 0
    adv_examples = []
    
    success = 0
    for i, data in tqdm(list(enumerate(data_loader,0)), position=0):
        data, target = [x.cuda() for x in data]

        output = model(data)
        _, init_pred = torch.max(output.data, 1) # get the index of the max log-probability

        if init_pred.item() != target.item():
            continue # If the initial prediction is wrong, dont bother attacking, just move on

        perturbed_data = attack(model, data, target, epsilon, num_iter)

        # Re-classify the perturbed image
        output = model(perturbed_data)
        # Check for success
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        
        if final_pred.item() == target.item():
            correct += 1
            # Special case for saving 0 epsilon examples
            if (epsilon == 0) and (len(adv_examples) < 50):
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (i, init_pred.item(), final_pred.item(), adv_ex) )
        else:
            success += 1
            if early_stopping > 0:
                print("Found adversarial example")
                
            # Save some adv examples for visualization later
            if len(adv_examples) < 50:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (i, init_pred.item(), final_pred.item(), adv_ex) )
        
        if early_stopping > 0 and len(adv_examples) == early_stopping:
            return -1, adv_examples
        
    final_acc = correct/float(len(data_loader))
    print("Epsilon: {}\tIterations: {}\tTest Accuracy = {} / {} = {}".format(epsilon, num_iter, correct, len(data_loader), final_acc))

    return final_acc, adv_examples, success, correct

In [14]:
windowsize = 2048
window = torch.hann_window(windowsize)

def audio2spec(y):
    return torch.stft(y, windowsize, window=window).pow(2).sum(2).sqrt()

def drawSpec(mag):
    log_S = librosa.power_to_db(mag.numpy(), ref=np.max(mag.numpy()))
    plt.figure(figsize=(12,2))
    ax = librosa.display.specshow(log_S, sr=FIXED_SAMPLE_RATE, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')

## New attack

In [15]:
sample1 = training_dataset[1][0]

In [18]:
ipd.display(ipd.Audio(sample1, rate=FIXED_SAMPLE_RATE))

In [16]:
torch.arange(0, 5, 1.2) % 1.0

tensor([0.0000, 0.2000, 0.4000, 0.6000, 0.8000])

In [17]:
def speedup(sample, speedup_rate):
    if speedup_rate == 1:
        return sample
    #assert speedup_rate > 1 # we only support speedups right now, not slow-downs
    
    n_fft = 2048 # windowsize
    window = torch.hann_window(windowsize)
    hop_length = int(np.floor(n_fft / 4))
    n_freq = 1177 # somehow depends on speedup_rate
    
    # speedup
    stft = torch.stft(sample1, n_fft, hop_length=hop_length, window=window)
    phase_advance = torch.linspace(0, math.pi * hop_length, n_freq)
    vocoded = AF.phase_vocoder(stft, rate=speedup_rate, phase_advance=phase_advance)
    istft = AF.istft(vocoded, n_fft, hop_length=hop_length, window=window)
    
    max_length = sample.shape[0]

    if speedup_rate > 1: 
        # faster means output is smaller -> padding
        pad_l = int((max_length - istft.shape[0])/2)
        pad_r = max_length - (pad_l + istft.shape[0])
        return F.pad(istft, (pad_l, pad_r))
    else:
        # slower means longer -> chopping of
        low = int((istft.shape[0] - max_length)/2)
        return istft[low:low+max_length]

In [19]:
import math
import 

In [18]:
speedup_rate = 0.8
final_sample = speedup(sample1, speedup_rate)

NameError: name 'math' is not defined

In [167]:
ipd.display(ipd.Audio(final_sample, rate=FIXED_SAMPLE_RATE))

## Interpolation

In [None]:
'''
    Interpolation attack: 
        - take gradient wrt. the interpolation parameters a,b 
        
    Parameters:
        - tum_sound: sound to be inserted
        - epsilon: gradient step size
        - num_iterations: PGD iterations
        - clamping parameters for a, b (4 in total): define the max/min of interpolation volume
'''
def insertion_attack(model, x, y, epsilon, num_iter=1):
    global tum_sound
    
    x = x.clone().detach()
    a = torch.tensor(1.0).cuda() # original sound volume
    b = torch.tensor(1.0).cuda() # inserted sound volume
    tum_sound = tum_sound.cuda()
    
    for i in range(num_iter):
        a.requires_grad_()
        b.requires_grad_()
        
        loss = F.nll_loss(model(a * x + b * tum_sound), y)
        model.zero_grad()
        loss.backward()
        
        a = (a + epsilon * a.grad.data).clamp(0.8,1).detach()
        b = (b + epsilon * b.grad.data).clamp(0.05,0.1).detach()

    return (a * x + b * tum_sound).clamp(-1, 1)

In [None]:
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=False)

accuracies = []
examples = []

# Run test for each epsilon
epsilons = [.3]

for eps in epsilons:
    acc, ex = testAttack(model, valid_loader, insertion_attack, eps, num_iter=10)
    accuracies.append(acc)
    examples.append(ex)

## Interpolation attack

In [182]:
'''
    Interpolation attack: 
        - take gradient wrt. the interpolation parameters a,b 
        
    Parameters:
        - tum_sound: sound to be inserted
        - epsilon: gradient step size
        - num_iterations: PGD iterations
        - clamping parameters for a, b (4 in total): define the max/min of interpolation volume
'''
def insertion_attack(model, x, y, epsilon, num_iter=1):
    global tum_sound
    
    x = x.clone().detach()
    a = torch.tensor(1.0).cuda() # original sound volume
    b = torch.tensor(1.0).cuda() # inserted sound volume
    tum_sound = tum_sound.cuda()
    
    for i in range(num_iter):
        a.requires_grad_()
        b.requires_grad_()
        
        loss = F.nll_loss(model(a * x + b * tum_sound), y)
        model.zero_grad()
        loss.backward()
        
        a = (a + epsilon * a.grad.data).clamp(0.8,1).detach()
        b = (b + epsilon * b.grad.data).clamp(0.05,0.1).detach()

    return (a * x + b * tum_sound).clamp(-1, 1)

In [181]:
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=False)

accuracies = []
examples = []

# Run test for each epsilon
epsilons = [.3]

for eps in epsilons:
    acc, ex = testAttack(model, valid_loader, insertion_attack, eps, num_iter=10)
    accuracies.append(acc)
    examples.append(ex)

100%|██████████| 1687/1687 [16:02<00:00,  1.75it/s]

Epsilon: 0.3	Iterations: 10	Test Accuracy = 1489 / 1687 = 0.8826318909306461





    Epsilon: 0.3	Iterations: 10	Test Accuracy = 1489 / 1687 = 0.8826318909306461

In [183]:
eps = -1
adversarial_samples = {}

for sample in examples[eps]:
    if sample[1] == 1: # ie. previously correctly classified as EM
        print("found good adversarial sample: ")
        example_id = sample[0]
        print("\tSample id: " + str(example_id))
        adversarial_samples.update({example_id : sample})

found good adversarial sample: 
	Sample id: 74
found good adversarial sample: 
	Sample id: 242
found good adversarial sample: 
	Sample id: 345
found good adversarial sample: 
	Sample id: 570
found good adversarial sample: 
	Sample id: 751
found good adversarial sample: 
	Sample id: 769
found good adversarial sample: 
	Sample id: 1137
found good adversarial sample: 
	Sample id: 1149
found good adversarial sample: 
	Sample id: 1472
found good adversarial sample: 
	Sample id: 1496


In [None]:
sample = adversarial_samples[1149] #random.sample(list(adversarial_samples.values()),1)[0]
print(sample[0])
original = validation_dataset[sample[0]][0]
adversarial = sample[-1]

ipd.display(ipd.Audio(original,    rate=FIXED_SAMPLE_RATE, normalize=False))
ipd.display(ipd.Audio(adversarial, rate=FIXED_SAMPLE_RATE, normalize=False))

## Volume attacks

In [28]:
def volume_attack(model, x, y, epsilon, num_iter=1):
    x = x.clone().detach()
    a = torch.tensor(1.0).cuda()
    
    for i in range(num_iter):
        a.requires_grad_()
        
        loss = F.nll_loss(model(a * x), y)
        model.zero_grad()
        loss.backward()
        
        a = (a + epsilon * a.grad.data).clamp(0.2,2).detach()
    
    return (a * x).clamp(-1, 1)

In [None]:
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=False)

accuracies = []
examples = []

# Run test for each epsilon
epsilons = [.3]

for eps in epsilons:
    acc, ex, _, _ = testAttack(model, valid_loader, volume_attack, eps, num_iter=10)
    accuracies.append(acc)
    examples.append(ex)

    Epsilon: 0.3	Iterations: 10	Test Accuracy = 1387 / 1687 = 0.8221695317131001

In [190]:
eps = -1
adversarial_samples = {}

for sample in examples[eps]:
    if sample[1] == 1: # ie. previously correctly classified as EM
        print("found good adversarial sample: ")
        example_id = sample[0]
        print("\tSample id: " + str(example_id))
        adversarial_samples.update({example_id : sample})

found good adversarial sample: 
	Sample id: 7


In [None]:
sample = random.sample(list(adversarial_samples.values()),1)[0]
print(sample[0])
original = validation_dataset[sample[0]][0]
adversarial = sample[-1]

ipd.display(ipd.Audio(original,    rate=FIXED_SAMPLE_RATE, normalize=False))
ipd.display(ipd.Audio(adversarial, rate=FIXED_SAMPLE_RATE, normalize=False))

## Simple PGD (SPGD) 

In [None]:
def spgd_attack(model, x, y, epsilon, num_iter=1):
    
    perturbed_sample = x
    
    for i in range(num_iter):
        perturbed_sample = perturbed_sample.clone().detach().requires_grad_(True).cuda()
        loss = F.nll_loss(model(perturbed_sample), y)
        model.zero_grad()
        loss.backward()
        
        perturbed_sample = perturbed_sample + epsilon * perturbed_sample.grad.data
        perturbed_sample = (perturbed_sample).clamp(-1, 1) # simple clamp projection

    return perturbed_sample

In [None]:
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=False)

accuracies = []
examples = []

# Run test for each epsilon
epsilons = [.05, 0.1]

for eps in epsilons:
    print(eps)
    acc, ex = testAttack(model, valid_loader, spgd_attack, eps, num_iter=20)
    accuracies.append(acc)
    examples.append(ex)

     30 min for 20 iterations each
     
     Epsilon: 0.01	Iterations: 20	Test Accuracy = 1466 / 1687 = 0.8689982216953172
     Epsilon: 0.05	Iterations: 20	Test Accuracy = 1367 / 1687 = 0.8103141671606402
     Epsilon: 0.1	Iterations: 20	Test Accuracy = 1267 / 1687 = 0.7510373443983402

In [None]:
eps = -1
adversarial_samples = {}

for sample in examples[eps]:
    if sample[1] == 1: # ie. previously correctly classified as EM
        print("found good adversarial sample: ")
        example_id = sample[0]
        print("\tSample id: " + str(example_id))
        adversarial_samples.update({example_id : sample})

In [None]:
sample = adversarial_samples[9] #random.sample(list(adversarial_samples.values()),1)[0]
print(sample[0])
original = validation_dataset[sample[0]][0]
adversarial = sample[-1]

ipd.display(ipd.Audio(original,    rate=FIXED_SAMPLE_RATE, normalize=False))
ipd.display(ipd.Audio(adversarial, rate=FIXED_SAMPLE_RATE, normalize=False))

## FGSM method

In [16]:
def fgsm_attack(model, x, y, epsilon, num_iter=1):
    x.requires_grad = True
    loss = F.nll_loss(model(x), y)
    model.zero_grad()
    loss.backward()
    perturbed_sample = x + epsilon * x.grad.data.sign()
    return perturbed_sample

In [17]:
valid_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=1, shuffle=False)

accuracies = []
examples = []

# Run test for each epsilon
epsilons = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1] 
_, _, _, TPTN = testAttack(model, valid_loader, fgsm_attack, epsilon=0, num_iter=1)

print("Start attack")
for eps in epsilons:
    acc, ex, success, _ = testAttack(model, valid_loader, fgsm_attack, eps, num_iter=1)
    print("\tSuccess Rate: {} / {} = {}".format(success, TPTN, success/float(TPTN)))
    accuracies.append(acc)
    examples.append(ex)
    print()

100%|██████████| 1687/1687 [02:44<00:00, 10.24it/s]


Epsilon: 0	Iterations: 1	Test Accuracy = 1503 / 1687 = 0.8909306461173682
Start attack


100%|██████████| 1687/1687 [02:46<00:00, 10.12it/s]


Epsilon: 0.001	Iterations: 1	Test Accuracy = 1431 / 1687 = 0.8482513337285121
	Success Rate: 72 / 1503 = 0.04790419161676647



100%|██████████| 1687/1687 [02:46<00:00, 10.15it/s]


Epsilon: 0.002	Iterations: 1	Test Accuracy = 1357 / 1687 = 0.8043864848844102
	Success Rate: 146 / 1503 = 0.09713905522288756



100%|██████████| 1687/1687 [02:45<00:00, 10.20it/s]


Epsilon: 0.005	Iterations: 1	Test Accuracy = 1104 / 1687 = 0.6544161232957914
	Success Rate: 399 / 1503 = 0.2654690618762475



100%|██████████| 1687/1687 [02:45<00:00, 10.22it/s]


Epsilon: 0.01	Iterations: 1	Test Accuracy = 891 / 1687 = 0.5281564908120925
	Success Rate: 612 / 1503 = 0.40718562874251496



100%|██████████| 1687/1687 [02:45<00:00, 10.21it/s]


Epsilon: 0.02	Iterations: 1	Test Accuracy = 726 / 1687 = 0.4303497332542976
	Success Rate: 777 / 1503 = 0.5169660678642715



100%|██████████| 1687/1687 [02:44<00:00, 10.23it/s]


Epsilon: 0.05	Iterations: 1	Test Accuracy = 863 / 1687 = 0.5115589804386484
	Success Rate: 640 / 1503 = 0.4258150365934797



100%|██████████| 1687/1687 [02:45<00:00, 10.22it/s]


Epsilon: 0.1	Iterations: 1	Test Accuracy = 982 / 1687 = 0.5820983995257855
	Success Rate: 521 / 1503 = 0.3466400532268796



100%|██████████| 1687/1687 [02:45<00:00, 10.20it/s]


Epsilon: 0.2	Iterations: 1	Test Accuracy = 882 / 1687 = 0.5228215767634855
	Success Rate: 621 / 1503 = 0.41317365269461076



100%|██████████| 1687/1687 [02:44<00:00, 10.23it/s]


Epsilon: 0.5	Iterations: 1	Test Accuracy = 793 / 1687 = 0.4700652045050385
	Success Rate: 710 / 1503 = 0.47238855622089154



100%|██████████| 1687/1687 [02:44<00:00, 10.23it/s]

Epsilon: 1	Iterations: 1	Test Accuracy = 762 / 1687 = 0.45168938944872555
	Success Rate: 741 / 1503 = 0.4930139720558882






    Epsilon: 0	Iterations: 1	Test Accuracy = 1503 / 1687 = 0.8909306461173682
    Epsilon: 0.001	Iterations: 1	Test Accuracy = 1431 / 1687 = 0.8482513337285121
        Success Rate: 72 / 1503 = 0.04790419161676647
    Epsilon: 0.002	Iterations: 1	Test Accuracy = 1357 / 1687 = 0.8043864848844102
        Success Rate: 146 / 1503 = 0.09713905522288756
    Epsilon: 0.005	Iterations: 1	Test Accuracy = 1104 / 1687 = 0.6544161232957914
        Success Rate: 399 / 1503 = 0.2654690618762475
    Epsilon: 0.01	Iterations: 1	Test Accuracy = 891 / 1687 = 0.5281564908120925
        Success Rate: 612 / 1503 = 0.40718562874251496
    Epsilon: 0.02	Iterations: 1	Test Accuracy = 726 / 1687 = 0.4303497332542976
        Success Rate: 777 / 1503 = 0.5169660678642715
    Epsilon: 0.05	Iterations: 1	Test Accuracy = 863 / 1687 = 0.5115589804386484
        Success Rate: 640 / 1503 = 0.4258150365934797
    Epsilon: 0.1	Iterations: 1	Test Accuracy = 982 / 1687 = 0.5820983995257855
        Success Rate: 521 / 1503 = 0.3466400532268796
    Epsilon: 0.2	Iterations: 1	Test Accuracy = 882 / 1687 = 0.5228215767634855
        Success Rate: 621 / 1503 = 0.41317365269461076
    Epsilon: 0.5	Iterations: 1	Test Accuracy = 793 / 1687 = 0.4700652045050385
        Success Rate: 710 / 1503 = 0.47238855622089154
    Epsilon: 1	Iterations: 1	Test Accuracy = 762 / 1687 = 0.45168938944872555
        Success Rate: 741 / 1503 = 0.4930139720558882

72, 146, 399, 612, 777, 640, 521, 621, 710, 741

In [33]:
eps = -1
adversarial_samples = {}

for sample in examples[eps]:
    if sample[1] == 1: # ie. previously correctly classified as EM
        print("found good adversarial sample: ")
        example_id = sample[0]
        print("\tSample id: " + str(example_id))
        adversarial_samples.update({example_id : sample})

found good adversarial sample: 
	Sample id: 9
found good adversarial sample: 
	Sample id: 16
found good adversarial sample: 
	Sample id: 31
found good adversarial sample: 
	Sample id: 38
found good adversarial sample: 
	Sample id: 43


In [None]:
sample = random.sample(list(adversarial_samples.values()),1)[0]
print(sample[0])
original = validation_dataset[sample[0]][0]
adversarial = sample[-1]

ipd.display(ipd.Audio(original,    rate=FIXED_SAMPLE_RATE))
ipd.display(ipd.Audio(adversarial, rate=FIXED_SAMPLE_RATE, normalize=False))

In [None]:
drawSpec(audio2spec(torch.tensor(original)))
drawSpec(audio2spec(torch.tensor(adversarial)))

# Misc

In [99]:
## exec break