In [1]:
import numpy as np
import pandas as pd
from IPython.display import clear_output
from matplotlib import pyplot as plt
import os
from scipy import sparse
import pickle
import sys
from functools import partial

from art import config
from art.estimators.classification import BlackBoxClassifier, ClassGradientsMixin, ClassifierMixin, SklearnClassifier
from art.attacks.evasion import CarliniL2Method, SaliencyMapMethod, ZooAttack
from art.utils import to_categorical
from art.utils import load_dataset, get_file, compute_accuracy

In [2]:
os.chdir('/home/rcgonzal/DSC180Malware/m2v-adversarial-hindroid/')
!pwd

/home/rcgonzal/DSC180Malware/m2v-adversarial-hindroid


In [3]:
base_data = os.path.join('data/out/miniset-train')

In [4]:
def get_hindroid_features(app, base_data):
    if os.path.exists(app):
        apis = pd.read_csv(app, usecols=['api'], squeeze=True, dtype=str).unique()
    else:
        try:
            apis = pd.read_csv(os.path.join('data', 'out', 'all-apps', 'app-data', app+'.csv'), usecols=['api'], squeeze=True, dtype=str).unique()
        except FileNotFoundError as e:
            print("App not in app heap")
            raise e
    
    base_data_apis = pd.read_csv(os.path.join(base_data, 'api_map.csv'), index_col='api')
    
    A_row = pd.Series(np.zeros(base_data_apis.size), index=base_data_apis.index)
    A_row.loc[set(apis).intersection(base_data_apis.index)] = 1
    
    return A_row.values

In [18]:
test_apps = pd.read_csv('data/out/miniset-test/app_list.csv')
test_apps = test_apps.sample(100).app
test_apps_features = np.array([get_hindroid_features(app, base_data='data/out/miniset-train') for app in test_apps])
test_apps_features

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [6]:
test_apps_features.shape

(5, 2547149)

In [7]:
x = get_hindroid_features('com.microsoft.todos', base_data='data/out/miniset-train')
x

array([1., 0., 0., ..., 0., 0., 0.])

In [9]:
A = sparse.load_npz(os.path.join(base_data, 'hindroid', 'A_mat.npz'))
B = sparse.load_npz(os.path.join(base_data, 'hindroid', 'B_mat.npz'))
with open(os.path.join(base_data, 'hindroid', 'ABAT.mdl'), 'rb') as file:
    mdl = pickle.load(file)

def ABA_predict(x):
    print(f'Prediction', file=sys.stdout)
    features = x * B * A.T
    return mdl.predict(features)
    
ABA_predict([x,])

Prediction


array([0])

In [10]:
ABA_predict(test_apps_features)

Prediction


array([1, 1, 1, 1, 1])

In [37]:
hindroid_blackbox = BlackBoxClassifier(ABA_predict, x.shape, 2, clip_values=(0, 1))

In [49]:
zoo = ZooAttack(hindroid_blackbox, confidence=0.1, targeted=False, learning_rate=1e-1, max_iter=100,
                binary_search_steps=20, initial_const=1e-3, abort_early=True, use_resize=False, 
                use_importance=False, nb_parallel=10, batch_size=1, variable_h=0.25)

In [50]:
%time adv_ex = zoo.generate(test_apps_features)

Prediction
Prediction
Prediction
Prediction
Prediction


ZOO:   0%|          | 0/5 [00:00<?, ?it/s]

Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction
Prediction

KeyboardInterrupt: 

In [44]:
adv_ex

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [45]:
ABA_predict(adv_ex)

Prediction


array([1, 0, 1, 1, 1])

In [46]:
np.sum(adv_ex, axis=1)

array([2373., 9959.,  320.,  307.,  283.])

In [47]:
np.sum(test_apps_features, axis=1)

array([2373., 9959.,  320.,  307.,  283.])

In [81]:
ABA_predict(test_apps_features)

Prediction


array([1, 1, 1, 1, 1])

## Test with features

In [19]:
test_apps_commuted = test_apps_features * B * A.T
hindroid_blackbox_simple = SklearnClassifier(mdl)
zoo = ZooAttack(hindroid_blackbox_simple, confidence=0.0, targeted=False, learning_rate=1e-1, max_iter=1000,
                binary_search_steps=20, initial_const=1e-3, abort_early=True, use_resize=False, 
                use_importance=True, nb_parallel=10, batch_size=1)
adv_ex = zoo.generate(test_apps_commuted)
mdl.predict(adv_ex)

ZOO:   0%|          | 0/100 [00:00<?, ?it/s]

array([0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0])

In [20]:
np.sum((adv_ex == test_apps_commuted), axis=1)

array([448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448, 448,
       448, 448, 448, 448, 448, 448, 448, 448, 448])