In [1]:
%load_ext autoreload
%autoreload 2

In [13]:
import sys
sys.path.append('..')

import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
from os import listdir
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegressionCV
from scipy.sparse import csr_matrix, save_npz, load_npz

from trickster.saliency import *

MANIFEST_FEATURES = [    # corresponds to...
    'provider',          # "Hardware Components" 
    'permission',        # "Permissions"
    'activity',          # "Components" (185,729 / 218,951)
    'service_receiver',  # "Components" (33,222  / 218,951)
    'intent'             # "Intents"
]                        # ... in the Grosse et al. paper

seed = 2018

In [4]:
# Record hashes corresponding to malware applications
df = pd.read_csv('data/drebin_malware_sha256.csv')
malware_hashes = set(df['sha256'])

In [5]:
data_dir = 'data/drebin/'

# Load the data and record the feature set
data, labels, features = [], [], set()
subset = 1000 # use only a subset of the dataset

for file_path in listdir(data_dir)[:subset]:
    with open(data_dir + file_path) as f:
        lines = [x.strip() for x in f]
        if lines == '':
            continue
        data.append(lines)
        features |= set(lines)
        label = 1 if file_path in malware_hashes else 0
        labels.append(label)

In [6]:
# Provide statistics about the feature classes in the DREBIN dataset

classes, seen = {}, set()
for d in data:
    for feature in d:
        if feature in seen:
            continue
        seen.add(feature)
        f = feature.split('::')[0]
        if f == '':
            continue
        classes[f] = classes.get(f, 0) + 1
        
classes_count = sum(classes.values())

for k, v in classes.items():
    print('{}: {}'.format(k, v))
print('Sum of all features: {}.'.format(classes_count))

feature: 19
url: 5928
api_call: 128
call: 128
permission: 190
real_permission: 40
activity: 3168
intent: 214
service_receiver: 662
provider: 68
Sum of all features: 10545.


In [7]:
# Fit a label encoder and transform the input data
label_encoder = LabelEncoder()
label_encoder.fit(list(features))

encoded = []
for i, x in enumerate(data):
    if i != 0 and i % 500 == 0:
        print('Label encoded {} examples.'.format(i))
    e = label_encoder.transform(x)
    encoded.append(e)

# Create a sparse binary matrix from the input data
indptr = np.cumsum([0] + [len(x) for x in encoded])
indices = np.concatenate(encoded)
ones = np.ones(indices.size)

N, K = len(data), len(features)
X = csr_matrix((ones, indices, indptr), shape=(N, K))
y = np.array(labels)
print('Shape of X: {}. Shape of y: {}.'.format(X.shape, y.shape))

Label encoded 500 examples.
Shape of X: (1000, 10545). Shape of y: (1000,).


In [8]:
# Uncomment to save & load data for future use

# save_npz('data/tmp/malware_X.npz', X)
# np.save('data/tmp/malware_y.npy', y)

# X = load_npz('data/tmp/malware_X.npz')
# y = np.load('data/tmp/malware_y.npy')

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=seed)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((900, 10545), (900,), (100, 10545), (100,))

In [9]:
# Fit logistic regression and perform CV

Cs = np.arange(0.5, 1.5, 0.025)
class_weight = 'balanced' # balanced or None
scoring = 'accuracy' # accuracy or roc_auc

clf = LogisticRegressionCV(
    Cs=Cs, 
    cv=5, 
    n_jobs=-1, 
    penalty='l2',
    scoring=scoring,
    class_weight=class_weight,
    random_state=seed
)
clf.fit(X_train, y_train)

# Get best score and C value
mean_scores = np.mean(clf.scores_[1], axis=0)
best_idx = np.argmax(mean_scores)
best_score = mean_scores[best_idx]
best_C = clf.Cs_[best_idx]

constant_acc = 1 - sum(y_train) / len(y_train)
print('Training score for a constant model f(x) = 0 is: {:.2f}%.'.format(constant_acc*100))
print('Training accuracy is: {:.2f}%. Best C is: {:.4f}. Class weight: {}. Scoring: {}.'
      .format(clf.score(X_train, y_train)*100, best_C, class_weight, scoring))

# Training score for a constant model f(x) = 0 is: 95.11%.
# Training accuracy is: 99.78%. Best C is: 0.7000. Class weight: balanced. Scoring: accuracy.

Training score for a constant model f(x) = 0 is: 95.11%.
Training accuracy is: 99.78%. Best C is: 0.7000. Class weight: balanced. Scoring: accuracy.


In [10]:
constant_acc = 1 - sum(y_test) / len(y_test)
print('Test score for a constant model f(x) = 0 is: {:.2f}%.'.format(constant_acc*100))
print('Test accuracy is: {:.2f}%.'.format(clf.score(X_test, y_test)*100))

# Test score for a constant model f(x) = 0 is: 95.50%.
# Test accuracy is: 97.40%.

Test score for a constant model f(x) = 0 is: 93.00%.
Test accuracy is: 98.00%.


In [11]:
def get_feature_class(idx, label_encoder):
    feature = label_encoder.inverse_transform(idx)
    return feature.split('::')[0]

# Replicate Algorithm 1 from Grosse et al. paper
def find_adversarial(x, clf, oracle, label_encoder, k=20):
    if clf.predict([x]) == 0:
        raise Exception('Initial example is already classified as bening.')
    x_star = np.array(x, dtype='intc')
    distortions = 0
    while clf.predict([x_star]) != 0 and distortions < k:
        derivative = oracle.eval(x_star)
        idxs = np.argsort(derivative)
        for i, idx in enumerate(idxs):
            # Check if changing the feature is permitted.
            if x_star[idx] == 0 and get_feature_class(idx, label_encoder) in MANIFEST_FEATURES:
                x_star[idx] = 1
                break
            if i == len(idxs) - 1:
                raise Exception('Adversarial example is impossible to create.')
        distortions += 1
    if distortions == k:
        raise Exception('Distortion bound reached.')
    return x_star

In [14]:
oracle = LogisticRegressionScikitSaliencyOracle(clf)

for i, x in enumerate(X):
    x = x.toarray()[0]
    if clf.predict([x]) == 1:
        print('\nCrafting adversarial example for example: {}.'.format(i))
        try:
            x_adv = find_adversarial(x, clf, oracle, label_encoder, k=20)
        except Exception as e:
            print(e)
            continue
        x_prob, x_adv_prob = clf.predict_proba([x])[0, 0], clf.predict_proba([x_adv])[0, 0]
        print('Start probability: {:.2f}. Resulting probability: {:.2f}.'.format(x_prob, x_adv_prob))


Crafting adversarial example for example: 18.
Start probability: 0.08. Resulting probability: 0.54.

Crafting adversarial example for example: 102.
Distortion bound reached.

Crafting adversarial example for example: 119.
Start probability: 0.19. Resulting probability: 0.56.

Crafting adversarial example for example: 125.
Start probability: 0.01. Resulting probability: 0.53.

Crafting adversarial example for example: 126.
Start probability: 0.02. Resulting probability: 0.53.

Crafting adversarial example for example: 132.
Start probability: 0.01. Resulting probability: 0.50.

Crafting adversarial example for example: 166.
Distortion bound reached.

Crafting adversarial example for example: 215.
Start probability: 0.02. Resulting probability: 0.55.

Crafting adversarial example for example: 217.
Start probability: 0.02. Resulting probability: 0.54.

Crafting adversarial example for example: 230.
Start probability: 0.38. Resulting probability: 0.51.

Crafting adversarial example for exa