In [1]:
import os
# Move to the project root so the relative 'data/...' and 'reports/...' paths
# used by later cells resolve.
# NOTE(review): this is a machine-specific absolute path; os.chdir raises
# FileNotFoundError on any machine where this directory does not exist.
os.chdir('/home/rcgonzal/DSC180Malware/m2v-adversarial-hindroid/')

import pandas as pd
import numpy as np

from src.model.model import M2VDroid
from src.model.hindroid import Hindroid
from src.data.hindroid_etl import make_models
from src.analysis.analysis import create_performance_table
from src.utils import find_apps
from scipy import sparse

%load_ext autoreload
%autoreload 2

In [2]:
# Build the Hindroid model object from the train-set output directory.
# The path is relative, so it depends on the os.chdir() call in the first cell.
# NOTE(review): presumably this loads artifacts previously saved in that
# directory — confirm against src/model/hindroid.py.
hindroid = Hindroid('data/out/train-set/')

In [4]:
# Directory holding the attack artifacts (relative to the working directory)
attack_folder = os.path.join('data', 'out', 'all-apps', 'attack')
advxs_path = os.path.join(attack_folder, 'advxs.npz')
inputs_path = os.path.join(attack_folder, 'inputs.npz')

# Load the adversarial examples ('advxs.npz') and the corresponding inputs
# ('inputs.npz') as scipy sparse matrices. Both files are opened before
# either one is parsed.
with open(advxs_path, 'rb') as advxs_file, \
        open(inputs_path, 'rb') as inputs_file:
    adv_examples = sparse.load_npz(advxs_file)
    input_examples = sparse.load_npz(inputs_file)
    
# change in apis by app
# np.mean(np.sum(np.round(adv_examples.todense()) != input_examples.todense(), axis=1))

In [7]:
# Mean number of APIs changed per example.
# For every row, count the columns where the rounded adversarial example
# differs from the original input. The work stays sparse: `rint()` rounds the
# stored values (round-half-to-even, the same rule np.round applies) and a
# sparse `!=` yields a sparse boolean mask, so no dense row of width
# adv_examples.shape[1] is allocated per example and no Python loop is needed.
changed_mask = adv_examples.rint() != input_examples

# sum(axis=1) returns an (n_examples, 1) matrix; flatten it to a 1-D array so
# later cells can keep treating `changed_apis` as a plain ndarray.
changed_apis = np.asarray(changed_mask.sum(axis=1)).ravel()
changed_apis.mean()

38194.84

In [8]:
# Average, over examples, of the fraction of all API columns that changed
np.mean(changed_apis / adv_examples.shape[1])

0.015062820842977272

In [63]:
# Per-example fraction of API columns whose value differs between the
# adversarial example and the original input (one row per example).
# The comparison is done on the sparse matrices directly: calling todense()
# on both would allocate two full n_examples x n_apis dense arrays.
# NOTE(review): unlike the changed-API count cell, values are NOT rounded
# before comparing here — confirm which definition of "changed" is intended.
(adv_examples != input_examples).mean(axis=1)

matrix([[0.02179356],
        [0.01061323],
        [0.01647906],
        [0.01005441],
        [0.01721219],
        [0.01475212],
        [0.01475567],
        [0.02068578],
        [0.01245375],
        [0.02275464]])

In [64]:
# Shape of the model's `A` matrix (recorded output: 2024 x 2535703).
# NOTE(review): presumably apps x APIs, with the column count matching
# adv_examples.shape[1] — confirm.
hindroid.A.shape

(2024, 2535703)

In [9]:
# Predict with every Hindroid kernel (AAT, ABAT, APAT, ABPBTAT, APBPTAT) on the
# original inputs and display the resulting table (one column per kernel).
# Expensive: the recorded progress bars add up to roughly 35 minutes, dominated
# by the ABPBTAT and APBPTAT kernels.
# NOTE: `orignal_predictions` is misspelled, but later cells reference this
# exact name, so it must not be renamed in isolation.
orignal_predictions = hindroid.batch_predict(input_examples)
orignal_predictions

Predicting AAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting AAT, batch: 100%|██████████| 5/5 [00:03<00:00,  1.35it/s]
Predicting ABAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABAT, batch: 100%|██████████| 5/5 [02:11<00:00, 26.34s/it]
Predicting APAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APAT, batch: 100%|██████████| 5/5 [00:16<00:00,  3.23s/it]
Predicting ABPBTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABPBTAT, batch: 100%|██████████| 5/5 [19:32<00:00, 234.58s/it]
Predicting APBPTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APBPTAT, batch: 100%|██████████| 5/5 [12:16<00:00, 147.20s/it]


Unnamed: 0,AAT,ABAT,APAT,ABPBTAT,APBPTAT
0,1,1,1,1,1
1,1,1,1,1,1
2,1,1,1,1,1
3,1,1,1,1,1
4,1,1,1,1,1
...,...,...,...,...,...
495,1,1,1,1,1
496,1,1,1,1,1
497,1,1,1,1,1
498,1,1,1,1,1


In [10]:
# Predict with every Hindroid kernel on the adversarial examples and display
# the resulting table (same columns as the predictions on the original inputs).
# Expensive: the recorded progress bars add up to roughly 30 minutes, dominated
# by the ABPBTAT and APBPTAT kernels.
adv_predictions = hindroid.batch_predict(adv_examples)
adv_predictions

Predicting AAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting AAT, batch: 100%|██████████| 5/5 [00:05<00:00,  1.16s/it]
Predicting ABAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABAT, batch: 100%|██████████| 5/5 [01:47<00:00, 21.48s/it]
Predicting APAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APAT, batch: 100%|██████████| 5/5 [00:52<00:00, 10.50s/it]
Predicting ABPBTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABPBTAT, batch: 100%|██████████| 5/5 [16:30<00:00, 198.20s/it]
Predicting APBPTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APBPTAT, batch: 100%|██████████| 5/5 [12:10<00:00, 146.16s/it]


Unnamed: 0,AAT,ABAT,APAT,ABPBTAT,APBPTAT
0,0,1,0,1,0
1,0,1,0,1,0
2,0,1,0,1,0
3,0,1,0,1,0
4,0,1,0,1,0
...,...,...,...,...,...
495,0,1,0,1,0
496,0,1,0,1,0
497,0,1,0,1,0
498,0,1,0,1,0


In [16]:
# Persist both prediction tables next to the attack artifacts; the row index
# is not written.
orignal_predictions.to_csv(
    path_or_buf='data/out/all-apps/attack/original_predictions.csv',
    index=False,
)
adv_predictions.to_csv(
    path_or_buf='data/out/all-apps/attack/adv_predictions.csv',
    index=False,
)

In [17]:
# Fraction of examples, per kernel column, whose prediction on the adversarial
# example differs from the prediction on the original input.
(orignal_predictions != adv_predictions).mean()

AAT        0.972
ABAT       0.116
APAT       0.946
ABPBTAT    0.108
APBPTAT    0.890
dtype: float64

In [40]:
# How many original inputs received each AAT label
orignal_predictions['AAT'].value_counts()

<reversed at 0x7fa76a1f7c10>

In [37]:
# AAT label counts followed by the total number of examples
np.append(orignal_predictions['AAT'].value_counts(), len(orignal_predictions))

array([445,  55, 500])

In [45]:
# Build the attack-success table: for each kernel, the fraction of examples
# whose prediction flipped, broken down by the original AAT prediction, plus a
# 'Total' row and a 'Support' column with the number of examples per group.
label_names = {1: 'Malware', 0: 'Benign'}

# True where the adversarial prediction differs from the original one.
comparison = orignal_predictions != adv_predictions
comparison['Original AAT Label'] = orignal_predictions.AAT.map(label_names)

# Flip rate per original AAT label.
out = comparison.groupby('Original AAT Label').mean()

# Overall flip rate. `comparison` now also holds the string label column, so
# restrict the mean to the boolean columns explicitly: pandas >= 2.0 raises a
# TypeError here instead of silently dropping the non-numeric column.
out.loc['Total'] = comparison.mean(numeric_only=True)

# Number of examples behind each row; assignment aligns on the row labels.
counts = orignal_predictions.AAT.value_counts()
counts.index = counts.index.map(label_names)
counts['Total'] = orignal_predictions.shape[0]
out['Support'] = counts

out.to_csv('reports/assets/attack_success.csv')

In [14]:
# Distribution of AAT predictions on the original inputs
orignal_predictions['AAT'].value_counts()

1    445
0     55
Name: AAT, dtype: int64