$$
\newcommand{\mat}[1]{\boldsymbol {#1}}
\newcommand{\mattr}[1]{\boldsymbol {#1}^\top}
\newcommand{\matinv}[1]{\boldsymbol {#1}^{-1}}
\newcommand{\vec}[1]{\boldsymbol {#1}}
\newcommand{\vectr}[1]{\boldsymbol {#1}^\top}
\newcommand{\rvar}[1]{\mathrm {#1}}
\newcommand{\rvec}[1]{\boldsymbol{\mathrm{#1}}}
\newcommand{\diag}{\mathop{\mathrm {diag}}}
\newcommand{\set}[1]{\mathbb {#1}}
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
\newcommand{\pderiv}[2]{\frac{\partial #1}{\partial #2}}
\newcommand{\bb}[1]{\boldsymbol{#1}}
$$
# ACISDetector
<a id=ACISDetector></a>

In [1]:
import glob
import os
import pathlib
import re
import shutil
import sys
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import torch

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Global plotting default: use a 12pt font in every figure.
plt.rcParams['font.size'] = 12

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cpu


## Architecture and instruction set
<a id=arch_is></a>
<!--isadetect 

@inproceedings{kairajarvi2020isadetect,
author={Kairaj\"arvi, Sami and Costin, Andrei and H\"am\"al\"ainen, Timo},
title={{ISAdetect: Usable Automated Detection of CPU Architecture and Endianness for Executable Binary Files and Object Code}},
booktitle={Proceedings of the Tenth ACM Conference on Data and Application Security and Privacy},
year={2020},
url="https://doi.org/10.1145/3374664.3375742"
}
-->

In [58]:
import isadetect.helpers as isa_api 
import src.arch_classifier as arch_api
import src.hyperparams as hp
from src.binary_dataset import FeatureDataset
import src.arch_trainer


## Preprocessing 

In [29]:
# NOTE(review): the URL's file name is shakespeare.txt, while the dataset that is
# actually used below is a local features CSV - confirm whether this download
# helper (and its default URL) is still needed.
DOWNLOAD_URL = 'https://github.com/cedricdeboom/character-level-rnn-datasets/raw/master/datasets/shakespeare.txt'
DATA_DIR = pathlib.Path.home().joinpath('.pytorch-datasets')


def download_dataset(out_path=DATA_DIR, url=DOWNLOAD_URL, force=False):
    """Download ``url`` into ``out_path`` unless the file is already present.

    :param out_path: Directory to store the file in. It is created, together
        with any missing parent directories, if it does not exist yet.
    :param url: Location to fetch. The local file name is the URL's basename.
    :param force: When True, download even if the target file already exists.
    :return: Path of the local file (``os.path.join`` of directory and name).
    """
    # parents=True so that a nested cache directory can be created in one go.
    pathlib.Path(out_path).mkdir(parents=True, exist_ok=True)
    out_filename = os.path.join(out_path, os.path.basename(url))

    if os.path.isfile(out_filename) and not force:
        print(f'Dataset file {out_filename} exists, skipping download.')
        return out_filename

    print(f'Downloading {url}...')
    # Stream the response straight to disk instead of buffering it in memory.
    with urllib.request.urlopen(url) as response, open(out_filename, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
    print(f'Saved to {out_filename}.')
    return out_filename


# The download step is disabled; the features CSV is read from a relative path.
#DATASET_FILE = download_dataset()
DATASET_FILE = "./dataset/features.csv" 
print(DATASET_FILE)

./dataset/features.csv


In [22]:
# Tunable constants for this cell.
SPLIT_SEED = 42        # fixes the train/test partition across kernel restarts
TRAIN_FRACTION = 0.7   # share of the samples that goes to the training set
batch_size = 10

binary_dataset = FeatureDataset(DATASET_FILE)
N = len(binary_dataset)
print(f'features length: {N}')

# Split into train/test. A dedicated, seeded generator makes the split
# reproducible without touching torch's global RNG state.
train_length = int(TRAIN_FRACTION * N)
test_length = N - train_length
ds_train, ds_test = torch.utils.data.random_split(
    binary_dataset,
    (train_length, test_length),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
assert len(ds_train) + len(ds_test) == N, 'train/test split lost samples'

print(f'Train: {len(ds_train)} samples')
print(f'Test: {len(ds_test)} samples')

dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True)

# Inspect one sample to report the feature/label sizes; a 0-dim item is
# reported as size 1.
x0, y0 = ds_train[0]
dataset_shape = (x0.shape if x0.dim() > 0 else 1), (y0.shape if y0.dim() > 0 else 1)
print('input size =', dataset_shape[0], "X", dataset_shape[1])

features length: 581
Train: 406 samples
Test: 175 samples
input size = torch.Size([293]) X 1


## Training
<a id=part2_3></a>

To plot our results and compare them, we will use `plot.py`.
We will then use the following function to load multiple experiment results and plot them together.

In [52]:
from jupyter_utils.plot import plot_fit
# Shared figure handle; the final plotting cell passes it to plot_fit and
# rebinds it to the value plot_fit returns.
fig = None
# Collected fit results; the final plotting cell iterates over this list.
fit_res = []

# Print plot_fit's signature and docstring for reference.
help(plot_fit)

def plot_exp_results(filename_pattern, results_dir='results'):
    """Plot every saved experiment whose file name matches a glob pattern.

    All matching result files are loaded in sorted order and drawn onto one
    shared figure via ``plot_fit``; afterwards the configuration of the last
    loaded experiment is printed, minus two per-experiment keys.

    :param filename_pattern: Glob pattern, relative to ``results_dir``.
    :param results_dir: Directory that holds the result files.
    :return: None. A message is written to stderr when nothing matches.
    """
    result_files = sorted(glob.glob(os.path.join(results_dir, filename_pattern)))
    if not result_files:
        print(f'No results found for pattern {filename_pattern}.', file=sys.stderr)
        return

    fig = None
    cfg = None
    for filepath in result_files:
        filename = os.path.basename(filepath)
        # Raw string: '\d' in a normal string literal is an invalid escape.
        m = re.match(r'exp\d_(\d_)?(.*)\.json', filename)
        # Fall back to the bare file name when it does not follow the
        # expN_[M_]<label>.json convention instead of failing on m[2].
        legend = m[2] if m else os.path.splitext(filename)[0]
        # NOTE(review): load_experiment is not defined or imported in the
        # visible cells - confirm where it comes from.
        cfg, exp_fit_res = load_experiment(filepath)
        fig, _ = plot_fit(exp_fit_res, fig, legend=legend, log_loss=True)

    # Drop the keys that are excluded from the "common config" print-out;
    # tolerate configs that never had them.
    for key in ('filters_per_layer', 'layers_per_block'):
        cfg.pop(key, None)
    print('common config: ', cfg)

Help on function plot_fit in module jupyter_utils.plot:

plot_fit(fit_res: jupyter_utils.train_results.FitResult, fig=None, log_loss=False, legend=None)
    Plots a FitResult object.
    Creates four plots: train loss, test loss, train acc, test acc.
    :param fit_res: The fit result to plot.
    :param fig: A figure previously returned from this function. If not None,
        plots will the added to this figure.
    :param log_loss: Whether to plot the losses in log scale.
    :param legend: What to call this FitResult in the legend.
    :return: The figure.



In [59]:
# Fetch and display the hyperparameters defined for the random-forest run.
# NOTE(review): rf_hp is only printed; it is not passed to the constructor below.
rf_hp = hp.random_forest_hp()
print(rf_hp)

# Build the random-forest classifier with explicit settings and show its repr.
_randomForest = arch_api.RandomForest(
    in_estimators=100,
    in_max_depth=32,
    random_state=0,
    n_jobs=-1,
)
print(_randomForest)

# Training is disabled for now.
# NOTE(review): list.insert() needs an index argument, so append() is probably
# what is wanted; `trainer` is also not defined in the visible cells.
#fit_res.insert(trainer(_randomForest))

{'wstd': 0.1, 'lr': 0.01, 'reg': 0.05}
RandomForest()


In [60]:
# Fetch and display the hyperparameters defined for the MLP run.
mlp_hp = hp.mlp_hp()
print(mlp_hp)

# Model construction is disabled for now.
#_mlp = arch_api.MLP(100,32,0,1,0)
#print(_mlp)

# Training is disabled as well.
# NOTE(review): list.insert() needs an index argument, so append() is probably
# what is wanted; `trainer` is also not defined in the visible cells.
#fit_res.insert(trainer(_mlp))


{'wstd': 0.1, 'lr_vanilla': 0.01, 'lr_momentum': 0.02, 'lr_rmsprop': 1e-05, 'reg': 0.01}


In [61]:
# Fetch and display the hyperparameters defined for the CNN run.
cnn_hp = hp.cnn_hp()
print(cnn_hp)

# Model construction is disabled for now.
#_cnn = arch_api.CNN()
#print(_cnn)

# Training is disabled as well.
# NOTE(review): list.insert() needs an index argument, so append() is probably
# what is wanted; `trainer` is also not defined in the visible cells.
#fit_res.insert(trainer(_cnn))


{'wstd': 0.1, 'lr': 0.001}


In [56]:
# Overlay every collected fit result on one shared figure.
# Each result is passed on its own (not the whole list), and the legend label
# is derived from its position because no per-result name is available here.
for idx, res in enumerate(fit_res):
    fig, axes = plot_fit(res, fig, legend=f'model {idx}', log_loss=True)
