In [2]:
import numpy
import keras

from keras.models import Sequential
from keras.layers import Dense

numpy.random.seed(0xC0FFEE)

Using TensorFlow backend.


# Dataset

## Loading data

Run the below code to download a copy of the dataset (if you don't already have it):

In [0]:
import requests
import io
import zipfile

response = requests.get("http://www.schonlau.net/masquerade/masquerade-data.zip")
dataset_file = io.BytesIO(downloaded_dataset.content)
zipped_dataset = zipfile.ZipFile(dataset_file)
zipped_dataset.extractall('data/masquerade-data')

In [0]:
# http://www.schonlau.net/intrusion.html
# download Masquerade Data (zip File)

import pandas as pd
import os
directory = './data/masquerade-data'

In [0]:
import re
def sorted_nicely( l ):
    """ Sorts the given iterable in the way that is expected.
 
    Required arguments:
    l -- The iterable to be sorted.
 
    """
    convert = lambda text: int(text) if text.isdigit() else text
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    return sorted(l, key = alphanum_key)

In [0]:
users = range(1,51)
df = pd.DataFrame()

for filename in sorted_nicely(os.listdir(directory)):
    user = pd.read_csv(os.path.join(directory, filename), header=None)
    df = pd.concat([df, user], axis = 1)
    
df.columns = sorted_nicely(os.listdir(directory))

In [35]:
df

Unnamed: 0,User1,User2,User3,User4,User5,User6,User7,User8,User9,User10,...,User41,User42,User43,User44,User45,User46,User47,User48,User49,User50
0,cpp,cat,Xsession,cat,cpp,cpp,cpp,cpp,cpp,cat,...,cpp,cpp,cpp,cpp,hostname,cpp,cat,cat,touch,cpp
1,sh,stty,sed,mail,sh,sh,sh,sh,sh,mail,...,sh,sh,sh,sh,cat,sh,cat,id,sh,sh
2,xrdb,ls,Xsession,csh,xrdb,xrdb,xrdb,xrdb,xrdb,tcsh,...,xrdb,xrdb,xrdb,xrdb,.xsessio,xrdb,date,whoami,appdefpa,xrdb
3,cpp,ls,sed,toolches,mkpts,mkpts,cpp,mkpts,cpp,tcsh,...,cpp,cpp,mkpts,cpp,sed,cpp,cat,telnet,cpp,mkpts
4,sh,ls,grep,sh,test,hostname,sh,env,sh,cat,...,sh,sh,stty,sh,cat,sh,cat,ksh,sh,env
5,xrdb,ls,wc,rm,hostname,awk,xrdb,ksh,xrdb,mail,...,xrdb,xrdb,tset,xrdb,.xsessio,xrdb,date,cpp,xrdb,ksh
6,mkpts,ls,date,sh,stty,stty,mkpts,ksh,mkpts,tcsh,...,mkpts,mkpts,chmod,mkpts,sed,mkpts,flock,sh,sleep,ksh
7,env,xdvi.rea,uname,MediaMai,date,tset,test,userenv,hostname,sh,...,hostname,hostname,uname,stty,hostname,hostname,tcsh,xrdb,reaper,userenv
8,csh,xdvi,uname,ls,echo,env,stty,wait4wm,date,sendmail,...,stty,awk,cpp,tset,stty,date,keep_up,mkpts,sh,wait4wm
9,csh,cat,uname,cpp,[,ksh,hostname,xhost,env,sendmail,...,env,stty,sh,env,touch,stty,sh,env,sleep,xhost


In [0]:
train, test = df.head(501), df.tail(len(df) - 501)

## Generate a Random Dataset

Make up a silly dataset to make sure the oracle model expects the right thing.

In [0]:
commands = [
    '=',
    '==',
    '[',
    '1.1',
    '1.2',
    '1.3',
    '4Dwm',
    '5650.exe',
    '5836.exe',
    '7105.exe',
    '7956.exe',
    '8117.exe',
    '8708.exe',
    '9term',
    'aacdec',
    'aa.new.n',
    'aa.new.s',
    'accesspo',
    'acc.prof',
    'acroread',
    'addrinfo',
    'admin',
    'agrep',
    'aiffplay',
    'ama.chec',
    'ama_volu',
    'a.out',
    'apanel',
    'appdefpa',
    'ar',
    'arch',
    'Archie',
    'arch_uni',
    'arp',
    'array_te',
    'as',
    'as1',
    'ascii',
    'a.sl',
    'at',
    'augment_',
    'aupanel',
    'auplay',
    'aus',
    'autoconf',
    'awk',
    'awk.html',
    'backtalk',
    '%backup%',
    'banner',
    'basename',
    'bash',
    'BATCH',
    'bb_rep',
    'bb_rep_f',
    'bb_rep_n',
    'bb_rep_t',
    'bc',
    'bdftopcf',
    'bdiff',
    'be',
    'bibtex',
    'bindkey',
    'bind_so_',
    'bind_uni',
    'binhex',
    'bison',
    'blossom4',
    'bo_rep',
    'bo_rep_c',
    'bo_rep_f',
    'bo_rep_t',
    'bo_table',
    'bo_top',
    'bo_type',
    'btbuild',
    'btcreat',
    'byte_rev',
    'cal',
    'calendar',
    'calldd',
    'call_fil',
    'calprog',
    'cancel',
    'capture',
    'cat',
    'catalog',
    'catdoc',
    'CC',
    'cc1',
    'cdc',
    'cdec',
    'cfe',
    'c++filt',
    'cgiparse',
    'chat.awk',
    'chkconfi',
    'chmod',
    'chown',
    'ci',
    'cled',
    'cled_jct',
    'clock',
    'cmex',
    'cmp',
    'co',
    'col',
    'colthloo',
    'colthrea',
    'comm',
    'comma.te',
    'compress',
    'comp_uni',
    'concorde',
    'config.g',
    'config.s',
    'configur',
    'Configur',
    'conftest',
    'convert',
    'c++patch',
    'cpeek',
    'cpio',
    'cplex',
    'cpp',
    'crnl',
    'crontab',
    'crypt',
    'csh',
    'ctags',
    'cut',
    'cxwsh',
    'data_cl.',
    'date',
    'date-dif',
    'dbl',
    'dbx',
    'dbxpcs',
    'dc',
    'DC-prn',
    'dd',
    'ddd',
    'ddtest',
    'dec',
    'define',
    'delatex',
    'delta',
    'demo',
    'deroff',
    'desktopM',
    'detail_o',
    'detex',
    'dev.moti',
    'dev.post',
    'dev.X11',
    'df',
    'dialog.s',
    'dict',
    'diff',
    'dig',
    'dirname',
    'doc2ps',
    'doctype',
    'do.hourl',
    'domainna',
    'do.priso',
    'dot',
    'do.trit',
    'download',
    'dpost',
    'dprog',
    'drag',
    'drag2',
    'drawgrap',
    'drf',
    'drill_do',
    'driver',
    'driverwr',
    'ds_ar',
    'du',
    'dummy',
    'dvipost',
    'dvips',
    'dviselec',
    'e',
    'echo',
    'ed',
    'edgcpfe',
    'edgegen',
    'edg_prel',
    'efm',
    'egrep',
    'elm',
    'emacs-20',
    'emrvol',
    'enc',
    'endsessi',
    'engine',
    'enscript',
    'env',
    'eptofax',
    'eqn',
    'etags',
    'euphony',
    'euphony3',
    'ex',
    'expr',
    'expreser',
    'exrecove',
    'extract_',
    'f',
    'f2ps',
    'fa.booku',
    'faces',
    'fa.click',
    'false',
    'fastmail',
    'fcom',
    'fec',
    'fecc',
    'fgrep',
    'field',
    'FIFO',
    'fig2dev',
    'file',
    'find',
    'findobj',
    'find_RT',
    'finger',
    'fish2',
    'fish4',
    'flex',
    'flock',
    'flog',
    'flow',
    'fls_star',
    'fm',
    'fmarch',
    'fm_flb',
    'fm_misd',
    'fmprintd',
    'fmt',
    'fold',
    'foo',
    'force_up',
    'format.d',
    'frm',
    'ftp',
    'ftp.orig',
    'fvwm',
    'FvwmPage',
    'fx',
    'fxfilter',
    'fxprint',
    'fxsend',
    'fxshut',
    'fxstat',
    'fxstatus',
    'fxvision',
    'gawk',
    'gcc',
    'gdb',
    'gdiff',
    'generic',
    'gengraph',
    'get',
    'get_acc',
    'get_acc_',
    'getans',
    'getconf',
    'gethost',
    'get_line',
    'get.line',
    'getopt',
    'getpgrp',
    'getsampl',
    'gettxt',
    'gftopk',
    'ghostvie',
    'giftrans',
    'gimp',
    'gmake',
    'gnudiff',
    'gnuplot',
    'gnuplot_',
    'GoodStuf',
    'gordon',
    'gp',
    'gramlx',
    'graph_te',
    'gre',
    'grep',
    'groups',
    'gr_top',
    'gs',
    'gs3.33',
    'gsftopk',
    'gv',
    'gzip',
    'head',
    'heartche',
    'help',
    'help2',
    'help.fin',
    'help.key',
    'help.sor',
    'help.top',
    'hexbin',
    'hightoll',
    'hilow',
    'hinv',
    'hippo',
    'history',
    'hist_tes',
    'hoc',
    'host',
    'hostname',
    'hpost',
    'ht',
    'htn_date',
    'htn_edit',
    'htn_repo',
    'hype',
    'ico',
    'id',
    'identify',
    'imake',
    'imgview',
    'inc',
    'indent',
    'info',
    'infocmp',
    'init_src',
    'inline',
    'install-',
    'interest',
    'ipeek',
    'ispell',
    'j11',
    'jar',
    'java',
    'javac',
    '.java_wr',
    'join',
    'jot',
    'jre',
    'justlex',
    'justspec',
    'keep_up',
    'kill',
    'killall',
    'kludgepl',
    'kmist',
    'ksh',
    'last',
    'lattice_',
    'launchef',
    'lc',
    'lcc',
    'ld',
    'ld_',
    'ld64_',
    'lec2.awk',
    'lec.awk',
    'led',
    'less',
    'lex',
    'lex.spec',
    'line.pro',
    'lint',
    'lint1',
    'lint2',
    'list2.pl',
    'list.pl',
    'lks',
    'lmstat',
    'ln',
    'lo',
    'local.Sq',
    'LOCK',
    'logname',
    'long',
    'lp',
    'lp2col',
    'lpdsend',
    'lpe3',
    'lp.orig',
    'lpq',
    'lpr',
    'lps',
    'ls',
    'm',
    'm3_binin',
    'm3_compt',
    'm3_flsd',
    'm3_flse',
    'm3_manfl',
    'm4',
    'macunpac',
    'magma.ex',
    'mail',
    'Mail',
    'mailbox',
    'mailp',
    'mailx',
    'Main',
    'make',
    'make_del',
    'make_hig',
    'makeinde',
    'maker5X.',
    '.maker_w',
    'MakeTeXP',
    'make_tod',
    'makexgvi',
    'man',
    'maple.sy',
    'mapleTTY',
    'mars.sh',
    'matlab',
    'matlab_l',
    'mbackup',
    'mc',
    'MediaMai',
    'mesg',
    'metamail',
    'mhl',
    'mhn',
    'mi',
    'mimencod',
    'mkdir',
    'mkfontdi',
    'mklink.s',
    'mkpts',
    'more',
    'Mosaic',
    'movemail',
    'moviepla',
    'mp',
    'mpeg_pla',
    'mplotcha',
    'mplotps',
    'mplottek',
    'mplotx11',
    'msort',
    'munpack',
    'mycut',
    'mycut2',
    'my.ls',
    'my.ls.2',
    'my.ls.re',
    'mysql',
    'mysqladm',
    'mysql_in',
    'mywsh',
    'named',
    'nawk',
    'ncdquery',
    'neato',
    'nedit',
    'neqn',
    'netscape',
    'netstat',
    'newalias',
    'newmail',
    'news',
    'nfsstat',
    'nice',
    'nlcrack',
    'nlcrack2',
    'nlgen',
    'nlx',
    'nly',
    'nlz',
    'nlz2',
    'nm',
    'nm_elf',
    'nohup',
    'nospool',
    'npasplit',
    'nr',
    'nroff',
    'nscal',
    'ns-insta',
    'nslookup',
    'ntrim',
    'ntrim.in',
    'nw_8s_un',
    'od',
    'OLI.sh',
    'on',
    'one.awk',
    'op_cvmod',
    'op_mko',
    'op_mksim',
    'opnet',
    'op_newus',
    'op_runsi',
    'orig',
    'orig_sca',
    'overlap',
    'overlap2',
    'p',
    '=p',
    'pacdec',
    'pagemail',
    'panel_te',
    'passwd',
    'paste',
    'patch',
    'payphone',
    'pcst',
    'pcst1',
    'pcst.pur',
    'pdf2ps',
    'pdftops',
    'perl',
    'pftp',
    'pg',
    'pine',
    'ping',
    'plaid',
    'PLATFORM',
    'point.sh',
    'polar',
    'popper',
    'post',
    'postprin',
    'postreve',
    'pow',
    'ppost',
    'ppq',
    'ppqcomma',
    'pq',
    'pr',
    'prefix',
    'print_ca',
    'print_de',
    'print_do',
    'printf',
    'printreq',
    'print_sc',
    'print_us',
    'prisoncs',
    'prison_f',
    'prison_p',
    'prison_r',
    'profile',
    'ps',
    'ps2epsi',
    'ps2pdf',
    'psnr',
    'psu',
    'ptelnet',
    'punlx',
    'purify.s',
    'pwd',
    'q_eg',
    'q_egtest',
    'qk',
    'qpage',
    'q_test',
    'quota',
    'r',
    'R',
    'random_t',
    'randseq',
    'rbnull',
    'rcc',
    'rcp',
    'rcsdiff',
    'rdistd',
    'readacct',
    'readmsg',
    'reaper',
    'red',
    'Reducyr',
    'register',
    'renice',
    'req.new',
    'resize',
    'reverse',
    'rexecd',
    'rftp',
    'richtext',
    'rlogin',
    'rm',
    'rmail',
    'rmdir',
    'rmm',
    'rootless',
    'rpcinfo',
    'rsh',
    'rshd',
    'rtslave',
    'runnit',
    'run_swin',
    'rup',
    'ruptime',
    'rusers',
    'rvplayer',
    'rwho',
    'rz',
    'S',
    'sam',
    'sample',
    'samterm',
    'sar',
    'scampdet',
    'scamp_fi',
    'scamp_pr',
    'scamp_to',
    'scan',
    'scatter_',
    'sccs',
    'scheme',
    'scp',
    'scroll_t',
    'sd',
    'sdec',
    'sdlgMoti',
    'sed',
    'seecalls',
    'seediff',
    'see_scam',
    'sendmail',
    'seq',
    'setup',
    'sfplay',
    'sfstdgen',
    'sgihelp',
    'sgo',
    'sh',
    'shar',
    'shelpMot',
    'show',
    'showcal',
    'showdoc',
    'show_fil',
    'showfile',
    'shownona',
    'showprod',
    'showps',
    'sim301bK',
    'sim301bS',
    'Sizup',
    'sleep',
    'slide',
    'Slmclien',
    'Slmhelpe',
    'slogin',
    'sort',
    'soundpla',
    'soxpand',
    'spec',
    'spell',
    'split',
    'splitmai',
    'sprog',
    'Sqpe',
    'sqp_fill',
    'Squirrel',
    'ssh',
    'ssh-add',
    'ssh-agen',
    'ssh-askp',
    'ssh-keyg',
    'ssplay',
    'states',
    'Stat_Ind',
    'std_date',
    'stream_b',
    'stream_t',
    'strings',
    'stripper',
    'stty',
    'style1',
    'style2',
    'style3',
    'su',
    'suepope4',
    'sum',
    'summary.',
    'swap',
    'sysinfo',
    'sz',
    't',
    'tail',
    'talk',
    'tar',
    'tbl',
    'tcm',
    'tcm5na',
    'tcm8',
    'tcm8a',
    'tcm8na',
    'tcpostio',
    'tcppost',
    'tcsh',
    'tee',
    'tektroni',
    'tel',
    'tellwm',
    'telnet',
    'telno',
    'tes',
    'test',
    'test2.pl',
    'tester',
    'testFont',
    'testHist',
    'test.m2.',
    'test.pl',
    'text_are',
    'tftp',
    'tifftofa',
    'time',
    'toolches',
    'top',
    'touch',
    'tput',
    'tr',
    'tracerou',
    'Tracy',
    'trn',
    'troff',
    'true',
    'tset',
    'ttcm',
    'ttcm8',
    'tty',
    'twm',
    'twoprint',
    'ugen',
    'ul',
    'uname',
    'uniq',
    'UNLOCK',
    'unpack',
    'unzip',
    'uopt',
    'update',
    'use_abus',
    'userenv',
    'uudecode',
    'uuencode',
    'uuname',
    'v10sort',
    'vacation',
    'vc',
    'vim',
    'vinay',
    'vipw',
    'virmf',
    'virtex',
    'vis',
    'volumes.',
    'vreg',
    'vsimsg',
    'vsiupdst',
    'vt100',
    'vtwm',
    'w',
    'W',
    'w3c',
    'wait4wm',
    'wc',
    'wdefine',
    'webify',
    'webmagic',
    'what',
    'where',
    'whereis',
    'which',
    'who',
    'whoami',
    'whodo',
    'whois',
    'windows',
    'window_t',
    'winterm',
    'worklist',
    '.wrapper',
    'X',
    'x11perf',
    'x3270',
    'xargs',
    'xauth',
    'xbiff',
    'xcal',
    'xcalc',
    'xcalenda',
    'xclock',
    'xconfirm',
    'xdemineu',
    'xdiff',
    'xdm',
    'xdpyinfo',
    'xdvi',
    'xdvi.rea',
    'xemacs-1',
    'xemacs-2',
    'xev',
    'xfig',
    'xfontsel',
    'xfs',
    'xgas',
    'xgobi',
    'xgvis',
    'xhost',
    '.xinitrc',
    'xinitrem',
    'xlbiff',
    'xlistscr',
    'xload',
    'xloadima',
    'xlsclien',
    'xlsfonts',
    'xmag',
    'xman',
    'xmaplev4',
    'xmaplev5',
    'xmessage',
    'xmh',
    'xmineswe',
    'xmkmf',
    'xmodmap',
    'xpaint',
    'xpdf',
    'xpr',
    'xprop',
    'xrdb',
    'Xremote',
    'xrn',
    'xrt_auth',
    'xrtld',
    '.xsessio',
    'Xsession',
    'xset',
    'xsetroot',
    'xt',
    'xterm',
    'xupdate',
    'xv',
    'xwd',
    'xwininfo',
    'xwsh',
    'xxx',
    'yacc',
    'ypcat',
    'yppasswd',
    'z',
    'zip',
    'zsh',
    'zubs',
    'zz2',
]

num_commands = len(commands)
num_commands

856

In [0]:
raw_training_data =  numpy.random.randint(
    low=1, 
    high=50, 
    size=(7500, num_commands),
)
raw_training_labels =  numpy.random.randint(
    low=1, 
    high=50, 
    size=(7500, 1),
)

In [0]:
import pandas

training_labels =  keras.utils.to_categorical(raw_training_labels, num_classes=50)
training_dataset = pandas.DataFrame(raw_training_data, columns=commands)

In [0]:
training_dataset.describe()

Unnamed: 0,=,==,[,1.1,1.2,1.3,4Dwm,5650.exe,5836.exe,7105.exe,...,xwsh,xxx,yacc,ypcat,yppasswd,z,zip,zsh,zubs,zz2
count,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,...,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0
mean,24.862267,24.9124,25.157333,25.019067,25.0728,24.778667,24.908533,24.9916,25.238933,24.859867,...,25.156,24.684267,24.866667,24.9188,25.020133,24.947333,24.7812,25.058667,25.122133,25.375333
std,14.084966,14.068648,14.179607,14.18066,14.109285,14.155578,14.232891,13.997621,14.10933,14.145528,...,14.2073,14.198234,14.060964,14.215652,14.161038,14.121008,14.091806,14.157517,14.091599,14.131167
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,...,13.0,12.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0
50%,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,...,25.0,24.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0
75%,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,38.0,37.0,...,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,37.0,38.0
max,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,...,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0


# Building the Oracle

In [0]:
oracle = Sequential()

In [0]:
input_layer = Dense(
    units=856,
    activation='relu',
    input_dim=856,
)

In [0]:
hidden_layer = Dense(
    units=30,
    activation='relu',
)

In [0]:
output_layer = Dense(
    units=50,
    activation='sigmoid',
)

In [0]:
oracle.add(input_layer)
oracle.add(hidden_layer)
oracle.add(output_layer)

In [0]:
oracle.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Running oracle on Dataset

In [0]:
oracle.fit(training_dataset,  training_labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0bc5e7b278>

In [0]:
oracle

<keras.engine.sequential.Sequential at 0x7f0bcb651da0>

In [0]:
score = oracle.evaluate(training_dataset, training_labels)



In [0]:
score

[9.226850337219238, 0.0224]

In [0]:
oracle.metrics_names

['loss', 'acc']