## Contents

#### Common
* **[Library Import](#Library-Import)**
* **[Data Import](#Data-Import)**

#### Algorithm
* **[Hotelling](#Hotelling)**
* **[Change Finder](#Change-Finder)**
* **[LOF](#LOF)**

### Library Import

In [9]:
require 'pycall'
require 'pycall/import'
include PyCall::Import
require 'numpy'
$np = Numpy
require 'daru'
require 'daru/plotly'
require 'daru/dataframe'
include Daru::Plotly::Methods
$scipy = PyCall.import_module('scipy.stats')
$cf = PyCall.import_module('changefinder')
$plt = PyCall.import_module('matplotlib.pyplot')
pyfrom 'sklearn.neighbors', import: 'LocalOutlierFactor'



["LocalOutlierFactor"]

### Data Import

In [10]:
data = Daru::DataFrame.from_csv('/mnt/d/data/anomalyDetect/toy.csv')
data.head(3)

Unnamed: 0,TimeStamp,val0,val1,val2,val3
0,2017-10-08 18:34:33,8.8512018685,7.33782933668,9.178728925,2.753579441
1,2017-10-08 18:39:33,-2.81190568617,-2.3543534772,-2.69658130798,-1.68424473044
2,2017-10-08 18:44:33,0.177683537483,-0.30614046055,0.178235586618,-0.185882889441


### Hotelling

In [40]:
def hotelling_t2(data)
    result = Daru::DataFrame.new()
    original = data['val0'].to_a
    result['ORG'] = original

    mean = $np.mean(original)
    variance = $np.var(original)

    anomaly_scores = []
    for i in original do
      anomaly_score = (i - mean)**2 / variance
      anomaly_scores.push(anomaly_score)
    end

    threshold = $scipy.chi2(df=1).interval(alpha=0.99)[1]
    thresholds = [threshold] * anomaly_scores.length

    result['ANOM_SCORE'] = anomaly_scores
    result['THR'] = thresholds

    detected_points = []
    for j in (0..anomaly_scores.length-1) do
      anom_score = anomaly_scores[j].to_f
      if anom_score >= threshold then
          detected_points.push(original[j])
      else
          detected_points.push(nil)
      end
    end

    result['DETECTED'] = detected_points

    return result
end

:hotelling_t2

In [41]:
rst = hotelling_t2(data)
rst.head(3)

Unnamed: 0,ORG,ANOM_SCORE,THR,DETECTED
0,8.8512018685,0.2035092456412993,7.879438576622417,
1,-2.81190568617,0.0739545550032914,7.879438576622417,
2,0.177683537483,0.0075001375884891,7.879438576622417,


In [None]:
anom = generate_data(rst['ANOM_SCORE'], type: :scatter, opts:{name:'anomaly score', marker:{color:'gray'}})
thr = generate_data(rst['THR'], type: :scatter, opts:{name:'threshold', marker:{color:'red'}})

Plotly::Plot.new(
  data:anom+thr,
  layout:{title:'Anomaly Score & Threshold', xaxis:{title:'time'},
    yaxis:{title:'anomaly score'}}
).show

In [None]:
org = generate_data(rst['ORG'], type: :scatter, opts:{name:'original', marker:{color:'gray'}})
dtc = generate_data(rst['DETECTED'], type: :scatter, opts:{name:'detected', marker:{color:'red'}})

Plotly::Plot.new(
  data:org+dtc,
  layout:{title:'Original & Detected', xaxis:{title:'time'},
    yaxis:{title:'original'}}
).show

### Change Finder

In [49]:
def change_finder(data, param_r=0.001, param_order=1, param_smmoth=5)
    original = data['val0']
    model = $cf.ChangeFinder.(r=param_r, order=param_order, smooth=param_smmoth)

    change_scores = []
    for i in original do
      score = model.update(i)
      change_scores.push(score)
    end

    result = Daru::DataFrame.new()
    result['ORG'] = original
    result['CHG_SCORE'] = change_scores

    ch_score_q1 = $scipy.stats.scoreatpercentile(change_scores, 25) 
    ch_score_q3 = $scipy.stats.scoreatpercentile(change_scores, 75) 
    iqr = ch_score_q3 - ch_score_q1
    thr_upper = ch_score_q3 + iqr * 3
    result['THR'] = [thr_upper] * original.to_a.length

    detected_points = []
    for j in (0..change_scores.length-1) do
    if change_scores[j] > thr_upper then
      detected_points.push(original[j])
    else
      detected_points.push(nil)
    end
    end

    result['DETECTED'] = detected_points
    return result
end

:change_finder

In [50]:
rst = change_finder(data, param_r=0.001, param_order=1, param_smmoth=10)
rst.head(3)

Unnamed: 0,ORG,CHG_SCORE,THR,DETECTED
0,8.8512018685,0.0,56.44350829104731,
1,-2.81190568617,0.0,56.44350829104731,
2,0.177683537483,0.0,56.44350829104731,


In [None]:
org = generate_data(rst['ORG'], type: :scatter, opts:{yaxis:'y1', name:'original', marker:{color:'gray'}})
chs = generate_data(rst['CHG_SCORE'], type: :scatter, opts:{yaxis:'y2', name:'change score', marker:{color:'red'}})
thr = generate_data(rst['THR'], type: :scatter, opts:{yaxis:'y2', name:'threshold', marker:{color:'blue'}})

Plotly::Plot.new(
  data:org+chs+thr,
  layout:{ title:'Original & Change Score', xaxis: { title: 'time'},
    yaxis:{ title:'original', range:[0,2000], side:'left'},
    yaxis2:{ title:'change score', range:[0,100], side:'right', overlaying:'y'}}
).show

In [None]:
org = generate_data(rst['ORG'], type: :scatter, opts:{ name:'original', marker:{color:'gray'}})
dtc = generate_data(rst['DETECTED'], type: :scatter, opts:{ name:'detected', marker:{color:'red'}})

Plotly::Plot.new(
  data:org+dtc,
  layout:{title:'Original & Detected', xaxis:{title:'time'},
    yaxis:{title:'original'}}
).show

In [90]:
$plt.plot(rst['ORG'].to_a)
$plt.plot(rst['DETECTED'].to_a)
$plt.savefig("graph.png")

### LOF

In [21]:
model = LocalOutlierFactor.(n_neighbors:1,novelty: :False,contamination:0.1)

LocalOutlierFactor(algorithm='auto', contamination=0.1, leaf_size=30,
          metric='minkowski', metric_params=None, n_jobs=None,
          n_neighbors=1, novelty='False', p=2)

In [22]:
model.fit([data['val0'].to_a, data['val1'].to_a])

LocalOutlierFactor(algorithm='auto', contamination=0.1, leaf_size=30,
          metric='minkowski', metric_params=None, n_jobs=None,
          n_neighbors=1, novelty='False', p=2)

In [23]:
result = model.predict([data['val2'].to_a, data['val3'].to_a])


array([1, 1])