# Pymetry Anomaly Satellite

## Import and Initialize

In [None]:
import sys

sys.path.append("/opt/symetry/python")

import pandas as pd
import numpy as np
import json

import pymetry as pym
import pymetry.project as pym_prj
import pymetry.utilities.jobs as pym_job
import pymetry.utilities.utils as pym_ut

import sklearn.metrics as skmetrics
import sklearn.model_selection as skms

In [None]:
import os

# Configure the pymetry client. Each setting can be overridden through an
# environment variable of the same purpose; when the variable is unset the
# notebook's original value is used, so existing runs behave as before.
# NOTE(review): the default secret key is still committed in this file --
# consider rotating it and supplying it only via SYM_SECRET_KEY.
pym.init_conf({
    "SERVER": os.environ.get("SYM_SERVER", 'http://charm:8080'),
    "SYM_KEY_ID": os.environ.get("SYM_KEY_ID", "c1"),
    "SYM_SECRET_KEY": os.environ.get("SYM_SECRET_KEY", "A1ciUrDJgm5LIJU710bxeQ=="),
})

## Read the data

### Satellite

In [None]:
# Load the satellite CSV and split it 80/20 into train and test frames
# (fixed random_state so the split is reproducible).
DATA_FILE = "../../../data/satellite.mat.csv"

df = pd.read_csv(DATA_FILE)
train_df, test_df = skms.train_test_split(df, random_state=42, test_size=0.2)

# Report the (rows, columns) shape of each split, train first.
for split in (train_df, test_df):
    print(split.shape)

# Convert both splits with pymetry's pandas_df_to_sml_json helper.
smldf_train, smldf_test = map(pym_ut.pandas_df_to_sml_json, (train_df, test_df))

In [None]:
# Preview the first rows of the full data set; as the last expression of the
# cell, its value is what the notebook renders.
df.head()

## Create and Learn Project

In [None]:
# Create the server-side project that will hold the training data.
# `rf_features` is a colon-separated list of the feature indices 0..35.
prj_name = 'anomaly'
project_type = 'cpu'
project_params = dict(
    rf_type="rf_anomaly",
    rf_features="0:1:2:3:4:5:6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22:23:24:25:26:27:28:29:30:31:32:33:34:35",
)
rsp = pym_prj.create_project(
    prj_name,
    project_type=project_type,
    project_params=project_params,
)
print(rsp.content)

In [None]:
%%time

# Client identifier for this notebook.
# NOTE(review): this variable is never passed on -- the call below sends the
# literal 'pymetry' instead. Confirm which identifier the server should see.
client_id = "pymetry-ipynb"

# Stream the converted training split into the project and print the raw
# response body.
rsp = pym_prj.stream_data_to_project(prj_name, smldf_train, client_id='pymetry')
print(rsp.content)


## Build RF Anomaly Model

In [None]:
%%time

# Request an 'rf_anomaly' model build for the project, print the raw response,
# then hand the response to wait_for_job_finish (presumably blocks until the
# build job is done -- confirm against pymetry.utilities.jobs).
model_name = 'manomaly'
ml_context = {}
rsp = pym_prj.build_model(ml_context, prj_name, model_name, algo='rf_anomaly')
print(rsp.content)
pym_job.wait_for_job_finish(rsp)

In [None]:
# Fetch the model description and keep only the 'modelInfo' entry nested
# under 'values' in the JSON response.
rsp = pym_prj.get_model_info(prj_name, model_name)
model_info = json.loads(rsp.content)['values']['modelInfo']

## Predict and Compute AUC

In [None]:
# Score the whole test split at once: drop the label column 'y', build the
# prediction callable for this project/model, and apply it to the features.
predict_df = test_df.drop(columns='y')
anomaly_predict_proba = pym_prj.make_predict_proba(predict_df, prj_name, model_name)
predict_result = anomaly_predict_proba(predict_df)

In [None]:
import plotly.graph_objects as go
import plotly.offline as py

# Empty score-vs-record chart: one (initially empty) scatter trace that a
# later cell fills in, with axes fixed to the test-set size and [0, 1].
fig = go.FigureWidget()
fig.add_scatter()

fig.update_xaxes(title={"text": "Record"}, range=[0, len(test_df)])
fig.update_yaxes(title={"text": "Anomaly Score"}, range=[0, 1])

# One faint full-height vertical line at the position of every test record
# labelled y == 1.
vbars = [
    {
        'type': 'line',
        'xref': 'x', 'x0': i, 'x1': i,
        'yref': 'paper', 'y0': 0, 'y1': 1,
        'opacity': 0.2,
    }
    for i, v in enumerate(test_df['y'])
    if v == 1
]
fig.update_layout(title="Anomaly Scores Across Records", shapes=vbars)

# NOTE(review): iplot may render a static snapshot rather than the live
# widget; confirm that later updates to `fig` show up in this output.
py.init_notebook_mode(connected=True)
py.iplot(fig)

In [None]:
import time
# Wall-clock timestamp taken before the streaming cell.
# NOTE(review): `start` is not read in the visible cells -- confirm it is used.
start = time.time()

Show anomalies in real time

In [None]:
# Score the test records one at a time and push each new score into the
# first trace of `fig`, so the chart grows as records are processed.
predict_df = test_df.drop('y', axis=1)

results = []
for i in range(len(predict_df)):
    # Double brackets keep the row as a one-row DataFrame.
    record = predict_df.iloc[[i]]
    res = anomaly_predict_proba(record)
    results.append(res[0])
    with fig.batch_update():
        # x needs exactly one position per collected score; after this
        # iteration's append there are i + 1 of them.
        fig.data[0].x = np.arange(len(results))
        fig.data[0].y = results



Show anomalies in two-dimensional space

In [None]:
# Attach the batch anomaly scores to the test frame as a 'predicted' column.
test_df.loc[:, 'predicted'] = predict_result

In [None]:
# Preview the first rows of the test frame.
test_df.head()

In [None]:
import plotly.graph_objs as go
from ipywidgets import interactive, HBox, VBox

# One plotly numeric marker-symbol code per test record: 0 where the label
# y is 0, 4 for every other label.
symbols = [0 if y == 0 else 4 for y in test_df['y']]

# Build the scatter widget once, after all symbols are known. Points are
# placed on columns '0' and '1' and coloured by the anomaly score; the
# hover text carries the true label.
f = go.FigureWidget([
    go.Scatter(x = test_df['0'],
               y = test_df['1'],
               mode = 'markers',
               marker=dict(
                   color=test_df['predicted'],
                   colorbar=dict(title="Anomaly Score"),
                   colorscale="OrRd"
               ),
               marker_symbol=symbols,
               text=test_df['y'],
               hovertemplate="(%{x},%{y}) <b>Anomaly = %{text}</b><br><br>"
              )
])

# Centre the chart title near the top of the figure.
f.update_layout(
    title=dict(
        text="Anomaly Score",
        x=0.5,
        y=0.9,
        xanchor='center',
        yanchor='top',
    )
)

# Initial axis titles match the columns plotted at construction time.
f.layout.xaxis.title = '0'
f.layout.yaxis.title = '1'

N = len(test_df)
scatter = f.data[0]
# Re-assigns the trace's current coordinates to themselves.
scatter.x = scatter.x
scatter.y = scatter.y

def update_axes(xaxis, yaxis):
    """Re-plot the scatter on the chosen pair of ``test_df`` columns.

    Args:
        xaxis: Name of the ``test_df`` column to show on the x axis.
        yaxis: Name of the ``test_df`` column to show on the y axis.
    """
    scatter = f.data[0]
    # Apply the new coordinates and the matching axis titles in a single
    # batch so the figure is updated once.
    with f.batch_update():
        f.layout.xaxis.title = xaxis
        f.layout.yaxis.title = yaxis
        scatter.x = test_df[xaxis]
        scatter.y = test_df[yaxis]

# Offer every int64 column of the test frame as a choice for either axis;
# changing a selection calls update_axes.
int_columns = test_df.select_dtypes('int64').columns
axis_dropdowns = interactive(update_axes, yaxis=int_columns, xaxis=int_columns)

# Put everything together
# NOTE(review): this VBox is not the cell's last expression, so its value is
# presumably not rendered -- confirm whether the dropdowns should be shown.
VBox((HBox(axis_dropdowns.children), f))

py.iplot(f)

## Clear

In [None]:
# Set to False to keep the project on the server after the run.
clear = True

if clear:
    # Delete the project created by this notebook (only `prj_name`) and
    # print the raw response body.
    rsp = pym_prj.delete_project(prj_name)
    print(rsp.content)