[![CyVers](https://i.imgur.com/yyhmZET.png)](https://www.cyvers.ai/)

# Deployed Algorithm Workflow

This notebook shows the workflow of using a deployed algorithm.  
See [`SystemEngineering.md`](https://github.com/CyVers-AI/CyVersManagement/blob/main/Engineering/SystemEngineering.md) for details.

> Notebook by:
> - Anton Rudenko Anton@cyvers.ai
> - Royi Avital Royi@cyvers.ai

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 16/08/2022 | Royi Avital | Matching version 0.8 of the API                                    |
|         |            |             |                                                                    |

In [None]:
# Load Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Misc
import datetime
import os
from platform import python_version
import random
import warnings

# Visualization
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from bokeh.plotting import figure, show



In [None]:
# Configuration
# Render Matplotlib figures inline in the notebook output
%matplotlib inline

# NOTE(review): this suppresses ALL warnings for the rest of the session (including
# deprecation / data-quality warnings) - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Seed both NumPy's global RNG and Python's `random` module with the same fixed value
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

sns.set_theme() #<! Apply the SeaBorn theme (called with its default arguments)

In [None]:
# Constants

DATA_FOLDER_NAME    = 'BlockChainAttacksDataSet'
DATA_FOLDER_PATTERN = 'DataSet001'
DATA_FILE_EXT       = 'csv'

PROJECT_DIR_NAME = 'CyVers' #<! Royi: Anton, don't change it, it should be a team constant

# Resolve the project root from the current working directory: everything up to the first
# occurrence of `PROJECT_DIR_NAME` in the path, plus `PROJECT_DIR_NAME` itself.
# Pay attention: this is a plain sub string match, hence it will create issues in case a folder
# is named `CyVersMe` or anything after / before `CyVers`.
_cwdPath    = os.getcwd() #<! Query once so the search and the slice use the same path
_projDirIdx = _cwdPath.find(PROJECT_DIR_NAME)
if _projDirIdx < 0:
    # `str.find()` returns -1 on a miss; slicing with `[:-1]` would silently drop the last
    # character of the working directory and yield a bogus project path.
    raise FileNotFoundError(f"Could not locate the project folder '{PROJECT_DIR_NAME}' in the current working directory: {_cwdPath}")
PROJECT_DIR_PATH = os.path.join(_cwdPath[:_projDirIdx], PROJECT_DIR_NAME)

In [None]:
# CyVers Packages
from PredictAssetData import *

In [None]:
# Parameters
# Values a user of this notebook is expected to tune.

# Pre Process (Both are forwarded to `PreProcessData()`)
amountUsdOutlierThr = 1e9   #<! Outlier threshold (Presumably on the USD amount - confirm in `PreProcessData()`)
updateInplace       = False #<! `False` is used in order to not touch the RAW data

# Asset Data (Sub folder and file name under the data folder)
dataSetName  = 'DataSet001'
dataFileName = 'AnibusDAO001.csv'

# Model Data
# Other model folders previously referenced in this cell:
#   'Model_2022_08_16_18_09_18', 'Model_2022_10_05_20_06_21', 'Model_2022_10_05_20_50_14'
modelFolderPath = 'Model_2022_10_11_10_26_18'

In [None]:
# Load the Data Frame
# In deployment this is the data which should be fetched from the database.
# Here it is read from a CSV file located at <Project Dir>/<Data Folder>/<Data Set>/<File Name>.

dfFilePath = os.path.join(
    os.path.join(PROJECT_DIR_PATH, DATA_FOLDER_NAME, dataSetName),
    dataFileName,
)
dfDataRaw = pd.read_csv(dfFilePath)

# Data dimensions: rows are samples, columns are fields
numRows = dfDataRaw.shape[0]
numCols = dfDataRaw.shape[1]

# Summary of the loaded data
print(f"The number of rows (Samples): {numRows}, The number of columns: {numCols}, number of unique sender id's: {dfDataRaw['Sender ID'].unique().shape[0]}")
print(f'The data list of columns is: {dfDataRaw.columns} with {len(dfDataRaw.columns)} columns')


In [None]:
# Pre Process Data (Validation)
# We use updateInplace = False in order to not touch the RAW data from the database
# NOTE(review): `PreProcessData()` comes from the `PredictAssetData` star import; presumably it
# returns a new data frame when `updateInplace = False` - confirm, as the next line writes into `dfData`.

dfData = PreProcessData(dfDataRaw, updateInplace = updateInplace, amountUsdOutlierThr = amountUsdOutlierThr)
dfData['Label'] = -1 #<! Placeholder value, just to verify the model runs (Should be overwritten by the prediction step)


In [None]:
# Instantiate the Class
# The prediction object is built from the model folder set in the parameters cell.
# NOTE(review): `modelFolderPath` is a bare folder name (Not joined with `PROJECT_DIR_PATH`);
# presumably resolved relative to the working directory or inside the class - confirm.
oPredAsset = PredictAssetData(modelFolderPath)


In [None]:
# Validate Data
# This is the longest phase as it also adds our unique Pandas Extension
# The returned object replaces `dfData`, hence re running this cell alone feeds it its own output.

dfData = oPredAsset.ValidateData(dfData)

In [None]:
# Calculate Features
# We just calculate 4 features for this phase test
# The returned object replaces `dfData` (Presumably the input with the feature columns added - confirm).

dfData = oPredAsset.CalculateFeatures(dfData)

In [None]:
# Gen Data for Prediction
# This creates a different object (`dfX`) which is a subset of the columns in `dfData` and with NaN and `inf` removed
# NOTE(review): The original note said "file"; presumably an in memory data frame - confirm in `GenDataPredict()`.
dfX = oPredAsset.GenDataPredict(dfData)

In [None]:
# Predict Labels
# Updates the `Label` column in `dfData` using the prediction data `dfX`
# The returned object replaces `dfData`.
dfData = oPredAsset.PredictLabels(dfX, dfData)

In [None]:
# Display the `Label` column: the expectation is that no `-1` placeholder values remain after prediction
# NOTE(review): This is a visual check only; nothing here enforces that the `-1` values are gone.
dfData['Label']