[![CyVers](https://i.imgur.com/yyhmZET.png)](https://www.cyvers.ai/)

# Deployed Algorithm Workflow

This notebook shows the workflow of using a deployed algorithm.  
See [`SystemEngineering.md`](https://github.com/CyVers-AI/CyVersManagement/blob/main/Engineering/SystemEngineering.md) for details.

> Notebook by:
> - Anton Rudenko Anton@cyvers.ai
> - Royi Avital Royi@cyvers.ai

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 16/08/2022 | Royi Avital | Matching version 0.8 of the API                                    |
|         |            |             |                                                                    |

In [1]:
# Load Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Misc
import datetime
import os
from platform import python_version
import random
import warnings

# Visualization
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from bokeh.plotting import figure, show

import ruamel.yaml#import yaml

In [2]:
# Configuration
# Render Matplotlib figures inside the notebook
%matplotlib inline

# NOTE(review): This silences every warning for the rest of the session,
# including deprecation warnings raised by the packages in use - confirm this is intended.
warnings.filterwarnings("ignore")

# Seed NumPy's global generator and Python's `random` module with the same value for repeatable runs
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

sns.set_theme() #>! Apply SeaBorn theme

In [3]:
# Load the run parameters from the YAML file into `params`
# (The model hash check reads `params['MODEL_FILE_HASH']`).
# Previously used file: 'params_no_gas.yml'
paramsFileName = r'params_no_gas_.yml'

# `ruamel.yaml.safe_load()` is deprecated and was removed in `ruamel.yaml` 0.18.
# The `YAML(typ = 'safe', pure = True)` loader is its documented replacement.
oYamlLoader = ruamel.yaml.YAML(typ = 'safe', pure = True)

# Explicit encoding so the parsing does not depend on the platform default
with open(paramsFileName, encoding = 'utf-8') as file:
    params = oYamlLoader.load(file)


In [4]:
# Constants

DATA_FOLDER_NAME    = 'Validated' #<! Previously: 'BlockChainAttacksDataSet'
DATA_FOLDER_PATTERN = 'b1' #<! Previously: 'DataSet0'
DATA_FILE_EXT       = 'csv'

PROJECT_DIR_NAME = 'CyVers' #<! Royi: Anton, don't change it, it should be a team constant

# The project folder is the working directory path cut right after the first occurrence of `PROJECT_DIR_NAME`
#>! Pay attention, it will create issues in cases you name the folder `CyVersMe` or anything after / before `CyVers`
workingDirPath = os.getcwd()
projectDirIdx  = workingDirPath.find(PROJECT_DIR_NAME)
if projectDirIdx < 0:
    # `str.find()` returns -1 on a miss. Slicing with it would silently drop
    # the last character of the path and build a wrong project folder.
    raise ValueError(f"The project folder '{PROJECT_DIR_NAME}' was not found in the working directory path: {workingDirPath}")
PROJECT_DIR_PATH = os.path.join(workingDirPath[:projectDirIdx], PROJECT_DIR_NAME)

# Folder of the model, relative to the working directory
# Previously: 'Model_2022_10_13_11_39_22', 'Model_2022_10_12_12_59_51', 'Model_2022_10_05_20_50_14'
modelFolderPath = 'Model_2022_12_02_12_56_34'

# Arguments forwarded to `PreProcessData()`
updateInplace       = False
amountUsdOutlierThr = 1e9


In [5]:
# CyVers Packages
# NOTE(review): The wildcard import hides which names this notebook depends on.
# The cells below call `hashfile`, `ExtractCsvFiles`, `LoadCsvFilesDf`, `PreProcessData` and `PredictAssetData`,
# which are presumably provided by this import - confirm and consider importing them explicitly.
from PredictAssetData import *

In [6]:
# Parameters

# Pre Process

# Asset Data
# Name of the data set sub folder which holds the file (Previously used: 'b2', 'DataSet006')
dataSetName = 'n1'
# Name of the CSV file to load in the single file cell. Previously used files:
#   'BiFinance001.csv', 'Bit2c_Gas_Data.csv', 'EtherDelta004.csv', 'EtherDelta012.csv',
#   'FakeDfinityTokenSale001.csv', '0x4fa03aefa74559aafc6e26fad2bc801006c6c5ab_1581141935.csv',
#   '0x36983f0dcbfcaa2b978eb0f34e53505cc0fb4d97_1665514019.csv', 'Bitpoint003.csv', 'AnibusDAO001.csv'
dataFileName = 'Solidus.csv'


In [7]:
# Verify the model file on disk is the one the parameters file was generated for:
# The hash recorded in the parameters must match the hash of `Model.pkl`.
expectedModelHash = params['MODEL_FILE_HASH']
modelFilePath     = os.path.join(modelFolderPath, 'Model.pkl')
actualModelHash   = hashfile(modelFilePath)

if expectedModelHash != actualModelHash:
    raise ValueError('MODEL FILE IS NOT CORRECT !!!!!!!!!!!!')
    

In [None]:
# Loading / Generating Data: Multiple files
# Collects the CSV files under the data folder which match `DATA_FOLDER_PATTERN`
# and loads them into a single raw data frame.
dataSetRotoDir = os.path.join(PROJECT_DIR_PATH, DATA_FOLDER_NAME)
lCsvFile = ExtractCsvFiles(dataSetRotoDir, folderNamePattern = DATA_FOLDER_PATTERN)
print(f'The number of file found: {len(lCsvFile)}')

# Each column name is paired with its flag (Flags if a column is a must to have).
# Keeping the pairs together guarantees the two lists handed to the loader have matching length and order.
lCsvColSpec = [
    ('Transaction ID',   True),
    ('Block Time',       True),
    ('Transaction Time', True),
    ('Sender ID',        True),
    ('Receiver ID',      True),
    ('Receiver Type',    True),
    ('Amount',           True),
    ('Currency',         True),
    ('Currency Hash',    True),
    ('Currency Type',    True),
    ('Amount [USD]',     True),
    ('Gas Price',        True),
    ('Gas Limit',        True),
    ('Gas Used',         True),
    ('Gas Predicted',    True),
    ('Balance In',       True),
    ('Balance Out',      True),
    ('Label',            False),
    ('Risk Level',       False),
]
lCsvColName     = [colName for colName, _ in lCsvColSpec]
lCsvColNameFlag = [isMust for _, isMust in lCsvColSpec]

dfDataRaw, dAssetFile = LoadCsvFilesDf(lCsvFile, baseFoldePath = '', lColName = lCsvColName, lColFlag = lCsvColNameFlag, addFileNameCol = True)

numRows, numCols = dfDataRaw.shape
# Report the count itself (Not the `.shape` tuple), matching the single file loading cell
numSenders = dfDataRaw['Sender ID'].unique().shape[0]

print(f"The number of rows (Samples): {numRows}, The number of columns: {numCols}, number of unique sender id's: {numSenders}")
print(f'The data list of columns is: {dfDataRaw.columns} with {len(dfDataRaw.columns)} columns')

In [8]:
# Load the Data Frame: Single file
# In deployment this is the data which should be fetched from the database

dfFilePath = os.path.join(PROJECT_DIR_PATH, DATA_FOLDER_NAME, dataSetName, dataFileName)
dfDataRaw  = pd.read_csv(dfFilePath)

# Summary of the loaded data: Size, number of distinct senders and the columns
numRows, numCols = dfDataRaw.shape
numSenders       = len(dfDataRaw['Sender ID'].unique())

print(f"The number of rows (Samples): {numRows}, The number of columns: {numCols}, number of unique sender id's: {numSenders}")
print(f'The data list of columns is: {dfDataRaw.columns} with {numCols} columns')


The number of rows (Samples): 251, The number of columns: 19, number of unique sender id's: 1
The data list of columns is: Index(['Transaction ID', 'Transaction Time', 'Block Time', 'Sender ID',
       'Receiver ID', 'Receiver Type', 'Amount', 'Currency', 'Currency Hash',
       'Currency Type', 'Amount [USD]', 'Gas Price', 'Gas Limit', 'Gas Used',
       'Gas Predicted', 'Balance In', 'Balance Out', 'Label', 'Risk Level'],
      dtype='object') with 19 columns


In [9]:
# Pre Process Data (Validation)
# We use updateInplace = False in order to not touch the RAW data from the database
# NOTE(review): `amountUsdOutlierThr` is presumably the threshold on `Amount [USD]` above which
# a sample is treated as an outlier - verify against `PreProcessData()`.

dfData = PreProcessData(dfDataRaw, updateInplace = updateInplace, amountUsdOutlierThr = amountUsdOutlierThr)
#dfData['Label'] = -1 #<! Just to verify the model runs


In [10]:
# Instantiate the Class
# The prediction object is built from the model folder (`modelFolderPath`).
# It is used by all the following stages: Validation, features, prediction.
oPredAsset = PredictAssetData(modelFolderPath)


In [11]:
#oPredAsset.modelFolderPath


In [12]:
# Validate Data
# This is the longest phase as it also adds our unique Pandas Extension
# NOTE(review): `dfData` is rebound to the result, so re-running this cell alone feeds it
# already validated data - re-run from the pre process cell to start from a clean state.

dfData = oPredAsset.ValidateData(dfData)

In [13]:
# Calculate Features
# We just calculate 4 features for this phase test
# NOTE(review): `dfData` is rebound to the result, so this cell expects the validated data of the previous cell.

dfData = oPredAsset.CalculateFeatures(dfData)

In [14]:
# Gen Data for Prediction
# This creates a different data frame which is a subset of the columns in `dfData` and with NaN and `inf` removed
dfX = oPredAsset.GenDataPredict(dfData)

In [15]:
# Predict Labels
# Runs the model on `dfX` and returns `dfData` updated with the predicted labels.
# NOTE(review): The original comment said the `Label` column is updated, yet the following cells read
# `Label_predicted` next to `Label` - presumably the prediction is written to `Label_predicted`; confirm.
dfData = oPredAsset.PredictLabels(dfX, dfData)

In [16]:
# Distribution of the predicted labels. Expectation: No `-1`
# NOTE(review): `-1` is presumably the placeholder of a sample without a prediction - confirm.
# For the distribution of the reference labels use: dfData['Label'].value_counts()
dfData['Label_predicted'].value_counts()

0    243
Name: Label_predicted, dtype: int64

In [None]:
# Count the samples predicted as `1` while their `Label` is `0`
vPredPosLabelNeg = (dfData['Label_predicted'] == 1) & (dfData['Label'] == 0)
dfData.loc[vPredPosLabelNeg, 'Label_predicted'].value_counts()

In [None]:
# Count the samples predicted as `1` while their `Label` is `1` as well
vPredPosLabelPos = (dfData['Label_predicted'] == 1) & (dfData['Label'] == 1)
dfData.loc[vPredPosLabelPos, 'Label_predicted'].value_counts()

In [None]:
#dfData[['Sender ID','Receiver ID','Amount [USD]', 'Label']]
#dfData.columns