[![CyVers](https://i.imgur.com/yyhmZET.png)](https://www.cyvers.ai/)

# Create Confusion Matrix

> Notebook by:
> - Royi Avital Royi@cyvers.ai

## Revision History

| Version | Date       | User        | Content / Changes                                                  |
|---------|------------|-------------|--------------------------------------------------------------------|
| 0.1.000 | 07/08/2022 | Royi Avital | First version                                                      |
|         |            |             |                                                                    |

In [None]:
# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Misc
import datetime
import os
from platform import python_version
import random
import warnings

# EDA Tools
import ppscore as pps #<! See https://github.com/8080labs/ppscore -> pip install git+https://github.com/8080labs/ppscore.git

# Machine Learning
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE
# from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Metrics
from sklearn.metrics import confusion_matrix, fbeta_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold, StratifiedGroupKFold, train_test_split

# Ensemble Engines
from catboost import CatBoostClassifier, Pool
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Visualization
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from bokeh.plotting import figure, show

# Jupyter
from ipywidgets import interact, Dropdown, Layout

In [None]:
# Configuration
# Notebook wide settings: plotting backend, warnings policy, RNG seeds and plot theme.

# IPython magic (not plain Python): shows Matplotlib figures inline in the notebook output
%matplotlib inline

# Silences every warning for the rest of the session (deprecation warnings of the imported packages included)
warnings.filterwarnings("ignore")

# Seeds both NumPy's global RNG and Python's `random` module with the same value so re-runs are repeatable
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

sns.set_theme() #<! Apply SeaBorn theme

In [None]:
# Constants

# Data set location descriptors
DATA_FOLDER_NAME    = 'BlockChainAttacksDataSet'
DATA_FOLDER_PATTERN = 'DataSet001'
DATA_FILE_EXT       = 'csv'

PROJECT_DIR_NAME = 'CyVers' #<! Royi: Anton, don't change it, it should be a team constant

# Project root: The part of the working directory up to (and including) the first occurrence of `PROJECT_DIR_NAME`.
# Pay attention, the match is by sub string, hence it will create issues in cases you name the folder `CyVersMe` or anything after / before `CyVers`.
_cwdPath    = os.getcwd()
_projDirIdx = _cwdPath.find(PROJECT_DIR_NAME) #<! `-1` when the name is not part of the working directory
if _projDirIdx >= 0:
    PROJECT_DIR_PATH = os.path.join(_cwdPath[:_projDirIdx], PROJECT_DIR_NAME)
else:
    # Running outside the project tree: slicing by `-1` would drop the last character of the path and
    # yield a non existing folder, hence fall back to the working directory as is.
    PROJECT_DIR_PATH = _cwdPath

# Feature extractors constants

TRAIN_BY_TSX    = 1
TRAIN_BY_FILES  = 2

In [None]:
# CyVers Packages
from DataSetsAuxFun import *

In [None]:
# Parameters

# Size of the simulated data set
numTsx     = 522_347 #<! Total number of simulated transactions (Length of the label vectors)
numAttacks = 949     #<! Number of transactions labeled as attacks

# Operating point of the simulated classifier
paramPd = 0.82   #<! Fraction of the attacks which are predicted as attacks
paramFa = 0.0005 #<! Fraction of all transactions drawn as false alarms

In [None]:
# Generating Data
# Simulates ground truth labels and the predictions of a classifier which detects a
# `paramPd` fraction of the attacks and raises `paramFa * numTsx` false alarms.

vY      = np.zeros(numTsx) #<! Ground truth labels (1 -> attack)
vYPred  = np.zeros(numTsx) #<! Predicted labels (1 -> predicted as attack)

# `np.int` was deprecated in NumPy 1.20 and removed in NumPy 1.24.
# It was an alias of the built in `int`, so `int()` truncates exactly the same way.
numDetected     = int(paramPd * numAttacks) #<! Number of attacks which are detected
numFalseAlarms  = int(paramFa * numTsx)     #<! Number of indices drawn as false alarms

# The order of the RNG calls is kept as is, so the draws match the original for a given seed
vAttackIdx      = np.random.choice(numTsx, numAttacks, replace = False) #<! np.random.permutation(numTsx)[:numAttacks]
vAttackIdxPred  = vAttackIdx[np.random.choice(numAttacks, numDetected, replace = False)] #<! vAttackIdx[np.random.permutation(numAttacks)[:numDetected]]
# NOTE(review): The false alarms are drawn from all indices, hence some may coincide with real attacks
# (Such samples count as detections and not as false alarms) - confirm this is intended.
vAttackIdxFa    = np.random.choice(numTsx, numFalseAlarms, replace = False) #<! np.random.permutation(numTsx)[:numFalseAlarms]

vY[vAttackIdx]          = 1
vYPred[vAttackIdxPred]  = 1
vYPred[vAttackIdxFa]    = 1

In [None]:
# Confusion matrix of the simulated predictions (`vYPred`) against the ground truth (`vY`) for the classes 0 and 1.
# NOTE(review): `DisplayConfusionMatrix()` is not defined in this notebook, presumably it comes from the `DataSetsAuxFun` star import - confirm.
DisplayConfusionMatrix(vY, vYPred, lClasses = [0, 1])

In [None]:
# Summary of the classification scores of the simulated predictions, shown as a single column (`Score`) table.
# NOTE(review): `GenClassifierSummaryResults()` is not defined in this notebook, presumably it comes from the `DataSetsAuxFun` star import.
# The `ds` prefix suggests it returns a Pandas Series - confirm.
dsScoreSumm = GenClassifierSummaryResults(vY, vYPred)
dfScoreSummary  = pd.DataFrame(dsScoreSumm, columns = ['Score'])
dfScoreSummary #<! Last expression of the cell, rendered by the notebook