## Energy-based Flow Classifier (EFC) - Pure Python

### Applying EFC to Bitcoin blockchain in the presence of label scarcity

In [1]:
from research_aml_elliptic.src.experiments.general_functions.elliptic_data_preprocessing import run_elliptic_preprocessing_pipeline

Root directory:  /Users/kevinaraujo/repos/dissertation/PPCA-UnB-Dissertation/models/notebooks/research_aml_elliptic


In [2]:
# Import Elliptic data set and set variables
last_time_step = 49
last_train_time_step = 34
only_labeled = True

In [3]:
# '1': 1, -> class1 (illicit)
# '2': 0, -> class2 (licit)
# 'unknown': 2 -> dropped
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({"class": {"1": 1, "2": 0, "unknown": 2}}, inplace=True)


In [4]:
import numpy as np
from sklearn.metrics import confusion_matrix

In [5]:
from efc_python.classification_functions import (
    one_class_fit,
    one_class_predict,
)
from efc_python.generic_discretize import discretize, get_intervals

In [6]:
intervals = get_intervals(X_train, 10)  # get discretization intervals from train set

In [7]:
X_train = discretize(X_train, intervals)  # discretize train
X_test = discretize(X_test, intervals)  # discretize test

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.iloc[:, feature].fillna(len(intervals[feature]), inplace=True)
29895   NaN
29896   NaN
29897   NaN
29898   NaN
         ..
46559   NaN
46560   NaN
46561   NaN
46562   NaN
46563   NaN
Name: time_step, Length: 16670, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  data.iloc[:, feature] = pd.cut(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: val

In [24]:
idx_abnormal = np.where(y_train == 1)[0]  # find abnormal samples indexes in the training set

In [25]:
X_train.drop(idx_abnormal, axis=0, inplace=True)  # remove abnormal samples from training (EFC trains with only benign instances)

In [26]:
y_train.drop(idx_abnormal, axis=0, inplace=True)  # remove the corresponding abonrmal training targets

In [28]:
# EFC's hyperparameters
Q = X_test.values.max()
LAMBDA = 0.5  # pseudocount parameter

In [29]:
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)  # train model

In [30]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

In [33]:
# colect results
print("Single-class results")
print('confusion_matrix', confusion_matrix(y_test, y_predicted))

Single-class results
confusion_matrix [[10278  5309]
 [  857   226]]


In [36]:
y_predicted

array([1, 0, 0, ..., 1, 0, 1])

In [43]:
y_test.values

array([0, 0, 0, ..., 1, 0, 1])

In [22]:
from research_aml_elliptic.src.reaml.model_performance import calculate_model_score

In [23]:
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

In [47]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")

In [50]:
model_score

np.float64(0.06829857963130856)

In [53]:
y_true = y_test.values
y_pred = y_predicted

In [54]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [57]:
from pprint import pprint

pprint(metric_dict)

{'accuracy': 0.6301139772045591,
 'f1': np.float64(0.06829857963130856),
 'f1_macro': np.float64(0.4187761889998471),
 'f1_micro': np.float64(0.6301139772045591),
 'precision': np.float64(0.04083107497741644),
 'recall': np.float64(0.20867959372114497),
 'roc_auc': np.float64(0.4340376219712416)}


### Train EFC With 5% Labeled Elliptic Data Set

In [143]:
# elliptic data set from reaml repo
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({'class': {'1': 1, '2': 0, 'unknown': 2}}, inplace=True)


In [144]:
y_test

29894    0
29895    0
29896    0
29897    1
29898    1
        ..
46559    0
46560    0
46561    1
46562    0
46563    1
Name: class, Length: 16670, dtype: int64

In [145]:
# efc preps
intervals = get_intervals(X_train, 10)  # get discretization intervals from train set
X_train = discretize(X_train, intervals)  # discretize train
X_test = discretize(X_test, intervals)  # discretize test

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.iloc[:, feature].fillna(len(intervals[feature]), inplace=True)
29895   NaN
29896   NaN
29897   NaN
29898   NaN
         ..
46559   NaN
46560   NaN
46561   NaN
46562   NaN
46563   NaN
Name: time_step, Length: 16670, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  data.iloc[:, feature] = pd.cut(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: val

In [146]:
indices_illicit = np.where(y_train == 1)[0]

In [147]:
from math import ceil

drop_indices_illicit = np.random.choice(indices_illicit, size=ceil(len(indices_illicit) * 0.95))

In [148]:
# retrieve idxs abnormals and choose 95% of them
# abnormals == class1 (illicit)
# drop random labeled indices
X_train.drop(drop_indices_illicit, axis=0, inplace=True)  # remove abnormal samples from training (EFC trains with only benign instances)
y_train.drop(drop_indices_illicit, axis=0, inplace=True)  # remove the corresponding abonrmal training targets

In [149]:
X_train

Unnamed: 0,time_step,trans_feat_0,trans_feat_1,trans_feat_2,trans_feat_3,trans_feat_4,trans_feat_5,trans_feat_6,trans_feat_7,trans_feat_8,...,agg_feat_62,agg_feat_63,agg_feat_64,agg_feat_65,agg_feat_66,agg_feat_67,agg_feat_68,agg_feat_69,agg_feat_70,agg_feat_71
0,0,9,7,2,3,0,3,0,0,8,...,0,0,1,1,2,3,0,4,0,0
1,0,9,7,3,3,0,3,0,0,7,...,0,0,1,1,2,2,0,4,0,0
2,0,7,0,1,0,0,0,0,8,7,...,0,0,1,1,1,0,0,1,0,0
3,0,7,0,1,0,0,0,0,8,7,...,2,3,0,0,1,0,0,1,0,0
4,0,3,0,1,1,0,0,0,1,2,...,0,1,1,1,1,1,1,3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29888,9,1,5,4,0,0,0,0,4,2,...,0,0,1,1,1,0,0,1,0,0
29889,9,0,6,4,0,0,0,0,2,0,...,6,9,2,2,0,0,0,0,0,0
29891,9,1,6,4,0,0,0,0,4,1,...,5,7,2,2,1,0,0,1,0,0
29892,9,7,5,4,0,0,0,0,8,7,...,0,0,1,1,1,0,0,1,0,0


In [150]:
# EFC's hyperparameters
Q = np.int64(X_test.values.max())
LAMBDA = 0.5  # pseudocount parameter

In [151]:
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)  # train model

In [152]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

In [153]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")
model_score

np.float64(0.05823475887170155)

In [154]:
y_true = y_test.values
y_pred = y_predicted

In [155]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [156]:
from pprint import pprint

pprint(metric_dict)

{'accuracy': 0.6274745050989802,
 'f1': np.float64(0.05823475887170155),
 'f1_macro': np.float64(0.4130252535104788),
 'f1_micro': np.float64(0.6274745050989802),
 'precision': np.float64(0.03483941208492107),
 'recall': np.float64(0.1772853185595568),
 'roc_auc': np.float64(0.4180197042531537)}


### Train EFC With 10% Labeled Elliptic Data Set

In [8]:
# elliptic data set from reaml repo
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({"class": {"1": 1, "2": 0, "unknown": 2}}, inplace=True)


In [9]:
# efc preps
intervals = get_intervals(X_train, 10)  # get discretization intervals from train set
X_train = discretize(X_train, intervals)  # discretize train
X_test = discretize(X_test, intervals)  # discretize test

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.iloc[:, feature].fillna(len(intervals[feature]), inplace=True)
29895   NaN
29896   NaN
29897   NaN
29898   NaN
         ..
46559   NaN
46560   NaN
46561   NaN
46562   NaN
46563   NaN
Name: time_step, Length: 16670, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  data.iloc[:, feature] = pd.cut(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: val

In [10]:
indices_illicit = np.where(y_train == 1)[0]

In [11]:
from math import ceil

drop_indices_illicit = np.random.choice(indices_illicit, size=ceil(len(indices_illicit) * 0.90))

In [12]:
# retrieve idxs abnormals and choose 90% of them
# abnormals == class1 (illicit)
# drop random labeled indices
X_train.drop(drop_indices_illicit, axis=0, inplace=True)  # remove abnormal samples from training (EFC trains with only benign instances)
y_train.drop(drop_indices_illicit, axis=0, inplace=True)  # remove the corresponding abonrmal training targets

In [13]:
X_train

Unnamed: 0,time_step,trans_feat_0,trans_feat_1,trans_feat_2,trans_feat_3,trans_feat_4,trans_feat_5,trans_feat_6,trans_feat_7,trans_feat_8,...,agg_feat_62,agg_feat_63,agg_feat_64,agg_feat_65,agg_feat_66,agg_feat_67,agg_feat_68,agg_feat_69,agg_feat_70,agg_feat_71
0,0,9,7,2,3,0,3,0,0,8,...,0,0,1,1,2,3,0,4,0,0
1,0,9,7,3,3,0,3,0,0,7,...,0,0,1,1,2,2,0,4,0,0
2,0,7,0,1,0,0,0,0,8,7,...,0,0,1,1,1,0,0,1,0,0
3,0,7,0,1,0,0,0,0,8,7,...,2,3,0,0,1,0,0,1,0,0
4,0,3,0,1,1,0,0,0,1,2,...,0,1,1,1,1,1,1,3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29889,9,0,6,4,0,0,0,0,2,0,...,6,9,2,2,0,0,0,0,0,0
29890,9,0,5,5,0,0,0,0,3,0,...,0,8,1,1,0,0,0,0,0,0
29891,9,1,6,4,0,0,0,0,4,1,...,5,7,2,2,1,0,0,1,0,0
29892,9,7,5,4,0,0,0,0,8,7,...,0,0,1,1,1,0,0,1,0,0


In [14]:
# EFC's hyperparameters
Q = np.int64(X_test.values.max())
LAMBDA = 0.5  # pseudocount parameter

In [15]:
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)  # train model

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [00:00<00:00, 4954.07it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [00:46<00:00,  3.55it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [00:01<00:00, 100.56it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [00:01<00:00, 87.69it/s]


In [16]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

In [19]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")
model_score

np.float64(0.05826126536185708)

In [20]:
y_true = y_test.values
y_pred = y_predicted

In [24]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [25]:
from pprint import pprint

pprint(metric_dict)

{'accuracy': 0.6276544691061787,
 'f1': np.float64(0.05826126536185708),
 'f1_macro': np.float64(0.41310760378265193),
 'f1_micro': np.float64(0.6276544691061787),
 'precision': np.float64(0.034858387799564274),
 'recall': np.float64(0.1772853185595568),
 'roc_auc': np.float64(0.41811593829434185)}


In [26]:
# colect results
print("Single-class results")
print('confusion_matrix', confusion_matrix(y_test, y_predicted))

Single-class results
confusion_matrix [[10271  5316]
 [  891   192]]


### Train EFC With 100% Labeled Elliptic Data Set

In [171]:
# elliptic data set from reaml repo
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({'class': {'1': 1, '2': 0, 'unknown': 2}}, inplace=True)


In [172]:
# efc preps
intervals = get_intervals(X_train, 10)  # get discretization intervals from train set
X_train = discretize(X_train, intervals)  # discretize train
X_test = discretize(X_test, intervals)  # discretize test

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.iloc[:, feature].fillna(len(intervals[feature]), inplace=True)
29895   NaN
29896   NaN
29897   NaN
29898   NaN
         ..
46559   NaN
46560   NaN
46561   NaN
46562   NaN
46563   NaN
Name: time_step, Length: 16670, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  data.iloc[:, feature] = pd.cut(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: val

In [173]:
# EFC's hyperparameters
Q = np.int64(X_test.values.max())
LAMBDA = 0.5  # pseudocount parameter

In [174]:
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)  # train model

In [175]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

In [176]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")
model_score

np.float64(0.058654572940287225)

In [177]:
y_true = y_test.values
y_pred = y_predicted

In [178]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [179]:
from pprint import pprint

pprint(metric_dict)

{'accuracy': 0.6264547090581883,
 'f1': np.float64(0.058654572940287225),
 'f1_macro': np.float64(0.412825883289601),
 'f1_micro': np.float64(0.6264547090581883),
 'precision': np.float64(0.03506869125090383),
 'recall': np.float64(0.1791320406278855),
 'roc_auc': np.float64(0.41833358302645957)}


### Train EFC Without Labeled Data

In [184]:
# elliptic data set from reaml repo
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({'class': {'1': 1, '2': 0, 'unknown': 2}}, inplace=True)


In [185]:
# efc preps
intervals = get_intervals(X_train, 10)  # get discretization intervals from train set
X_train = discretize(X_train, intervals)  # discretize train
X_test = discretize(X_test, intervals)  # discretize test

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.iloc[:, feature].fillna(len(intervals[feature]), inplace=True)
29895   NaN
29896   NaN
29897   NaN
29898   NaN
         ..
46559   NaN
46560   NaN
46561   NaN
46562   NaN
46563   NaN
Name: time_step, Length: 16670, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  data.iloc[:, feature] = pd.cut(
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: val

In [186]:
idx_abnormal = np.where(y_train == 1)[0]  # find abnormal samples indexes in the training set
X_train.drop(idx_abnormal, axis=0, inplace=True)  # remove abnormal samples from training (EFC trains with only benign instances)
y_train.drop(idx_abnormal, axis=0, inplace=True)  # remove the corresponding abonrmal training targets

In [187]:
# EFC's hyperparameters
Q = np.int64(X_test.values.max())
LAMBDA = 0.5  # pseudocount parameter

In [188]:
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)  # train model

In [189]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

In [190]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")

In [191]:
y_true = y_test.values
y_pred = y_predicted

In [192]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [193]:
from pprint import pprint

pprint(metric_dict)

{'accuracy': 0.6301139772045591,
 'f1': np.float64(0.06829857963130856),
 'f1_macro': np.float64(0.4187761889998471),
 'f1_micro': np.float64(0.6301139772045591),
 'precision': np.float64(0.04083107497741644),
 'recall': np.float64(0.20867959372114497),
 'roc_auc': np.float64(0.4340376219712416)}


### Train EFC Without Discretization

In [1]:
from efc_python.classification_functions import (
    one_class_fit,
    one_class_predict,
)
from efc_python.generic_discretize import discretize, get_intervals

In [6]:
# elliptic data set from reaml repo
X_train, X_test, y_train, y_test = run_elliptic_preprocessing_pipeline(last_train_time_step=last_train_time_step,
                                                                             last_time_step=last_time_step,
                                                                             only_labeled=only_labeled)

  df_classes.replace({"class": {"1": 1, "2": 0, "unknown": 2}}, inplace=True)


In [9]:
# EFC's hyperparameters
Q = np.int64(X_test.values.max())
LAMBDA = 0.5  # pseudocount parameter

In [10]:
# train model
coupling, h_i, cutoff, _, _ = one_class_fit(np.array(X_train), Q, LAMBDA)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [00:00<00:00, 172.40it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [2:09:08<00:00, 46.68s/it]
100%|█| 166/166 [21:51<00:00,  7.90s/i
  coupling_matrix = np.exp(np.negative(inv_corr))
  fields[i * (Q - 1) + ai] /= (
  fields[i * (Q - 1) + ai] /= (
  fields[i * (Q - 1) + ai] /= (
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 166/166 [3:10:46<00:00, 68.96s/it]
  c_matrix = np.log(c_matrix)
  h_i = np.log(h_i)


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [205]:
y_predicted, energies = one_class_predict(np.array(X_test), coupling, h_i, cutoff, Q)  # test model

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
model_score = calculate_model_score(y_true=y_test.values, y_pred=y_predicted, metric="f1")

In [None]:
y_true = y_test.values
y_pred = y_predicted

In [None]:
metric_dict = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, pos_label=1),
        "f1_micro": f1_score(y_true, y_pred, average="micro"),
        "f1_macro": f1_score(y_true, y_pred, average="macro"),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_pred),
    }

In [None]:
from pprint import pprint

pprint(metric_dict)