In [1]:
from datetime import datetime, timedelta
import pandas as pd
from misinspect.datasets.payment_transaction import generate_transaction_data
from misinspect.analysis.binary import MisClassifiedTxnAnalyzer
from misinspect.gui.jupyter import MisClassifiedTxnVisualizer

In [2]:
normal = generate_transaction_data(
    1000, 
    (1000, 1010), 
    (1, 3), 
    (datetime.now() - timedelta(days=100), datetime.now() - timedelta(days=5)),
    ["card", "QR", "cash"],
    0,
)

fraud = generate_transaction_data(
    50, 
    (1000, 1010), 
    (1, 3), 
    (datetime.now() - timedelta(days=4), datetime.now()),
    ["card", "QR", "cash"],
    1,
)

dataset = pd.concat([normal, fraud], axis=0).reset_index(drop=True)

In [3]:
analyzer = MisClassifiedTxnAnalyzer(
    dataset=dataset,
    user_id_col = "user_id",
    price_col = "price",
    datetime_col = "use_dt",
    prob_col = "probability",
    label_col = "label",
)

In [4]:
widget = MisClassifiedTxnVisualizer(analyzer)
widget.show()

Dropdown(description='Select threshold: ', options=('0.50', '0.55', '0.60', '0.65', '0.70', '0.75', '0.80', '0…

Select(description='Select misclassification type: ', layout=Layout(width='150px'), options=('FP', 'FN'), valu…

Dropdown(description='Select User Id: ', options=(), value=None)

HBox(children=(Button(description='display user data', style=ButtonStyle()), Button(description='plot payment …

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

In [5]:
# 各確率の閾値を超えるデータの FP、FN の件数を計算
fp_fn_counts = []

prob_thresholds = [i / 20 for i in range(10, 21)]  # 0.5 から 1.0 まで 0.05 刻み

for threshold in prob_thresholds:
    # 閾値を超えるデータ
    above_threshold_data = dataset[dataset['probability'] > threshold]
    
    # FP と FN の件数をカウント
    fp_count = above_threshold_data[(above_threshold_data['label'] == 0)].shape[0]
    fn_count = above_threshold_data[(above_threshold_data['label'] == 1)].shape[0]

    fp_fn_counts.append({'Threshold': threshold, 'FP': fp_count, 'FN': fn_count})

# DataFrame に変換
fp_fn_df = pd.DataFrame(fp_fn_counts)

fp_fn_df



Unnamed: 0,Threshold,FP,FN
0,0.5,165,42
1,0.55,75,41
2,0.6,0,36
3,0.65,0,31
4,0.7,0,25
5,0.75,0,20
6,0.8,0,19
7,0.85,0,18
8,0.9,0,9
9,0.95,0,7
