# Ntropy

In [1]:
import os
import pandas as pd  # type: ignore
from pathlib import Path
from enrichment_models.tasks.normalizer.ntropy_wrapper import NtropyNormalizer
from enrichment_models.tasks.labeler.ntropy_wrapper import NtropyLabeler
from enrichment_models.tasks.labeler.utils import evaluate as evaluate_labeler
from enrichment_models.tasks.normalizer.utils import evaluate as evaluate_normalizer

  from .autonotebook import tqdm as notebook_tqdm


## DATASET: 100 CONSUMER TRANSACTIONS (v3)

In [2]:
# Input and output CSVs both live in the "datasets" directory that sits next to
# the current working directory (i.e. one level up, then into "datasets").
TEST_SET_PATH = Path.cwd().parent.joinpath(
    "datasets", "100_labeled_consumer_transactions.csv"
)
OUTPUT_PATH = Path.cwd().parent.joinpath("datasets", "predictions_results.csv")

# Names of the ground-truth columns in the test set that predictions are scored against.
GROUND_TRUTH_LABELS_COLUMN = "labels_correct"
GROUND_TRUTH_MERCHANT_COLUMN = "merchant_correct"
GROUND_TRUTH_WEBSITE_COLUMN = "website_correct"

In [3]:
# Read the labeled benchmark transactions into a DataFrame.
test_set_df = pd.read_csv(filepath_or_buffer=TEST_SET_PATH)

# Bare expression on the last line so the notebook renders the frame.
test_set_df

Unnamed: 0,description,date,entry_type,amount,iso_currency_code,country,account_holder_type,account_holder_id,merchant_correct,website_correct,labels_correct
0,EARLY PAY: PETCO ANIMAL SUP DIRECT DEP 6610219...,2023-01-01,incoming,1290.44,USD,US,consumer,benchmark_account,Petco,petco.com,Paycheck
1,DOORDASH*ZEEKS PIZZA,2023-01-02,outgoing,82.80,USD,US,consumer,benchmark_account,DoorDash,doordash.com,Food and Drink
2,Withdrawal ACH NSF Fee ($323.55 KWIKCASHKC),2023-01-03,outgoing,19.95,USD,US,consumer,benchmark_account,KwikCash,kwikcashonline.com,Non-sufficient funds / Overdraft fee
3,BILL PAY BMW R18 CLASSIC ...,2023-01-04,outgoing,798.00,USD,US,consumer,benchmark_account,BMW,bmw.com,Auto lease payment
4,1108914IH2NS48MR HABIT PROVO 0279,2023-01-05,outgoing,16.80,USD,US,consumer,benchmark_account,The Habit Burger Grill,habitburger.com,Food and Drink
...,...,...,...,...,...,...,...,...,...,...,...
95,BRICKELL TRAVEL PAYROLL PPD ID: 12871273132,2023-04-06,incoming,278.54,USD,US,consumer,benchmark_account,Brickell Travel Management,brickelltravel.com,Paycheck
96,Payment to Cigna,2023-04-07,outgoing,33.50,USD,US,consumer,benchmark_account,Cigna,cigna.com,Insurance
97,Card purchase 4LR*4LIFERESEARCHLC 123-522-2300...,2023-04-08,outgoing,153.58,USD,US,consumer,benchmark_account,4Life,4life.com,Self care
98,PUMP N SHOP 22 04/09 PURCHASE,2023-04-09,outgoing,13.64,USD,US,consumer,benchmark_account,Clark's Pump-N-Shop | Clark's Pump N Shop,myclarkspns.com,Convenience stores


### Labeler

In [4]:
# Predict labels for every transaction in the test set, then score the
# predictions against the ground-truth labels column.
# NOTE(review): the account_holder_id values shown right after loading differ
# from those shown once predictions are attached, so a predict() call
# presumably mutates test_set_df in place — confirm in the wrapper.
labeler_ntropy = NtropyLabeler()
preds: list[dict] = labeler_ntropy.predict(test_set_df)
labeler_score: dict[str, float] = evaluate_labeler(
    preds, test_set_df, correct_labels_column=GROUND_TRUTH_LABELS_COLUMN
)

# Store the predicted labels next to the ground truth so they can be inspected
# and saved later.
test_set_df["prediction_ntropy_labels"] = [entry["labels"] for entry in preds]

print("Score Labeler Ntropy:")
labeler_score

Score Labeler Ntropy:


{'Labeler precision': 0.72,
 'Labeler recall': 0.75,
 'Labeler f1': 0.73,
 'Labeler accuracy': 0.86,
 'Labeler label_similarity': 0.91}

### Normalizer

In [5]:
# Predict merchant and website for every transaction in the test set, then
# score both against their ground-truth columns.
# The normalizer and its predictions get their own names: binding the
# NtropyNormalizer instance to `labeler_ntropy` (and re-declaring `preds`)
# would overwrite the labeler cell's objects with objects of a different kind
# and trips type checkers on the repeated annotated assignment.
normalizer_ntropy = NtropyNormalizer()
normalizer_preds: list[dict] = normalizer_ntropy.predict(test_set_df)
normalizer_score: dict[str, float] = evaluate_normalizer(
    normalizer_preds,
    test_set_df,
    correct_merchants_column=GROUND_TRUTH_MERCHANT_COLUMN,
    correct_websites_column=GROUND_TRUTH_WEBSITE_COLUMN,
)

# Store the predicted merchant and website next to the ground truth so they
# can be inspected and saved later.
test_set_df["prediction_ntropy_merchant"] = [
    pred["merchant"] for pred in normalizer_preds
]
test_set_df["prediction_ntropy_website"] = [
    pred["website"] for pred in normalizer_preds
]
print("Score Normalizer Ntropy:")
normalizer_score

Score Normalizer Ntropy:


{'Merchant accuracy': 0.87, 'Website accuracy': 0.87}

In [6]:
# Render the test set again; it now also carries the prediction_ntropy_labels,
# prediction_ntropy_merchant and prediction_ntropy_website columns.
test_set_df

Unnamed: 0,description,date,entry_type,amount,iso_currency_code,country,account_holder_type,account_holder_id,merchant_correct,website_correct,labels_correct,prediction_ntropy_labels,prediction_ntropy_merchant,prediction_ntropy_website
0,EARLY PAY: PETCO ANIMAL SUP DIRECT DEP 6610219...,2023-01-01,incoming,1290.44,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,Petco,petco.com,Paycheck,Paycheck,Petco,petco.com
1,DOORDASH*ZEEKS PIZZA,2023-01-02,outgoing,82.80,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,DoorDash,doordash.com,Food and Drink,Food and Drink,Doordash,doordash.com
2,Withdrawal ACH NSF Fee ($323.55 KWIKCASHKC),2023-01-03,outgoing,19.95,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,KwikCash,kwikcashonline.com,Non-sufficient funds / Overdraft fee,Non-sufficient funds / Overdraft fee,,
3,BILL PAY BMW R18 CLASSIC ...,2023-01-04,outgoing,798.00,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,BMW,bmw.com,Auto lease payment,Other transport,BMW,bmw.com
4,1108914IH2NS48MR HABIT PROVO 0279,2023-01-05,outgoing,16.80,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,The Habit Burger Grill,habitburger.com,Food and Drink,Food and Drink,The Habit Burger Grill,habitburger.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,BRICKELL TRAVEL PAYROLL PPD ID: 12871273132,2023-04-06,incoming,278.54,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,Brickell Travel Management,brickelltravel.com,Paycheck,Paycheck,Brickell Travel Management,brickelltravel.com
96,Payment to Cigna,2023-04-07,outgoing,33.50,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,Cigna,cigna.com,Insurance,Insurance,Cigna,cigna.com
97,Card purchase 4LR*4LIFERESEARCHLC 123-522-2300...,2023-04-08,outgoing,153.58,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,4Life,4life.com,Self care,Self care,4Life,4life.com
98,PUMP N SHOP 22 04/09 PURCHASE,2023-04-09,outgoing,13.64,USD,US,consumer,f794d0a1-e714-4a0f-8aa2-2434b5fb1680,Clark's Pump-N-Shop | Clark's Pump N Shop,myclarkspns.com,Convenience stores,Convenience stores,Clark's Pump-N-Shop,myclarkspns.com


## Save predictions

In [7]:
# If a predictions file already exists, carry over every column it has that the
# current frame lacks, so rewriting the file does not drop those columns.
# NOTE(review): concat with axis=1 aligns rows on the index; this assumes the
# existing file has the same rows in the same order — TODO confirm.
if OUTPUT_PATH.exists():
    old_preds_df = pd.read_csv(OUTPUT_PATH)
    # Iterating a DataFrame yields its column labels; `in` tests column membership.
    additional_columns = [col for col in old_preds_df if col not in test_set_df]
    test_set_df = pd.concat(
        [test_set_df, *(old_preds_df[col] for col in additional_columns)],
        axis=1,
    )

# Overwrite the predictions file without writing the row index.
test_set_df.to_csv(OUTPUT_PATH, index=False)