# LLaMA (fine-tuned on transaction data)

In [None]:
import os
import pandas as pd  # type: ignore
from pathlib import Path
from enrichment_models.tasks.llm_wrapper import LLMWrapper
from enrichment_models.llms.llama import LLAMA
from enrichment_models.tasks.labeler.prompter import LabelerPrompter
from enrichment_models.tasks.labeler.utils import evaluate as evaluate_labeler
from enrichment_models.tasks.normalizer.utils import evaluate as evaluate_normalizer

### Init LLM

In [2]:
# Model selection. The 13B base model with its labeler LoRA weights is the
# active configuration. To run the 7B variant instead, use the commented-out
# settings below (and point `LLM` at `llama_7B` in the next cell):
#
# llama_7B = LLAMA(
#     base_model="decapoda-research/llama-7b-hf",
#     lora_weights="ntropydev/ntropy-labeler-llama-lora-7b",
#     batch_size=16,
# )
# model_name = "llama_7b"

# Short identifier for the active model.
model_name = "llama_13b"

llama_13B = LLAMA(
    batch_size=16,
    lora_weights="ntropydev/ntropy-labeler-llama-lora-13b",
    base_model="decapoda-research/llama-13b-hf",
)

Loading checkpoint shards: 100%|██████████| 41/41 [00:32<00:00,  1.26it/s]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [3]:
# Bind the loaded model to a generic name; the labeler cell passes `LLM`
# to LLMWrapper, so only this line changes when another model is selected.
LLM = llama_13B

## DATASET: 100 CONSUMER TRANSACTIONS (v3)

In [4]:
# Input and output CSV locations. Both live in the `datasets` directory that
# sits next to the directory the notebook is run from (os.path.abspath("")
# resolves to the current working directory).
TEST_SET_PATH = Path(os.path.abspath("")).parent.joinpath(
    "datasets", "100_labeled_consumer_transactions.csv"
)
OUTPUT_PATH = Path(os.path.abspath("")).parent.joinpath(
    "datasets", "predictions_results.csv"
)

# Names of the ground-truth columns in the test set.
(
    GROUND_TRUTH_LABELS_COLUMN,
    GROUND_TRUTH_MERCHANT_COLUMN,
    GROUND_TRUTH_WEBSITE_COLUMN,
) = ("labels_correct", "merchant_correct", "website_correct")

In [5]:
# Load the labeled benchmark transactions from the CSV at TEST_SET_PATH.
test_set_df = pd.read_csv(TEST_SET_PATH)
# Bare trailing expression so the notebook renders the frame.
test_set_df

Unnamed: 0,description,date,entry_type,amount,iso_currency_code,country,account_holder_type,account_holder_id,merchant_correct,website_correct,labels_correct
0,EARLY PAY: PETCO ANIMAL SUP DIRECT DEP 6610219...,2023-01-01,incoming,1290.44,USD,US,consumer,benchmark_account,Petco,petco.com,Paycheck
1,DOORDASH*ZEEKS PIZZA,2023-01-02,outgoing,82.80,USD,US,consumer,benchmark_account,DoorDash,doordash.com,Food and Drink
2,Withdrawal ACH NSF Fee ($323.55 KWIKCASHKC),2023-01-03,outgoing,19.95,USD,US,consumer,benchmark_account,KwikCash,kwikcashonline.com,Non-sufficient funds / Overdraft fee
3,BILL PAY BMW R18 CLASSIC ...,2023-01-04,outgoing,798.00,USD,US,consumer,benchmark_account,BMW,bmw.com,Auto lease payment
4,1108914IH2NS48MR HABIT PROVO 0279,2023-01-05,outgoing,16.80,USD,US,consumer,benchmark_account,The Habit Burger Grill,habitburger.com,Food and Drink
...,...,...,...,...,...,...,...,...,...,...,...
95,BRICKELL TRAVEL PAYROLL PPD ID: 12871273132,2023-04-06,incoming,278.54,USD,US,consumer,benchmark_account,Brickell Travel Management,brickelltravel.com,Paycheck
96,Payment to Cigna,2023-04-07,outgoing,33.50,USD,US,consumer,benchmark_account,Cigna,cigna.com,Insurance
97,Card purchase 4LR*4LIFERESEARCHLC 123-522-2300...,2023-04-08,outgoing,153.58,USD,US,consumer,benchmark_account,4Life,4life.com,Self care
98,PUMP N SHOP 22 04/09 PURCHASE,2023-04-09,outgoing,13.64,USD,US,consumer,benchmark_account,Clark's Pump-N-Shop | Clark's Pump N Shop,myclarkspns.com,Convenience stores


### Labeler

In [6]:
# Run the selected LLM over the test set with the labeler prompt, then score
# the predictions against the ground-truth labels column.
prompter = LabelerPrompter()
labeler_llm = LLMWrapper(llm=LLM, prompter=prompter)
preds = labeler_llm.predict(test_set_df)
labeler_score = evaluate_labeler(
    preds, test_set_df, correct_labels_column=GROUND_TRUTH_LABELS_COLUMN
)

# Name the prediction column after the active model (`model_name`, set in the
# model-init cell) rather than hard-coding it, so that switching models does
# not store the new predictions under another model's column and overwrite
# that model's results in the saved predictions file.
test_set_df[f"prediction_labels_{model_name}"] = [pred["labels"] for pred in preds]

print("Score:")
labeler_score

Inference (bs=16): 100%|██████████| 7/7 [00:34<00:00,  4.88s/it]

Score:





{'Labeler precision': 0.68,
 'Labeler recall': 0.66,
 'Labeler f1': 0.65,
 'Labeler accuracy': 0.78,
 'Labeler label_similarity': 0.87}

In [7]:
# Display the test set again; it now also carries the prediction column
# added by the labeler cell.
test_set_df

Unnamed: 0,description,date,entry_type,amount,iso_currency_code,country,account_holder_type,account_holder_id,merchant_correct,website_correct,labels_correct,prediction_labels_llama_13b
0,EARLY PAY: PETCO ANIMAL SUP DIRECT DEP 6610219...,2023-01-01,incoming,1290.44,USD,US,consumer,benchmark_account,Petco,petco.com,Paycheck,Paycheck
1,DOORDASH*ZEEKS PIZZA,2023-01-02,outgoing,82.80,USD,US,consumer,benchmark_account,DoorDash,doordash.com,Food and Drink,Food and Drink
2,Withdrawal ACH NSF Fee ($323.55 KWIKCASHKC),2023-01-03,outgoing,19.95,USD,US,consumer,benchmark_account,KwikCash,kwikcashonline.com,Non-sufficient funds / Overdraft fee,Non-sufficient funds / Overdraft fee
3,BILL PAY BMW R18 CLASSIC ...,2023-01-04,outgoing,798.00,USD,US,consumer,benchmark_account,BMW,bmw.com,Auto lease payment,Auto lease payment
4,1108914IH2NS48MR HABIT PROVO 0279,2023-01-05,outgoing,16.80,USD,US,consumer,benchmark_account,The Habit Burger Grill,habitburger.com,Food and Drink,Food and Drink
...,...,...,...,...,...,...,...,...,...,...,...,...
95,BRICKELL TRAVEL PAYROLL PPD ID: 12871273132,2023-04-06,incoming,278.54,USD,US,consumer,benchmark_account,Brickell Travel Management,brickelltravel.com,Paycheck,Paycheck
96,Payment to Cigna,2023-04-07,outgoing,33.50,USD,US,consumer,benchmark_account,Cigna,cigna.com,Insurance,Insurance
97,Card purchase 4LR*4LIFERESEARCHLC 123-522-2300...,2023-04-08,outgoing,153.58,USD,US,consumer,benchmark_account,4Life,4life.com,Self care,Other non-essential
98,PUMP N SHOP 22 04/09 PURCHASE,2023-04-09,outgoing,13.64,USD,US,consumer,benchmark_account,Clark's Pump-N-Shop | Clark's Pump N Shop,myclarkspns.com,Convenience stores,Convenience stores


## Save predictions

In [8]:
# Persist the predictions. If a results file already exists, carry over every
# column it has that the current frame lacks (e.g. predictions from other
# models), so successive runs accumulate columns side by side.
if OUTPUT_PATH.exists():
    old_preds_df = pd.read_csv(OUTPUT_PATH)

    # concat(axis=1) aligns rows by index label. A results file with a
    # different number of rows would be merged silently with NaN-padded rows
    # and then written back, corrupting the file. Fail loudly instead.
    if len(old_preds_df) != len(test_set_df):
        raise ValueError(
            f"Existing predictions file {OUTPUT_PATH} has {len(old_preds_df)} rows "
            f"but the current test set has {len(test_set_df)}; refusing to merge."
        )

    additional_columns = [
        col for col in old_preds_df.columns if col not in test_set_df.columns
    ]
    test_set_df = pd.concat(
        [test_set_df, old_preds_df[additional_columns]], axis=1
    )

# index=False: the row index is not written as an extra column.
test_set_df.to_csv(OUTPUT_PATH, index=False)