In [None]:
import sys

# Put the parent directory on the module search path (presumably so the
# project-local `utils` import in the next cell resolves -- TODO confirm).
# The search path is the list `sys.path`; `sys` itself has no `append`
# attribute, so calling `sys.append` raised AttributeError.
if '..' not in sys.path:  # guard: re-running the cell must not add duplicates
    sys.path.append('..')

In [1]:
import time
import random
import numpy as np
from tqdm import tqdm
import pandas as pd
from os.path import join as pjoin

from utils import (
    dob_to_age, 
    convert_transaction_data_to_str,
    get_user_basic_info,
    get_transactional_data,
    embed_transaction,
    insert_transaction,
    get_as_close_transactions,
    load_data,
    detect_anomalies
)

In [2]:
import os

# Project root handed to load_data. The default is the original, machine-specific
# checkout location; set the RRAD_ROOT environment variable to run the notebook
# from another checkout without editing this cell.
root = os.environ.get(
    'RRAD_ROOT',
    '/home/quamer23nasim38/reverse-recommendation-for-anomaly-detection/',
)
# Dataset file, passed to load_data together with `root`.
data_path = 'data/fraudTrain.csv'

In [None]:
# load_data is a project helper imported from utils; it is given the project
# root and the dataset path and returns two values. Later cells filter `data`
# on its 'cc_num' and 'is_fraud' columns and iterate over `random_cc_num`,
# comparing each entry against data['cc_num'].
# NOTE(review): going by the name, random_cc_num is presumably a random sample
# of card numbers -- confirm in utils (and whether it is seeded).
data, random_cc_num = load_data(root, data_path)

In [5]:
from transformers import AutoModel, AutoTokenizer

# Checkpoint identifier shared by the tokenizer and the encoder below.
embedding_model_id = "BAAI/bge-small-en"

tokenizer = AutoTokenizer.from_pretrained(embedding_model_id)
# eval() returns the module itself, so the encoder is switched to inference
# mode in the same statement that loads it.
model = AutoModel.from_pretrained(embedding_model_id).eval()

print("Model loaded successfully")

Model loaded successfully


In [6]:
from qdrant_client import QdrantClient, models

# Qdrant instance that lives in this process's memory
client = QdrantClient(":memory:")

# Vector settings for the collection: 384 dimensions, cosine distance.
# NOTE(review): 384 presumably equals the output width of the embedding model
# loaded earlier -- confirm if the checkpoint is ever swapped.
transaction_vectors = models.VectorParams(
    size=384,
    distance=models.Distance.COSINE,
)

# Create the collection that stores the transaction embeddings. Kept as the
# cell's final expression so its return value is still displayed.
client.create_collection(
    collection_name="transactions",
    vectors_config=transaction_vectors,
)

True

In [8]:
# A card is selected only if it has more than this many non-fraud transactions.
MIN_GENUINE_TRANSACTIONS = 1500

# Walk the candidate card numbers and stop at the first one with enough history.
for user in random_cc_num:
    # Rows for this card that are not flagged as fraud
    user_data = data[(data['cc_num'] == user) & (data['is_fraud'] == 0)]
    if user_data.shape[0] > MIN_GENUINE_TRANSACTIONS:
        # Get the basic information for the user from their first transaction row
        customer_information, registered_address = get_user_basic_info(user_data.iloc[0])
        print(f"Customer Information Loaded Successfully for {user}")
        break
else:
    # Reached only when the loop finishes without `break`. Without this guard
    # the notebook would carry on with `user_data` left at the last,
    # under-threshold card (or undefined if there were no candidates at all).
    raise ValueError(
        f"No card in random_cc_num has more than {MIN_GENUINE_TRANSACTIONS} "
        "non-fraud transactions"
    )

Customer Information Loaded Successfully for 370612217861404


In [None]:
# Embed and store every transaction of the selected user. idx, the running
# row position, is passed to insert_transaction as its third argument.
# NOTE(review): insert_transaction is not given `client`, whereas the later
# search call is -- confirm both operate on the same Qdrant collection.
for idx, (_, transaction) in enumerate(tqdm(user_data.iterrows(), total=len(user_data))):
    # Split the row into its four descriptive parts, asking the helper to
    # convert coordinates to addresses
    (
        transaction_information,
        merchant_information,
        payment_address,
        merchant_address,
    ) = get_transactional_data(transaction, convert_coordinates_to_address=True)

    # Render those parts as a single text description
    transaction_description = convert_transaction_data_to_str(
        transaction_information,
        merchant_information,
        payment_address,
        merchant_address,
    )

    # Embed the description; the first element of the result, as a plain list,
    # is what gets stored
    embedding = embed_transaction(transaction_description)[0].tolist()

    # Store the vector together with its description
    insert_transaction(embedding, transaction_description, idx)

    # One-second pause per row (presumably throttling the address lookup --
    # TODO confirm)
    time.sleep(1)

In [11]:
# Hand-written transaction used to probe the detector: four text sections
# divided by dashed rules, with a newline before the first and after the last.
# NOTE(review): the layout presumably mirrors what
# convert_transaction_data_to_str produces for stored rows -- confirm.
section_break = "-----------------------"
new_transaction_info = "\n".join([
    "",  # the text opens with a newline
    "420000.54",
    section_break,
    "Rajesh, Kumar; savings_account",
    section_break,
    "Chandini Chowk; Delhi; India; 20.0583; 16.008",
    section_break,
    "Vietnaam; 20.152538; 16.227746",
    "",  # ...and ends with one
])

In [12]:
# Embed the new transaction information
# NOTE(review): the ingestion loop takes embedding[0].tolist() before storing a
# vector, while this result is handed to the search helper unmodified --
# confirm get_as_close_transactions performs that conversion itself.
new_embedding = embed_transaction(new_transaction_info)
# Pass the embedding and the Qdrant client to the project's lookup helper
# (presumably returning the closest stored transactions -- TODO confirm);
# `results` is what detect_anomalies receives in the next cell.
results = get_as_close_transactions(new_embedding, client)

In [None]:
# A truthy return from detect_anomalies(results) selects the "fraudulent"
# message; anything falsy selects the "genuine" one.
print(
    "The new transaction is fraudulent"
    if detect_anomalies(results)
    else "The new transaction is genuine"
)