In [1]:
import pandas as pd
from sklearn import preprocessing
import tensorflow as tf

In [2]:
# Load the PaySim transaction table; point PAYSIM_CSV at wherever the CSV actually lives
PAYSIM_CSV = 'paysim/paysim.csv'
df = pd.read_csv(PAYSIM_CSV)

In [3]:
# Label Encoding Function
def df_label_encoder(df, columns):
    """Return a copy of ``df`` with the given columns replaced by integer codes.

    Each listed column is cast to ``str`` and encoded on its own with a fresh
    ``LabelEncoder``. Codes are therefore only meaningful within a single
    column: the same string appearing in two different columns may receive
    two different codes.

    Args:
        df: Input DataFrame. It is left unmodified; encoding happens on a copy
            so callers keep their original values.
        columns: Iterable of column names to encode.

    Returns:
        A new DataFrame with the same columns as ``df`` in which every column
        named in ``columns`` holds the encoder's integer codes.

    Raises:
        KeyError: If a name in ``columns`` is not a column of ``df``.
    """
    # Work on a copy: writing into the caller's frame would destroy the raw
    # values and make a second call encode already-encoded data.
    encoded = df.copy()
    for col in columns:
        # One encoder per column keeps each column's fitted state independent.
        encoder = preprocessing.LabelEncoder()
        encoded[col] = encoder.fit_transform(encoded[col].astype(str))
    return encoded

In [4]:
def preprocess(df):
    """Label-encode the categorical columns ``type``, ``nameOrig`` and ``nameDest``.

    ``type`` is encoded on its own through ``df_label_encoder``. ``nameOrig``
    and ``nameDest`` both hold account names, so they are encoded against one
    shared, sorted vocabulary: a given account name maps to the same integer
    in both columns. Encoding the two columns independently would hand out
    unrelated codes for the same account, and any later step that unions or
    joins origin and destination on the account code would silently combine
    different accounts.

    Args:
        df: DataFrame containing the columns ``type``, ``nameOrig`` and
            ``nameDest``.

    Returns:
        The DataFrame handed back by ``df_label_encoder`` with the three
        columns replaced by integer codes. Always use the return value rather
        than relying on the argument having been updated.

    Raises:
        KeyError: If one of the three columns is missing from ``df``.
    """
    # Columns that share a single account-id code space
    account_columns = ['nameOrig', 'nameDest']

    # Transaction type is an ordinary, standalone categorical
    df = df_label_encoder(df, ['type'])

    # Stack every account name (origin rows first, then destination rows) and
    # factorize once so both columns draw their codes from the same vocabulary.
    stacked_names = pd.concat(
        [df[col].astype(str) for col in account_columns],
        ignore_index=True,
    )
    codes, _ = pd.factorize(stacked_names, sort=True)

    # Slice the stacked codes back into their source columns by position
    n_rows = len(df)
    for position, col in enumerate(account_columns):
        df[col] = codes[position * n_rows:(position + 1) * n_rows]

    return df

In [5]:
# Node feature aggregation
def transaction_aggregate(df):
    """Build one row of averaged transaction statistics per account.

    The account list is every distinct value found in ``nameOrig`` or
    ``nameDest``, in order of first appearance (origin column scanned first).

    Args:
        df: Transaction DataFrame with columns ``nameOrig``, ``nameDest``,
            ``amount``, ``oldbalanceOrg``, ``newbalanceOrig``,
            ``oldbalanceDest`` and ``newbalanceDest``.

    Returns:
        DataFrame with columns ``Account``, ``avg_amt_orig``,
        ``avg_oldbalance_orig``, ``avg_newbalance_orig``,
        ``avg_oldbalance_dest`` and ``avg_newbalance_dest``. Statistics for a
        role (origin / destination) in which the account never appears are 0.
    """
    # Means over the transactions each account sent
    sender_stats = (
        df.groupby('nameOrig')
        .agg(
            avg_amt_orig=('amount', 'mean'),
            avg_oldbalance_orig=('oldbalanceOrg', 'mean'),
            avg_newbalance_orig=('newbalanceOrig', 'mean'),
        )
        .rename_axis('Account')
        .reset_index()
    )

    # Means over the transactions each account received
    receiver_stats = (
        df.groupby('nameDest')
        .agg(
            avg_oldbalance_dest=('oldbalanceDest', 'mean'),
            avg_newbalance_dest=('newbalanceDest', 'mean'),
        )
        .rename_axis('Account')
        .reset_index()
    )

    # Every account seen on either side of a transaction
    all_accounts = pd.concat([df['nameOrig'], df['nameDest']]).unique()
    features = pd.DataFrame({'Account': all_accounts})

    # Left-join so accounts missing from one role are kept (as NaN for now)
    for stats in (sender_stats, receiver_stats):
        features = features.merge(stats, on='Account', how='left')

    # An account absent from a role gets 0 for that role's statistics
    return features.fillna(0)

In [6]:
def get_node_attr(df):
    """Build the per-account feature table and the fraud-label tensor.

    Args:
        df: Transaction DataFrame; must provide the columns read by
            ``transaction_aggregate`` plus ``isFraud``.

    Returns:
        A ``(node_df, node_label)`` tuple. ``node_df`` is the DataFrame
        produced by ``transaction_aggregate(df)``. ``node_label`` is a
        ``tf.float32`` tensor holding ``df['isFraud']`` reshaped to a single
        column, i.e. one row per row of ``df``.
    """
    # NOTE(review): the labels below have one entry per row of `df` (per
    # transaction), whereas the feature table is built per account, so the two
    # outputs presumably differ in row count -- confirm how downstream code
    # aligns them.
    account_features = transaction_aggregate(df)

    # isFraud as a float32 column vector
    fraud_flags = tf.convert_to_tensor(df['isFraud'].values, dtype=tf.float32)
    fraud_column = tf.reshape(fraud_flags, (-1, 1))

    return account_features, fraud_column

In [7]:
# Encode the categorical columns. `df` is rebound to the encoded frame, so
# running this cell again would encode the already-encoded values: run it once
# per fresh load of the data.
df = df.pipe(preprocess)

In [None]:
# Build the per-account feature table and the fraud-label tensor
node_df, node_label = get_node_attr(df=df)

In [None]:
# Show the node feature table, then the label tensor, each on its own line
print(node_df, node_label, sep='\n')