In [11]:
import numpy as np
import pandas as pd
import os
import random
import time

In [12]:
def is_weekend(datetime: pd.Timestamp) -> int:
    """Flag timestamps that fall on a Saturday or Sunday.

    Args:
        datetime: Timestamp of the transaction.

    Returns:
        1 if the timestamp's weekday index is 5 or 6, otherwise 0.
    """
    # weekday() numbers the days Monday=0 ... Sunday=6.
    weekday_index = datetime.weekday()
    return 1 if weekday_index >= 5 else 0

In [13]:
def is_night(datetime: pd.Timestamp) -> int:
    """Flag timestamps whose hour lies outside the 06:00-22:59 band.

    Args:
        datetime: Timestamp of the transaction.

    Returns:
        1 if the hour is before 6 or after 22, otherwise 0.
    """
    hour_of_day = datetime.hour
    # Early-morning hours (0-5).
    if hour_of_day < 6:
        return 1
    # Late-evening hour (23).
    if hour_of_day > 22:
        return 1
    return 0

In [14]:
def get_customer_spending_behaviors(df: pd.DataFrame, day_windows: list[int]) -> pd.DataFrame:
    """Add rolling transaction-count and average-amount features.

    For every window size ``w`` in ``day_windows`` two columns are added:
    ``customer_num_transactions_{w}_days`` and ``customer_avg_spent_{w}_days``,
    computed from the ``amount`` column over a trailing ``w``-day window on
    ``transaction_datetime``.

    The input frame is not modified; a new, chronologically sorted frame
    indexed by ``transaction_id`` is returned.

    Args:
        df: Transactions to process. Must contain the columns
            ``transaction_datetime``, ``amount`` and ``transaction_id``.
        day_windows: Window lengths in days.

    Returns:
        A new DataFrame with the added feature columns.
    """
    # sort_values without inplace returns a new frame, so the caller's object
    # (e.g. a group handed over by groupby.apply) is never mutated.
    ordered = df.sort_values("transaction_datetime")
    # Time-based rolling windows need a datetime index.
    ordered.index = ordered["transaction_datetime"]

    amounts = ordered["amount"]
    for window in day_windows:
        trailing = amounts.rolling(f"{window}d")
        ordered[f"customer_num_transactions_{window}_days"] = trailing.count()
        ordered[f"customer_avg_spent_{window}_days"] = trailing.mean()

    # Switch to the transaction identifier as index for the returned frame.
    ordered.index = ordered["transaction_id"]

    return ordered

In [15]:
def get_terminal_risks(df: pd.DataFrame, delay: int = 7, day_windows: list[int] = [1, 7, 30]) -> pd.DataFrame:
    """Add delayed rolling transaction-count and fraud-rate features.

    For every window size ``w`` in ``day_windows`` two columns are added:
    ``terminal_num_transactions_{w}_days`` and ``terminal_risk_{w}_days``.
    Both are computed from the ``fraud`` column over the ``w`` days that
    precede the most recent ``delay`` days, obtained as the difference
    between a trailing ``w + delay``-day window and a trailing ``delay``-day
    window.

    The input frame is not modified; a new, chronologically sorted frame
    indexed by ``transaction_id`` is returned. Missing values are replaced by
    0 only in the columns created here, so gaps in other columns are kept.

    Args:
        df: Transactions to process. Must contain the columns
            ``transaction_datetime``, ``fraud`` and ``transaction_id``.
        delay: Number of most recent days excluded from each window.
        day_windows: Window lengths in days. The default list is only read,
            never mutated.

    Returns:
        A new DataFrame with the added feature columns.
    """
    # sort_values without inplace returns a new frame, so the caller's object
    # (e.g. a group handed over by groupby.apply) is never mutated.
    ordered = df.sort_values("transaction_datetime")
    # Time-based rolling windows need a datetime index.
    ordered.index = ordered["transaction_datetime"]

    fraud_flags = ordered["fraud"]

    recent = fraud_flags.rolling(f"{delay}d")
    transactions_delay = recent.count()
    fraudulent_delay = recent.sum()

    created_columns = []
    for window in day_windows:
        extended = fraud_flags.rolling(f"{window + delay}d")

        # Removing the delay period leaves only the older part of the window.
        transactions_window = extended.count() - transactions_delay
        fraudulent_window = extended.sum() - fraudulent_delay

        # 0 / 0 yields NaN when the window holds no transactions.
        risk_window = fraudulent_window / transactions_window

        count_column = f"terminal_num_transactions_{window}_days"
        risk_column = f"terminal_risk_{window}_days"

        # Assign by position (plain lists) rather than by index label.
        ordered[count_column] = list(transactions_window)
        ordered[risk_column] = list(risk_window)
        created_columns.extend([count_column, risk_column])

    # Switch to the transaction identifier as index for the returned frame.
    ordered.index = ordered["transaction_id"]

    # Undefined risks (empty windows) become 0; unrelated columns keep their
    # missing values instead of being silently overwritten.
    if created_columns:
        ordered[created_columns] = ordered[created_columns].fillna(0)

    return ordered

In [16]:
# Read the raw transactions and parse the timestamp column so that the
# date-based features can be derived from it.
df = pd.read_csv("datasets/transactions.csv").assign(
    transaction_datetime=lambda frame: pd.to_datetime(frame["transaction_datetime"])
)

In [17]:
# Derive the 0/1 calendar flags from each transaction's timestamp.
df = df.assign(
    is_weekend=df["transaction_datetime"].apply(is_weekend),
    is_night=df["transaction_datetime"].apply(is_night),
)

In [18]:
# Compute the rolling spending features separately for every customer, then
# restore chronological order and a plain 0..n-1 index.
# NOTE(review): recent pandas releases may warn when groupby.apply operates on
# the grouping column - confirm against the installed version.
df = (
    df.groupby("customer_id")
    .apply(get_customer_spending_behaviors, day_windows=[1, 7, 30])
    .sort_values("transaction_datetime")
    .reset_index(drop=True)
)

  df = df.groupby("customer_id").apply(get_customer_spending_behaviors, day_windows=[1, 7, 30]).sort_values("transaction_datetime").reset_index(drop=True)


In [19]:
# Compute the delayed risk features separately for every terminal, then
# restore chronological order and a plain 0..n-1 index.
# NOTE(review): recent pandas releases may warn when groupby.apply operates on
# the grouping column - confirm against the installed version.
df = (
    df.groupby("terminal_id")
    .apply(get_terminal_risks, delay=7, day_windows=[1, 7, 30])
    .sort_values("transaction_datetime")
    .reset_index(drop=True)
)

  df = df.groupby("terminal_id").apply(get_terminal_risks, delay=7, day_windows=[1, 7, 30]).sort_values("transaction_datetime").reset_index(drop=True)


In [20]:
# Persist the enriched transactions; the positional index carries no
# information, so it is left out of the file.
df.to_csv(
    path_or_buf="datasets/transactions_transformed.csv",
    index=False,
)