In [1]:
cd d:\\DataScience\\AnomalyDetection_JPMC

d:\DataScience\AnomalyDetection_JPMC


In [29]:
import pandas as pd
import pickle
import numpy as np
from pathlib import Path

from sklearn import set_config
set_config(display='diagram')

from sklearn.base import BaseEstimator, TransformerMixin

In [4]:
# Raw inference dataset, addressed relative to the current working directory.
filepath = Path("data/original/inference_data.csv")
df = pd.read_csv(filepath_or_buffer=filepath)

# (rows, columns) of the loaded frame.
df.shape

(149818, 13)

In [17]:
# Keep only the rows whose Transaction_Type is one of these four values.
df = df.loc[
    df['Transaction_Type'].isin(
        ['PAY-CHECK', 'MOVE-FUNDS', 'QUICK-PAYMENT', 'MAKE-PAYMENT']
    )
]

In [9]:
class TimestampTransformer(BaseEstimator, TransformerMixin):
    """Parse timestamp columns and add cyclical day / hour / day-of-week features.

    Parameters
    ----------
    columns : iterable of str or None, default=None
        Names of the columns to parse with ``pd.to_datetime``. When ``None``
        ``transform`` returns its input unchanged.

    Notes
    -----
    The generated feature names (``DaySin``, ``DayCos``, ``HourSin``,
    ``HourCos``, ``DoWSin``, ``DoWCos``) are fixed, so when several columns are
    listed the features of the last column overwrite those of the earlier ones.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None):
        """Validate the input type; nothing is learned.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            # Report the type only: interpolating X itself would dump the data
            # into the exception message.
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")
        return self

    def transform(self, X):
        """Return a copy of ``X`` with parsed timestamps and cyclical features.

        Returns
        -------
        pandas.DataFrame
            ``X`` itself when ``columns`` is ``None``; otherwise a copy in which
            each listed column is converted to datetime and the six sin/cos
            feature columns are added.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")

        if self.columns is None:
            return X

        X_ = X.copy()
        for each in self.columns:
            X_[each] = pd.to_datetime(X[each])
            stamps = X_[each].dt
            # (feature prefix, component values, period used for the encoding)
            components = (
                ('Day', stamps.day, 31),
                ('Hour', stamps.hour, 24),
                ('DoW', stamps.dayofweek, 7),
            )
            for prefix, values, period in components:
                # Map the component onto the unit circle so the end of a period
                # sits next to its start.
                angle = 2 * np.pi * values / period
                X_[f'{prefix}Sin'] = np.sin(angle)
                X_[f'{prefix}Cos'] = np.cos(angle)
        return X_

In [10]:
class InteractionTransformer(BaseEstimator, TransformerMixin):
    """Add per (sender, beneficiary) pair statistics learned during ``fit``.

    ``fit`` stores two lookup tables keyed by ``Sender_Id`` and ``Bene_Id``:
    the number of rows per pair and the mean ``USD_amount`` per pair.
    ``transform`` left-joins both tables onto the input, adding the columns
    ``Interaction_Frequency`` and ``Amount_Mean``.

    Notes
    -----
    ``pd.merge`` on columns returns a fresh ``RangeIndex``; the output of
    ``transform`` therefore does not keep the index of its input.
    """

    def __init__(self):
        self.grouping_columns = ['Sender_Id', 'Bene_Id']

    def fit(self, X, y=None):
        """Learn the per-pair frequency and mean-amount tables from ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            # Report the type only: interpolating X itself would dump the data
            # into the exception message.
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")

        pairs = X.groupby(self.grouping_columns)
        self.interaction_frequency = pairs.size().reset_index(name='Interaction_Frequency')
        self.interaction_amount = (
            pairs['USD_amount']
            .mean()
            .reset_index()
            .rename(columns={'USD_amount': 'Amount_Mean'})
        )
        return self

    def transform(self, X):
        """Return ``X`` with ``Interaction_Frequency`` and ``Amount_Mean`` added.

        Pairs that were not seen during ``fit`` get a frequency of 0 and the
        mean of the fitted per-pair means as their ``Amount_Mean``.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")

        # merge already returns a new frame, so no defensive copy is needed.
        X_ = pd.merge(X, self.interaction_frequency, on=self.grouping_columns, how='left')
        X_['Interaction_Frequency'] = X_['Interaction_Frequency'].fillna(0)

        X_ = pd.merge(X_, self.interaction_amount, on=self.grouping_columns, how='left')
        # Impute from the fitted table rather than from the batch being
        # transformed: a batch-level mean changes with batch composition and is
        # NaN when a single-row batch holds an unseen pair.
        fallback_amount = self.interaction_amount['Amount_Mean'].mean()
        X_['Amount_Mean'] = X_['Amount_Mean'].fillna(fallback_amount)
        return X_

In [11]:
class CubeRootTransformer(BaseEstimator, TransformerMixin):
    """Replace the selected columns by their cube root.

    Parameters
    ----------
    columns : str, list of str or None, default=None
        Column label(s) to transform. When ``None`` ``transform`` returns its
        input unchanged.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None):
        """Validate the input type; nothing is learned.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            # Report the type only: interpolating X itself would dump the data
            # into the exception message.
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")
        return self

    def transform(self, X):
        """Return a copy of ``X`` whose selected columns hold ``np.cbrt`` values.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")

        if self.columns is None:
            return X

        X_ = X.copy()
        # np.cbrt is a ufunc and works on the selection directly, so no
        # per-column apply/lambda is needed.
        X_[self.columns] = np.cbrt(X_[self.columns])
        return X_

In [12]:
class TargetEncoder(BaseEstimator, TransformerMixin):
    """Replace categorical columns by the mean target value of each category.

    Parameters
    ----------
    columns : iterable of str
        Names of the columns to encode.

    Attributes
    ----------
    mapping : dict
        ``{column: {category: mean target}}`` filled in by ``fit``.
    target : str
        Name given to the target series while fitting.
    """

    def __init__(self, columns):
        self.columns = columns
        self.mapping = {}
        self.target = 'Label'

    def fit(self, X, y):
        """Learn the per-category target means for every configured column.

        ``y`` is paired with the rows of ``X`` by position and is never
        modified. It may be any one-dimensional array-like with one entry per
        row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame, if ``y`` is ``None``, or if the
            length of ``y`` does not match the number of rows in ``X``.
        """
        if not isinstance(X, pd.DataFrame):
            # Report the type only: interpolating X itself would dump the data
            # into the exception message.
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")
        if y is None:
            raise ValueError("y is required to fit TargetEncoder")

        # Build a private target series on X's index: this leaves the caller's
        # y untouched (no renaming in place), accepts plain arrays, and avoids
        # index-based misalignment when an earlier step has reset X's index.
        target = pd.Series(np.asarray(y).ravel(), index=X.index, name=self.target)
        for col in self.columns:
            self.mapping[col] = target.groupby(X[col]).mean().to_dict()
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns target-encoded.

        Categories that were not seen during ``fit`` receive the mean of the
        learned category means of that column.

        Raises
        ------
        ValueError
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError(f"X must be a pandas DataFrame, got {type(X).__name__}")

        X_ = X.copy()
        for col in self.columns:
            encoding = self.mapping[col]
            fallback = np.array(list(encoding.values())).mean()
            X_[col] = X_[col].map(encoding).fillna(fallback)
        return X_

In [13]:
class ColumnDropper(BaseEstimator, TransformerMixin):
    """Remove a fixed set of columns from a DataFrame.

    Parameters
    ----------
    dropping_columns : str or list of str
        Column label(s) to remove in ``transform``.
    """

    def __init__(self, dropping_columns):
        self.dropping_columns = dropping_columns

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self``."""
        return self

    def transform(self, X):
        """Return a new DataFrame without the configured columns.

        ``DataFrame.drop`` already returns a new object and leaves ``X``
        untouched, so no explicit copy is made beforehand.
        """
        return X.drop(columns=self.dropping_columns)

In [14]:
# Load the fitted pipeline. The context manager closes the file handle even if
# unpickling raises; a bare open() inside pickle.load() never closes it explicitly.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted artifacts.
# Presumably the pickle references the transformer classes defined in the cells
# above, so those cells must have run first - confirm.
with open('artifacts/model_pipeline.pkl', 'rb') as model_file:
    model_pipeline = pickle.load(model_file)

In [30]:
# Display the loaded pipeline; set_config(display='diagram') in the import cell
# selects the diagram representation.
model_pipeline

In [18]:
# Score every remaining row; the 'Label' column is removed before the frame
# is handed to the pipeline.
model_pipeline.predict(df.drop(columns='Label'))

array([ 1,  1,  1, ...,  1, -1,  1])

In [24]:
# One hand-written transaction record used for a single-row prediction below.
data = dict(
    Time_step="07-11-2026  01:14:29",
    Transaction_Id="MAKE-PAYMENT-318236",
    Sender_Id="JPMC-CLIENT-318205",
    Sender_Account="ACCOUNT-318216",
    Sender_Country="USA",
    Sender_Sector=36226,
    Sender_lob="CCB",
    Bene_Id="JPMC-CLIENT-318207",
    Bene_Account="ACCOUNT-318219",
    Bene_Country="USA",
    USD_amount=574.44,
    Transaction_Type="MAKE-PAYMENT",
)

In [25]:
# Wrap the record in a one-row frame (index label 0) and display it.
pd.DataFrame(data=data, index=[0])

Unnamed: 0,Time_step,Transaction_Id,Sender_Id,Sender_Account,Sender_Country,Sender_Sector,Sender_lob,Bene_Id,Bene_Account,Bene_Country,USD_amount,Transaction_Type
0,07-11-2026 01:14:29,MAKE-PAYMENT-318236,JPMC-CLIENT-318205,ACCOUNT-318216,USA,36226,CCB,JPMC-CLIENT-318207,ACCOUNT-318219,USA,574.44,MAKE-PAYMENT


In [27]:
# Score the single record: a prediction equal to 1 is reported as normal,
# anything else as an anomaly.
print(
    'Not Anomaly'
    if model_pipeline.predict(pd.DataFrame(data, index=[0])) == 1
    else 'Anomaly'
)

Not Anomaly
