In [1]:
import pandas as pd
import numpy as np
import pickle

from generate_features import generate_features
from transformations import random_undersample
from sklearn.metrics import f1_score

Since I don't have the test files I'll just use the train files here for demonstration

In [2]:
# Load in the data.
df_transactions = pd.read_pickle('dataframes/df_transactions.pkl')
df_users = pd.read_pickle('dataframes/df_users.pkl')
df_fx = pd.read_pickle('dataframes/df_fx.pkl')
df_currency = pd.read_pickle('dataframes/df_currency.pkl')

In [3]:
# Load the model

with open('models/rf_clf.pkl', 'rb') as f:
    clf = pickle.load(f)

In [4]:
clf

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=3, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

# Part C) How to utilize the model

The general idea will be this: When the model predicts a fraudster with a confidence of 60-80%, we will simply alert the agent only. This is because it's possible the model may have made a mistake with such a low confidence and we don't want to negatively impact the user by locking their account (the smooth operation of the app and bank for the user is of paramount importance). 

However, if the model outputs a probability greater than 80%, we will both lock and alert an agent. Now the model is fairly cofident this is a fraudster and we want to take action immediately in this case. 

We will not be utilizing the 'just LOCK' option because it doesn't make sense: We don't want to lock an account without even assigning an agent to investigate. This is poor business practice and will result in loss of customers if they have to do all the work to get their account unlocked. If we make the decision to lock an account, an agent should be immediately assigned. And we don't want to do this unless we're quite certain (or else both the user and Revolut will suffer) therefore we do this action only if the model is confident.

In summary: Confidence level between 60-80%: Alert Agent. Confidence level between 80-100%: Alert Agent AND Lock account. If the confidence is less than 60%, we don't do anything. This may seem like a high threshold but our model was trained on undersampled data so it may predict many non-fraudsters as fraudsters (again this can be remedied by more advanced techniques but would be time consuming to implement and so I didn't do this to keep it simple). 

In [5]:
ids = np.array(df_users['ID'])
ids[:5]

array(['1872820f-e3ac-4c02-bdc7-727897b60043',
       '545ff94d-66f8-4bea-b398-84425fb2301e',
       '10376f1a-a28a-4885-8daa-c8ca496026bb',
       'fd308db7-0753-4377-879f-6ecf2af14e4f',
       '755fe256-a34d-4853-b7ca-d9bb991a86d3'], dtype=object)

In [6]:
X, y = generate_features(df_transactions=df_transactions,
                         df_users=df_users,
                         df_fx=df_fx,
                         df_currency=df_currency,
                         df_countries=None,
                         test_time = False)

In [7]:
confidence = clf.predict_proba(X)[:, 1]

decision = ['NOTHING: NON-FRAUDSTER' if c < 0.6
            else 'ALERT AGENT: POSSIBLE FRAUDSTER' if (c >= 0.6 and c <= 0.8)
            else 'LOCK AND ALERT AGENT: LIKELY FRAUDSTER'
            for c in confidence]

In [8]:
decision_dict = dict(zip(ids, decision))

In [10]:
# classifier score on the ENTIRE unbalanced training set.

clf.score(X, y)

0.8400970873786408

# Part D) Simple algorithm to output decision a given ID.

In [11]:
def patrol(ID):
    return decision_dict[ID]

In [12]:
# Let's look at some examples:

for i in range(10):
    ID = ids[i]
    print('Probability: ', clf.predict_proba(X[i].reshape(1, -1))[:, 1], '%', '\tAction: ', patrol(ID), '\n')

Probability:  [0.74035222] % 	Action:  ALERT AGENT: POSSIBLE FRAUDSTER 

Probability:  [0.24991477] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.0198064] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.21486943] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.12856556] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.69868555] % 	Action:  ALERT AGENT: POSSIBLE FRAUDSTER 

Probability:  [0.1283685] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.18567942] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.01827188] % 	Action:  NOTHING: NON-FRAUDSTER 

Probability:  [0.65344745] % 	Action:  ALERT AGENT: POSSIBLE FRAUDSTER 



The system is working as intended. That concludes this project. Please see the full_scripts folder to run all of this via py scripts