In [1]:
import pandas as pd
import numpy as np
import pickle
import random


In [2]:
with open('Modelos/XGBoost/XGBoost.pkl', 'rb') as f:
    XGBoost = pickle.load(f)
with open('Modelos/LogisticRegression/LogisticRegression.pkl', 'rb') as f:
    LR = pickle.load(f)
with open('Modelos/RandomForest/RandomForest.pkl', 'rb') as f:
    RF = pickle.load(f)

In [3]:
full_df = pd.read_csv('Datasets/fraud_new.csv')


In [4]:
full_df.head()

Unnamed: 0,step,type,amount,oldBalanceOrig,newBalanceOrig,oldBalanceDest,newBalanceDest,isFraud,errorBalanceOrig,errorBalanceDest
0,1,0,181.0,181.0,0.0,0.0,0.0,1,0.0,181.0
1,1,1,181.0,181.0,0.0,21182.0,0.0,1,0.0,21363.0
2,1,1,229133.94,15325.0,0.0,5083.0,51513.44,0,213808.94,182703.5
3,1,0,215310.3,705.0,0.0,22425.0,0.0,0,214605.3,237735.3
4,1,0,311685.89,10835.0,0.0,6267.0,2719172.89,0,300850.89,-2401220.0


In [5]:
X = full_df.drop(['isFraud'], axis=1)

In [6]:
subset = full_df.loc[(X["oldBalanceOrig"] == full_df["amount"]) & (full_df['isFraud'] == 1)]
subset = subset.drop(['isFraud'], axis=1)
example = subset.loc[[0]]
example

Unnamed: 0,step,type,amount,oldBalanceOrig,newBalanceOrig,oldBalanceDest,newBalanceDest,errorBalanceOrig,errorBalanceDest
0,1,0,181.0,181.0,0.0,0.0,0.0,0.0,181.0


In [7]:
def check_two_columns_equality_df(df, col1, col2, fraud):
    subset = df.loc[(df[col1] == df[col2]) & (df['isFraud'] == fraud)]
    if subset.empty:
        print("No cases found that violate conditions")
    else:
        print("Found a subset that violates condition")
    return subset

In [8]:
def check_two_columns_equality_model(model,example, col1, col2, fraud):
    tests = []
    for i in range(40):
        x = example.copy()
        x[col1] = example[col1] + i ** 3
        x[col2] = x[col1]
        tests.append(x.copy())
    results = []
    for t in tests:
        r = model.predict_proba(t)
        if (r[0][0]) < (r[0][1]):
            results.append(fraud)
        else:
            results.append(not fraud)
    if (not fraud) in results:
        print("Found cases that violate conditions")
    else:
        print("No cases found that violate conditions")
    return results

    

In [9]:
def check_monotonic(model, col, example, positive = True):
    y = example
    testes = []
    results = []
    for c in example.columns:
        y[c] = X[c].mean()
    y[col] = 0
    for i in range(1,100000, 5000):
        y[col] = i
        testes.append(y.copy())
    for t in testes:
        r = model.predict_proba(t)
        results.append(r[0][1])
    if positive:
        for i in range(1,len(testes)-1):
            if results[i-1] <= results[i]:
                return False
    else:
        for i in range(1,len(testes)-1):
            if results[i-1] > results[i]:
                return False
    return True

In [10]:
print(check_two_columns_equality_model(LR, example, 'amount', 'oldBalanceOrig', 1))


Found cases that violate conditions
[False, False, False, False, False, False, False, False, False, False, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [11]:
print(check_monotonic(LR, 'amount', example))

[6.528896493355201e-13, 6.106154141689068e-13, 5.710784117961354e-13, 5.341014079434655e-13, 4.995186441560423e-13, 4.671750947451212e-13, 4.369257718475326e-13, 4.0863507548216834e-13, 3.821761856899265e-13, 3.574304940322592e-13, 3.3428707189986506e-13, 3.12642173248094e-13, 2.9239876952995184e-13, 2.7346611474193045e-13, 2.5575933863287387e-13, 2.3919906625233975e-13, 2.2371106213299035e-13, 2.092258975119678e-13, 1.9567863909950525e-13, 1.83008557999586e-13]
True
