In [1]:
import pandas as pd
import numpy as np
import pickle
import random
from matplotlib import pyplot as plt

# !pip uninstall mlbugdetection
# import mlbugdetection
# from mlbugdetection.monotonic import check_monotonicity_single_sample, check_monotonicity_multiple_samples
# from mlbugdetection.calibration import calibration_check
from mlbugdetection.critical_values import find_critical_values, find_several_critical_values
from mlbugdetection.sanity import sanity_check, sanity_check_with_indexes

In [2]:
def load_model(path):
    """Deserialize one pickled model.

    Parameters
    ----------
    path : str
        Location of the ``.pkl`` file, relative to the notebook's working directory.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at model files produced by a trusted training run.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# One named handle per trained model so later cells can refer to them individually.
XGBoost = load_model('models/XGBoost/XGBoost.pkl')
LR = load_model('models/LogisticRegression/LogisticRegression.pkl')
RF = load_model('models/RandomForest/RandomForest.pkl')
SVM = load_model('models/SVM/SVMTitanic.pkl')

# The list iterated by the fraud-dataset cells. SVM is deliberately left out:
# it is only ever called with the Titanic sample in this notebook.
models = [XGBoost, LR, RF]

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.



In [3]:
# Fixed seed so the sampled rows (and every result derived from them) are the
# same on each Restart & Run All instead of changing from run to run.
SEED = 42

# Raw datasets: the fraud data (with an 'isFraud' column) and the cleaned Titanic data.
df = pd.read_csv('../datasets/fraud_new.csv')
df_titanic = pd.read_csv('../datasets/TitanicClean.csv')

# Single-row samples used as the probe row by the per-feature analyses.
# NOTE(review): unlike `examples`, `example` still carries the 'isFraud'
# column — confirm the downstream calls expect that.
example = df.sample(1, random_state=SEED)
example_titanic = df_titanic.sample(1, random_state=SEED)

# 100 fraud rows with the 'isFraud' column removed, built in one expression
# so re-running the cell never operates on an already-modified frame.
examples = df.sample(100, random_state=SEED).drop('isFraud', axis=1)

Test Critical Values

In [4]:
# Print the signature and docstring of find_critical_values for reference;
# later cells call it with (model, sample, feature, start, stop, ...).
help(find_critical_values)

Help on function find_critical_values in module mlbugdetection.critical_values:

find_critical_values(model, sample, feature: str, start, stop, steps=100, keep_n=3)
    Critical Values Finder
        Finds highest changes (positive or negative) in predict_proba 
        over an specified inteval [`start`, `stop`].
    
    Parameters
    ----------
    model : sklearn model or str
        Model already trained and tested from scikit-learn. Could be a model object or a path to a model file.
    
    sample : pandas DataFrame
        A single row of the dataframe that will be used for the analysis.
    
    feature : str
        Feature of dataframe that will be analysed.
    
    start : int
        The starting value of the feature's interval.
    
    stop : int
        The end value of the feature's interval.
    
    steps : int, default=100
        Number of values that will be atributed to the analysed feature. Must be non-negative.
        Example: start = 1, stop = 100, steps = 

Test Sanity Check

In [16]:
# Run the library's sanity check on the logistic-regression model using the
# full fraud frame; 'isFraud' is passed as the third argument
# (presumably the name of the target column — confirm against the library docs).
sanity_check(LR, df, 'isFraud')

False

In [17]:
# Same inputs as the sanity_check call, but the returned analysis object is
# kept so its attributes can be inspected.
sanity_analysis = sanity_check_with_indexes(LR, df, 'isFraud')
# `metrics` renders as a dict containing a boolean 'sanity' flag and a
# 'sanity_indexes' list of row indexes.
# NOTE(review): the index list is long, so displaying the whole dict floods
# the cell output; consider showing only its length or a slice.
sanity_analysis.metrics

{'sanity': False,
 'sanity_indexes': [0,
  8,
  9,
  12,
  120,
  222,
  227,
  325,
  342,
  374,
  406,
  430,
  437,
  446,
  447,
  449,
  459,
  462,
  490,
  539,
  551,
  561,
  579,
  634,
  665,
  705,
  746,
  757,
  803,
  808,
  811,
  812,
  817,
  846,
  851,
  853,
  907,
  931,
  997,
  1045,
  1050,
  1052,
  1077,
  1088,
  1119,
  1176,
  1184,
  1202,
  1237,
  1238,
  1273,
  1343,
  1408,
  1409,
  1419,
  1438,
  1460,
  1478,
  1479,
  1484,
  1493,
  2085,
  2124,
  2189,
  2314,
  2340,
  2415,
  2478,
  2491,
  2534,
  2543,
  2672,
  2699,
  2713,
  2773,
  2776,
  2849,
  2850,
  2855,
  2860,
  3004,
  3048,
  3110,
  3119,
  3197,
  3199,
  3242,
  3295,
  3304,
  3377,
  3381,
  3469,
  3470,
  3471,
  3496,
  3522,
  3552,
  3575,
  3608,
  3627,
  3629,
  3659,
  3746,
  3751,
  3759,
  3809,
  3827,
  3839,
  3876,
  3955,
  3968,
  3978,
  3988,
  4045,
  4080,
  4081,
  4125,
  4226,
  4240,
  4362,
  4430,
  4451,
  4456,
  4460,
  4530,
  4617,
  

Other analyses

In [None]:
# Monotonicity probe of the Titanic SVM. Only the first column is examined:
# the slice makes the "stop after one column" behaviour explicit.
# The model is given as a path to its pickle file; the in-memory `SVM`
# object was the alternative tried for that argument.
# NOTE(review): `check_monotonicity` is not imported in the import cell (only
# commented-out check_monotonicity_single_sample / _multiple_samples appear),
# so this cell raises NameError unless the name is defined elsewhere.
# NOTE(review): the 'isFraud' filter is applied to Titanic columns — confirm
# that is the intended column to skip for this dataset.
for c in df_titanic.columns[:1]:
    if c == 'isFraud':
        continue
    # Sweep the feature across its observed range in the Titanic data.
    minV, maxV = df_titanic[c].min(), df_titanic[c].max()
    teste = check_monotonicity(c, minV, maxV, example_titanic, "models/SVM/SVMTitanic.pkl", 150)

In [None]:
# Show the printable form of the monotonicity result, then call its
# save_graphs() method (presumably writes the generated plots to disk — confirm).
# Depends on `teste` having been assigned by the monotonicity cell.
print(teste)
teste.save_graphs()

In [None]:
# Dump the four report attributes of the monotonicity result, one per line.
print(
    teste.errors,
    teste.warnings,
    teste.metrics,
    teste.model_info,
    sep='\n',
)

In [None]:
# Run the critical-value search for the Titanic SVM on every column of
# df_titanic except one named 'isFraud'.
# NOTE(review): the 'isFraud' filter is applied to Titanic columns — confirm
# that is the intended column to skip for this dataset.
for c in df_titanic.columns:
    if c != 'isFraud':
        # NOTE(review): start and stop are both 100, so the scanned interval is
        # a single point — confirm this is intentional and not a placeholder
        # for the column's min/max.
        minV = 100
        maxV = 100
        # The keyword is `steps` (see the find_critical_values signature);
        # the previous `step=` raised TypeError for an unexpected keyword.
        find_critical_values(SVM, example_titanic, c, minV, maxV, steps=1000)

In [None]:
# Critical-value search on the fraud data, restricted to the first column
# (the slice replaces an unconditional break after the first iteration).
# Every model in `models` is evaluated; `teste3` ends up holding the result
# for the last one.
for c in df.columns[:1]:
    if c == 'isFraud':
        continue
    # Scan the feature over the range observed in the dataset.
    minV, maxV = df[c].min(), df[c].max()
    for model in models:
        teste3 = find_critical_values(model, example, c, minV, maxV)

In [None]:
# Monotonicity check for every fraud feature (all columns except 'isFraud')
# against every model in `models`, using 150 as the last positional argument.
# A variant passing plot_graph=True was also tried for this call.
# NOTE(review): `check_monotonicity` is not imported in the import cell (only
# commented-out check_monotonicity_single_sample / _multiple_samples appear),
# so this cell raises NameError unless the name is defined elsewhere.
for c in df.columns:
    if c == 'isFraud':
        continue
    # Sweep the feature across its observed range in the fraud data.
    minV, maxV = df[c].min(), df[c].max()
    for model in models:
        check_monotonicity(c, minV, maxV, example, model, 150)

In [None]:
# Calibration check for the first model in `models` only (the slice replaces
# a break at the end of the first iteration).
# NOTE(review): the import of `calibration_check` is commented out in the
# import cell, so this cell raises NameError unless it is re-enabled.
for model in models[:1]:
    teste2 = calibration_check("isFraud", model, df)

In [None]:
# Call save_graphs() on the calibration result (presumably writes the plots to
# disk — confirm). Depends on `teste2` from the calibration cell.
teste2.save_graphs()

In [None]:
# Display the `graphs` attribute of the calibration result as the cell output.
teste2.graphs