# Load and test all previously trained models!

In [61]:
import os
import pickle
from os.path import isfile, join
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_curve
from sklearn.metrics import confusion_matrix, f1_score


### Load all models at once

In [62]:
# Directory that holds the pickled models.
model_dir = 'models/'
# `listdir` was never imported by name; go through the `os` module (imported
# above) so this cell does not raise NameError on a fresh kernel.
# Keep regular files only; sub-directories are skipped.
model_names = [f for f in os.listdir(model_dir) if isfile(join(model_dir, f))]
print(model_names)

['.DS_Store', 'svc_balanced.sav', 'random_forest_model.sav', 'tree_clf_balanced.sav', 'knears_neighbors_balanced.sav', 'catboost_model.sav', 'log_reg_balanced.sav']


In [63]:
def print_file_size(filename):
    """Print the on-disk size of `filename`.

    The size in bytes reported by the filesystem is divided by 1000 and
    printed as ``<filename> : <size> Kb``. Nothing is returned.
    """
    size_in_bytes = os.path.getsize(filename)
    print(filename, ':', size_in_bytes / 1000, 'Kb')
    
# Report the on-disk size of every file found in the models directory.
for name in model_names:
    print(name)
    # Build the path with os.path.join: `model_dir` already ends with '/',
    # so concatenating another '/' produced paths such as 'models//<name>'.
    print_file_size(join(model_dir, name))
    print('\n')

.DS_Store
models//.DS_Store : 6.148 Kb


svc_balanced.sav
models//svc_balanced.sav : 43.475 Kb


random_forest_model.sav
models//random_forest_model.sav : 168.089 Kb


tree_clf_balanced.sav
models//tree_clf_balanced.sav : 2.181 Kb


knears_neighbors_balanced.sav
models//knears_neighbors_balanced.sav : 392.695 Kb


catboost_model.sav
models//catboost_model.sav : 26796.498 Kb


log_reg_balanced.sav
models//log_reg_balanced.sav : 1.051 Kb




In [64]:
# Drop the '.DS_Store' entry picked up by the directory listing; it is not a
# saved model. Filtering (instead of list.remove) avoids a ValueError when the
# entry is absent and makes this cell safe to run more than once.
model_names = [name for name in model_names if name != '.DS_Store']
print(model_names)

['svc_balanced.sav', 'random_forest_model.sav', 'tree_clf_balanced.sav', 'knears_neighbors_balanced.sav', 'catboost_model.sav', 'log_reg_balanced.sav']


### Load the dataset

In [65]:
# Read the full dataset and split it into the feature matrix X and target y.
transactions = pd.read_csv('creditcard.csv')
print(transactions.shape)
# Pass `axis` by keyword: the positional form `drop('Class', 1)` is deprecated
# and rejected by newer pandas releases.
X = transactions.drop('Class', axis=1)
y = transactions['Class']

(284807, 31)


### Test the models on the whole dataset

In [66]:
def load_model(model_name):
    """Load a pickled model from the directory named by `model_dir`.

    Parameters
    ----------
    model_name : file name of the saved model inside `model_dir`
        (converted with ``str()`` before the path is built).

    Returns
    -------
    The object stored in the pickle file.

    Note: unpickling can execute code embedded in the file, so only load
    model files that come from a trusted source.
    """
    model_path = join(model_dir, str(model_name))
    # Context manager guarantees the file handle is closed after loading;
    # the previous bare open() call left it open.
    with open(model_path, 'rb') as model_file:
        return pickle.load(model_file)

def model_performance_report(model_name, X, y):
    """Print the confusion matrix of a saved model evaluated on (X, y).

    The model is loaded by name via `load_model`, asked to predict on `X`,
    and the predictions are compared against the true labels `y`.
    Nothing is returned.
    """
    y_pred = load_model(model_name).predict(X)
    matrix = confusion_matrix(y, y_pred)
    print(matrix)
    
# for model_name in model_names:
#     model_performance_report(model_name, X, y)

# Evaluate every saved model on the full dataset: print its file name, the
# type of the unpickled object, and the confusion matrix of its predictions.
for model_name in model_names:
    print(model_name)

    model = load_model(model_name)
    print(type(model))

    predictions = model.predict(X)
    matrix = confusion_matrix(y, predictions)
    print(matrix)

    print('\n')


svc_balanced.sav
<class 'sklearn.svm.classes.SVC'>
[[284307      8]
 [   492      0]]


random_forest_model.sav
<class 'sklearn.ensemble.forest.RandomForestClassifier'>
[[284300     15]
 [    96    396]]


tree_clf_balanced.sav
<class 'sklearn.tree.tree.DecisionTreeClassifier'>
[[243884  40431]
 [   473     19]]


knears_neighbors_balanced.sav
<class 'sklearn.neighbors.classification.KNeighborsClassifier'>
[[284315      0]
 [   492      0]]


catboost_model.sav
<class 'catboost.core.CatBoostClassifier'>
[[284311      4]
 [    55    437]]


log_reg_balanced.sav
<class 'sklearn.linear_model.logistic.LogisticRegression'>
[[284311      4]
 [   492      0]]




Note: only two of the six models (the random forest and CatBoost) are suited to work with the raw dataset. We have to build a pre-processing pipeline so that the remaining models classify correctly.