In [None]:
import pickle
import warnings

import boto3
import numpy as np
import pandas as pd
import seaborn as sns
import snowflake.connector as sf
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix


import os
from dotenv import load_dotenv

# Load variables from the .env file two directories up before importing `common`.
# NOTE(review): presumably `common.configuration` reads environment variables at
# import time, which would explain why this call precedes the import — confirm.
load_dotenv("../../.env")
from common import configuration

In [None]:
# Set display preferences for pandas, warnings and seaborn.


def _three_decimals(value):
    """Render a float with exactly three digits after the decimal point."""
    return '%.3f' % value


# Show every column when a DataFrame is displayed, and fix float precision.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', _three_decimals)

# Emit each distinct warning only the first time it is triggered.
warnings.filterwarnings(action='once')

# NOTE(review): set_style("darkgrid") runs after the rc overrides below and may
# replace the axes face colour chosen here — confirm the intended look.
white_background = {
    'axes.facecolor': 'white',
    'figure.facecolor': 'white',
}
sns.set(rc=white_background)
sns.set_style("darkgrid")

In [None]:
# Both CSV files are read without a header row, so columns are labelled 0..n-1.
FEATS_CSV = "../../../feats.csv"
OUTPUT_CSV = "../../../output.csv"

feats = pd.read_csv(FEATS_CSV, header=None)
data = pd.read_csv(OUTPUT_CSV, header=None)

# Drop the first column (label 0) of the feature table.
# NOTE(review): presumably an index/id column written by the exporter — confirm.
feats = feats.drop(columns=[0])
feats

In [None]:
# Build the pipeline configuration object that the model's predict calls expect.
# No Snowflake / S3 connections and no models bucket are supplied here.
BUCKET = 'lili-ml-tests'
DAG = 'test-dag'
EXECUTION_DATE = '2020-07-11T07:19:58.478946+00:00'
config = configuration.Config(dag=DAG,
                execution_date=EXECUTION_DATE,
                snowflake_connection=None,
                s3_connection=None,
                stages_bucket=BUCKET,
                models_bucket=None
                )

# Load the pickled model dump.  The file is opened in a `with` block so the
# handle is closed deterministically (the previous bare `open(...)` leaked it).
# SECURITY: pickle.load executes arbitrary code embedded in the file — only
# load model dumps that come from a trusted source.
MODEL_PATH = '../../model_dumps/ODmodel_2021-08-02T08:11:52.106906.pkl'
with open(MODEL_PATH, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
type(loaded_model)

In [None]:
# Score every feature row with the loaded (untuned) model.
features = np.array(feats)
predictions = loaded_model.predict(config, features)

# Map the model's raw outputs to class indices 0-4:
# 0 -> 0, 20 -> 1, 40 -> 2, 60 -> 3, 100 -> 4.
# NOTE(review): the raw values are presumably overdraft amounts — confirm.
predictions_to_overdraft = {
    raw_value: class_idx
    for class_idx, raw_value in enumerate((0, 20, 40, 60, 100))
}

# The first element of each prediction row is the raw value to translate.
preds = [predictions_to_overdraft[row[0]] for row in predictions]


In [None]:
# Tally how many predictions fall into each of the five class indices (0-4).
count = [preds.count(class_idx) for class_idx in range(5)]
print(count)

In [None]:
# Shape of the subset of `data` rows whose column 1 equals 100.
is_hundred = data[1] == 100
data[is_hundred].shape

In [None]:
# Collect the positions predicted as class 4 (the index mapped from 100)
# so those rows can be examined more carefully.
hun_ind = [position for position, label in enumerate(preds) if label == 4]

print(hun_ind)

In [None]:
# Spot-check: predict a single feature row (index 50), wrapped in a list so the
# model receives a one-element batch.
sample_row = features[50]
loaded_model.predict(config, [sample_row])

In [None]:
# Take element [0][4] of the predict_proba output for feature row 50.
# NOTE(review): presumably the probability of class 4 (the 100 bucket) for that
# row — confirm the column order of predict_proba.
sample_proba = loaded_model.predict_proba(config, [features[50]])
sample_proba[0][4]

In [None]:
# For every row flagged in `hun_ind`, query the model one row at a time and keep
# element [0][4] of the predict_proba output.
# NOTE(review): presumably the probability assigned to class 4 — confirm.
hun_probs = np.array([
    loaded_model.predict_proba(config, [features[row_idx]])[0][4]
    for row_idx in hun_ind
])

# To inspect the whole distribution instead: print(np.sort(hun_probs))
print(np.median(hun_probs))

In [None]:
from overdraft_prediction.fine_tuner import (
    ActionType,
    FineTuner,
    ThresholdType,
)

# Decided to use this threshold.
# NOTE(review): the arguments presumably mean "when class 4's probability is
# less than 0.513, fall back to the next-best class" — confirm against the
# FineTuner signature, and record where 0.513 came from.
finetune = FineTuner(
    4,
    0.513,
    ThresholdType.LESS_THAN,
    ActionType.NEXT_BEST,
)

In [None]:
from overdraft_prediction.model import ODModel

# Build a second model from the loaded model's `.model` attribute plus the fine tuner.
model_tune = ODModel(loaded_model.model, finetune)

# Re-score the same feature rows with the tuned model.
# Note: this rebinds `features`, `predictions`, `preds` and `count`, so cells
# above that read them will see the tuned results if re-run afterwards.
features = np.array(feats)
predictions = model_tune.predict(config, features)

# Translate raw outputs to class indices using the mapping defined earlier.
preds = [predictions_to_overdraft[row[0]] for row in predictions]

# Tally predictions per class index (0-4).
count = [preds.count(class_idx) for class_idx in range(5)]
print(count)

In [None]:
# NOTE(review): looks like a per-class count list pasted from an earlier run of
# a `print(count)` cell — confirm which model/run produced it.
[280250, 1344, 228, 651, 428]