In [2]:
import boto3
import json
import mlflow.sagemaker as mfs

In [10]:
# --- SageMaker endpoint settings -------------------------------------------
# `app_name` is the endpoint name used when invoking the model and `region`
# is the AWS region the runtime client connects to.
app_name = 'Sagemaker-Creditcard'
region = 'ap-northeast-2'
# NOTE(review): the role ARN and ECR image URL are not referenced by any cell
# visible in this notebook; presumably they belong to the deployment step.
execution_role_arn = 'arn:aws:iam::354520713950:role/SageMaker-ExecutionRole'
image_ecr_url = '354520713950.dkr.ecr.ap-northeast-2.amazonaws.com/mlflow-pyfunc:1.26.1'

# --- Location of the model artifacts on S3 ---------------------------------
s3_bucket_name = 'mlops-creditcard-runs'
experiment_id = '3'
run_id = '200271414f29476984998383ad8a700e'
model_name = 'anom_weight_5_fold_4'

# s3://<bucket>/<experiment>/<run>/artifacts/<model>/
model_uri = f"s3://{s3_bucket_name}/{experiment_id}/{run_id}/artifacts/{model_name}/"

In [1]:
def query(input_json):
    """Invoke the SageMaker endpoint with a JSON payload and return the parsed reply.

    The endpoint name and AWS region are read from the notebook-level
    ``app_name`` and ``region`` variables, so the configuration cell must be
    run before this function is called.

    Parameters
    ----------
    input_json : str
        Request body, sent with content type
        ``application/json; format=pandas-split`` (e.g. the output of
        ``DataFrame.to_json(orient="split")``).

    Returns
    -------
    object
        Whatever ``json.loads`` produces from the response body.
    """
    client = boto3.session.Session().client('sagemaker-runtime', region_name=region)
    response = client.invoke_endpoint(
        EndpointName=app_name,
        Body=input_json,
        ContentType='application/json; format=pandas-split',
    )
    # JSON payloads are UTF-8; decoding as ASCII would raise UnicodeDecodeError
    # on any non-ASCII character in an otherwise valid response.
    payload = response['Body'].read().decode('utf-8')
    return json.loads(payload)

In [43]:
import pandas as pd
import mlflow
import mlflow.sklearn
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, recall_score, f1_score, classification_report
import numpy as np

In [35]:
# Read creditcard.csv (resolved relative to the working directory) into a DataFrame.
df = pd.read_csv("creditcard.csv")

In [36]:
# Keep a seeded random half of the Class == 0 rows and every Class == 1 row.
normal = (
    df.loc[df["Class"] == 0]
    .sample(frac=0.5, random_state=42)
    .reset_index(drop=True)
)
anomaly = df.loc[df["Class"] == 1]

# Seeded 80/20 train/test split, done separately for each class.
normal_train, normal_test = train_test_split(normal, test_size=0.2, random_state=42)
anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.2, random_state=42)

In [37]:
# Fit the scaler on every column except "Time" and "Class", using the sampled
# normal rows together with all anomaly rows.
scaler = StandardScaler()
scaler.fit(pd.concat([normal, anomaly]).drop(columns=["Time", "Class"]))

StandardScaler()

In [38]:
# Build one request payload from the first 80 rows of the raw frame:
# drop the non-feature columns, scale, then serialise in pandas "split" JSON.
first_rows = df.iloc[:80].drop(columns=["Time", "Class"])
scaled_selection = scaler.transform(first_rows)
input_json = pd.DataFrame(scaled_selection).to_json(orient="split")

In [39]:
# Send the payload to the endpoint and show the predictions as a single row.
pd.DataFrame(query(input_json)).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
0,1,1,0,0,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [56]:
# Evaluation set: the first 2500 sampled normal rows plus the first 300 anomaly rows.
eval_frame = pd.concat([normal.iloc[:2500], anomaly.iloc[:300]])
true = eval_frame["Class"]  # ground-truth labels, in the same row order as `test`
test = scaler.transform(eval_frame.drop(columns=["Time", "Class"]))
preds = []  # filled batch by batch by the evaluation loop

## Undersampling method => only recall is high on the out-of-sample test (precision and accuracy stay low)

In [57]:
batch_size = 80

# Predictions are accumulated in a list local to this cell and concatenated
# once at the end, so re-running the cell starts from scratch instead of
# appending to whatever a previous run left in `preds`.
batch_preds = []

# Derive the batches from the data length rather than a fixed batch count, so
# no rows are skipped if `test` or `batch_size` changes.
for batch_idx, start in enumerate(range(0, len(test), batch_size)):
    print(f"Batch {batch_idx}", end=" - ")
    sample = pd.DataFrame(test[start:start + batch_size]).to_json(orient="split")

    # Assumes the endpoint returns one flat list of labels per batch, as the
    # single-batch call earlier in the notebook shows.
    output = query(sample)
    batch_preds.append(np.asarray(output).ravel())
    print("Completed")

preds = np.concatenate(batch_preds) if batch_preds else np.array([])
assert len(preds) == len(true), (
    f"got {len(preds)} predictions for {len(true)} labels"
)

eval_acc = accuracy_score(true, preds)
# NOTE(review): AUC is computed from the returned labels, not from scores.
eval_auc = roc_auc_score(true, preds)
eval_f1 = f1_score(true, preds)
eval_recall = recall_score(true, preds)
print("Eval Acc", eval_acc)
print("Eval AUC", eval_auc)
print("Eval f1", eval_f1)
print("Eval Recall", eval_recall)

Batch 0 - Completed
Batch 1 - Completed
Batch 2 - Completed
Batch 3 - Completed
Batch 4 - Completed
Batch 5 - Completed
Batch 6 - Completed
Batch 7 - Completed
Batch 8 - Completed
Batch 9 - Completed
Batch 10 - Completed
Batch 11 - Completed
Batch 12 - Completed
Batch 13 - Completed
Batch 14 - Completed
Batch 15 - Completed
Batch 16 - Completed
Batch 17 - Completed
Batch 18 - Completed
Batch 19 - Completed
Batch 20 - Completed
Batch 21 - Completed
Batch 22 - Completed
Batch 23 - Completed
Batch 24 - Completed
Batch 25 - Completed
Batch 26 - Completed
Batch 27 - Completed
Batch 28 - Completed
Batch 29 - Completed
Batch 30 - Completed
Batch 31 - Completed
Batch 32 - Completed
Batch 33 - Completed
Batch 34 - Completed
Eval Acc 0.4014285714285714
Eval AUC 0.6604
Eval f1 0.2616740088105727
Eval Recall 0.99


In [58]:
# Per-class precision / recall / F1 for the endpoint's predictions.
report = classification_report(true, preds)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.33      0.50      2500
           1       0.15      0.99      0.26       300

    accuracy                           0.40      2800
   macro avg       0.57      0.66      0.38      2800
weighted avg       0.91      0.40      0.47      2800



In [59]:
# Confusion matrix: rows are true classes, columns are predicted classes.
matrix = confusion_matrix(true, preds)
print(matrix)

[[ 827 1673]
 [   3  297]]
