In [1]:
! pip install --quiet alibi-detect[torch] albumentations torchdrift

In [117]:
from typing import Optional, Any

import torch
import torch.nn as nn

import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as T
from PIL import Image
import copy
import boto3
from utils import extract_archive, download_file_from_s3
from pathlib import Path
import os

In [4]:
# download file
# Set the bucket name and file name
bucket_name = 'sagemaker-us-east-1-input-data'
file_name = 'mini_dataset.zip'

download_file_from_s3(bucket_name, file_name)

Sucessfully file downloaded


In [11]:
# root_dataset = Path("")
# dataset_extracted.mkdir(parents=True, exist_ok=True)
extract_archive(
    './'+'mini_dataset.zip',
    ""
)

''

In [118]:
os.listdir()

['05-test-staging.ipynb',
 '__pycache__',
 'sagemaker-capstone-project-v1-p-sqr54jwsvwmr-modelbuild',
 'model.scripted.pt',
 'mini_dataset.zip',
 'newdriftdetectiononline.ipynb',
 'sagemaker-intel-pipeline',
 '06-test-production.ipynb',
 'sagemaker-capstone-project-v1-p-sqr54jwsvwmr-modeldeploy',
 'infer_drift.py',
 'final_codecommit_pipeline.zip',
 '04-pipeline_with_drift.ipynb',
 'TestImages',
 'requirements.txt',
 'inferWithDrift.py',
 '04-pipeline.ipynb',
 '.sparkmagic',
 'mini_dataset',
 '.ipynb_checkpoints',
 'utils.py',
 'lost+found']

In [33]:
transform = T.Compose([
                       T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

idx_to_class = {
    0: 'buildings', 1: 'forest', 2: 'glacier', 3: 'mountain', 4: 'sea', 5: 'street'
}

classnames = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

In [21]:
# load model
def model_fn():
    model = torch.jit.load("model.scripted.pt")

    model.to(device).eval()

    return model

In [22]:
device = "cuda" if torch.cuda.is_available() else "cpu"


In [23]:
dataset_dir = "./mini_dataset"
model_dir = "."

model = model_fn()

#### Reference: https://docs.seldon.io/projects/alibi-detect/en/stable/cd/methods/onlinemmddrift.html

In [43]:
transform = T.Compose([T.ToPILImage(),
                       T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

In [25]:
train_dataset = torchvision.datasets.ImageFolder(root=dataset_dir , transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1)

In [26]:
from typing import Tuple, Generator, Callable, Optional


In [27]:
def stream_intel(
    dataset_dir: str=None,
) -> Generator:
    transform = T.Compose([
                       T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    
    train_dataset = torchvision.datasets.ImageFolder(root=dataset_dir , transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1)
    ds_iter = iter(train_loader)

    while True:
        try:
            img = next(ds_iter)[0][0]
        except Exception:
            ds_iter = iter(train_loader)
            img = next(ds_iter)[0][0]
        yield img.numpy()

In [39]:
N = 50  # size of reference set
stream_i = stream_intel(dataset_dir=dataset_dir)
x_ref = np.stack([next(stream_i) for _ in range(N)], axis=0)

In [29]:
# inference
def predict_fn(input_object, model, out_prob_only=False):
    with torch.no_grad():
        prediction = model(input_object)
        prediction = F.softmax(prediction, dim=1)

    if out_prob_only:
        return prediction
    else:
        confidences, cat_ids = torch.topk(prediction, 5)
        outputs = {
            idx_to_class[idx.item()]: c.item() for c, idx in zip(confidences[0], cat_ids[0])
        }
        return outputs

In [31]:

image = Image.open(dataset_dir+"/buildings"+"/"+"0.jpg")
preprocess_image = transform(np.array(image).astype(np.uint8)).unsqueeze(0).to(device)

In [34]:
out = predict_fn(preprocess_image, model, out_prob_only=False )
out

{'buildings': 0.36699575185775757,
 'sea': 0.21391302347183228,
 'glacier': 0.14441034197807312,
 'mountain': 0.13991564512252808,
 'street': 0.1005551666021347}

In [35]:
def model_fn(x: np.ndarray) -> np.ndarray:
    x = torch.as_tensor(x).to(device)
    with torch.no_grad():
        x_proj = model(x)
    return x_proj.cpu().numpy()

In [84]:
ERT = 150  # expected run-time in absence of change
W = 2  # size of test window
B = 50_000  # number of simulations to configure threshold

In [85]:
from alibi_detect.cd import MMDDriftOnline

dd = MMDDriftOnline(x_ref, ERT, W, backend='pytorch')

No GPU detected, fall back on CPU.


Generating permutations of kernel matrix..


100%|██████████| 1000/1000 [00:00<00:00, 21851.14it/s]
Computing thresholds: 100%|██████████| 2/2 [00:00<00:00,  5.27it/s]


### Detect Drift


In [113]:
transform = T.Compose([T.ToPILImage(),
                       T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
inp_img = transform(np.array(image).astype(np.uint8)).to(device)


In [114]:
preds = dd.predict(np.array(inp_img), return_test_stat=True)

In [115]:
preds

{'data': {'is_drift': 0,
  'distance': None,
  'p_val': None,
  'threshold': 0.40739706158638,
  'time': 9,
  'ert': 150,
  'test_stat': 0.1339147984981537},
 'meta': {'name': 'MMDDriftOnlineTorch',
  'online': True,
  'data_type': None,
  'version': '0.10.4',
  'detector_type': 'drift',
  'backend': 'pytorch'}}

In [98]:
import albumentations as A


In [108]:
perturb = A.RandomBrightnessContrast(
    brightness_limit=5,
    contrast_limit=3,
    p=1.0
)


In [109]:
perturbed_image = torch.tensor(
    perturb(
        image = np.array(image)
)['image']
)

transform2 = T.Compose([T.ToPILImage(),
                       T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                       ])

inp_img2 = transform2(np.array(perturbed_image).astype(np.uint8)).to(device)



In [110]:
preds = dd.predict(np.array(inp_img2), return_test_stat=True)
preds

{'data': {'is_drift': 1,
  'distance': None,
  'p_val': None,
  'threshold': 0.40739706158638,
  'time': 7,
  'ert': 150,
  'test_stat': 1.2232393026351929},
 'meta': {'name': 'MMDDriftOnlineTorch',
  'online': True,
  'data_type': None,
  'version': '0.10.4',
  'detector_type': 'drift',
  'backend': 'pytorch'}}

In [112]:
dd.score(np.array(inp_img2))

1.2260148525238037