## Image property drift without deepcheck

In [10]:
# !pip install gudhi
# !pip install POT
from time import time

In [6]:
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from PIL.ImageStat import Stat
from gudhi.wasserstein import wasserstein_distance

In [27]:
train_dir="dataset/virat_1"
test_dir="dataset/ADAS"

def area(image):
    image=np.array(image)
    height,width,_=image.shape
    return height*width
    
def aspect_ratio(image):
    image=np.array(image)
    height,width,_=image.shape
    return height/width

def brightness(image):
    image=np.uint8(image*255)
    image=Image.fromarray(image)
    image=image.convert('L')
    return Stat(image).mean[0]

def mbr_intesity(image,eps=0.0000001):
    # image=np.array(image)/255
    height,width,_=image.shape
    # imb=np.zeros(image.shape) #
    # for i in range(height):
    #     for j in range(width):
    #         imb[i,j]=image[i,j,2]/(sum(image[i,j,:])+eps) # red
    # return np.sum(imb)/(height*width) #
    return np.sum(image[:,:,2]/(np.sum(image,axis=2)+eps))/(height*width)
    
def mgr_intesity(image,eps=0.0000001):
    # image=np.array(image)/255
    height,width,_=image.shape
    # img=np.zeros(image.shape) #
    # for i in range(height):
    #     for j in range(width):
    #         img[i,j]=image[i,j,1]/(sum(image[i,j,:])+eps) # red
    # return np.sum(img)/(height*width)
    return np.sum(image[:,:,1]/(np.sum(image,axis=2)+eps))/(height*width)
    

def mrr_intesity(image,eps=0.0000001):
    # image=np.array(image)/255
    height,width,_=image.shape
    # imr=np.zeros(image.shape) #
    # for i in range(height):
    #     for j in range(width):
    #         imr[i,j]=image[i,j,0]/(sum(image[i,j,:])+eps) # red
    # return np.sum(imr)/(height*width)
    return np.sum(image[:,:,0]/(np.sum(image,axis=2)+eps))/(height*width)
    

def rms_contrast(image):
    image=np.uint8(image*255)
    img_grey=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    return img_grey.std()

In [28]:
properties=['Area', 'Aspect Ratio', 'Brightness', 'Mean Blue Relative Intensity', 'Mean Green Relative Intensity', 'Mean Red Relative Intensity', 'RMS Contrast']
image_properties=[]
time_taken={}
t1=time()
for img in os.listdir(train_dir)[:1]:
    image_property=[]
    img_path=os.path.join(train_dir,img)
    # image=Image.open(img_path)
    image=cv2.imread(img_path)/255
    image_property.append(area(image))
    t2=time()
    time_taken["area"]=t2-t1
    image_property.append(aspect_ratio(image))
    t3=time()
    time_taken["aspect_ratio"]=t3-t2
    image_property.append(brightness(image))
    t4=time()
    time_taken["brightness"]=t4-t3
    image_property.append(mbr_intesity(image))
    t5=time()
    time_taken["mbr_intesity"]=t5-t4
    image_property.append(mgr_intesity(image))
    t6=time()
    time_taken["mgr_intesity"]=t6-t5
    image_property.append(mrr_intesity(image))
    t7=time()
    time_taken["mrr_intesity"]=t7-t6
    image_property.append(rms_contrast(image))
    t8=time()
    time_taken["rms_contrast"]=t8-t7
    image_properties.append(image_property)
train_features=np.array(image_properties)
# train_features=pd.DataFrame(train_features,columns=properties)
time_taken

{'area': 0.03290915489196777,
 'aspect_ratio': 0.004182577133178711,
 'brightness': 0.010503292083740234,
 'mbr_intesity': 0.023222684860229492,
 'mgr_intesity': 0.02225041389465332,
 'mrr_intesity': 0.022437572479248047,
 'rms_contrast': 0.012737274169921875}

In [29]:
train_features=pd.DataFrame(train_features,columns=properties)
train_features

Unnamed: 0,Area,Aspect Ratio,Brightness,Mean Blue Relative Intensity,Mean Green Relative Intensity,Mean Red Relative Intensity,RMS Contrast
0,921600.0,0.5625,117.240958,0.31287,0.332825,0.354305,72.378711


{'area': 0.03642582893371582,
 'aspect_ratio': 0.003378629684448242,
 'brightness': 0.009185075759887695,
 'mbr_intesity': 3.8872950077056885,
 'mgr_intesity': 3.908198595046997,
 'mrr_intesity': 3.9440712928771973,
 'rms_contrast': 0.01011204719543457}

In [30]:
properties=['Area', 'Aspect Ratio', 'Brightness', 'Mean Blue Relative Intensity', 'Mean Green Relative Intensity', 'Mean Red Relative Intensity', 'RMS Contrast']
image_properties=[]
t1=time()
for img in os.listdir(train_dir)[:101]:
    image_property=[]
    img_path=os.path.join(train_dir,img)
    # image=Image.open(img_path)
    image=cv2.imread(img_path)/255
    image_property.append(area(image))
    image_property.append(aspect_ratio(image))
    image_property.append(brightness(image))
    image_property.append(mbr_intesity(image))
    image_property.append(mgr_intesity(image))
    image_property.append(mrr_intesity(image))
    image_property.append(rms_contrast(image))
    image_properties.append(image_property)
train_features=np.array(image_properties)
train_features=pd.DataFrame(train_features,columns=properties)

image_properties=[]
for img in os.listdir(test_dir):
    if img != '.ipynb_checkpoints':
        image_property=[]
        img_path=os.path.join(test_dir,img)
        # image=Image.open(img_path)
        image=cv2.imread(img_path)/255
        image_property.append(area(image))
        image_property.append(aspect_ratio(image))
        image_property.append(brightness(image))
        image_property.append(mbr_intesity(image))
        image_property.append(mgr_intesity(image))
        image_property.append(mrr_intesity(image))
        image_property.append(rms_contrast(image))
        image_properties.append(image_property)
test_features=np.array(image_properties)
test_features=pd.DataFrame(test_features,columns=properties)
time_taken=time()-t1
time_taken

16.191571950912476

In [9]:
# os.remove(os.path.join(test_dir,img))
drift_score={}
for prop in properties:
    drift_score[prop]=wasserstein_distance(np.array(train_features[[prop,"index"]]),np.array(test_features[[prop,"index"]]))
    
drift_score

{'Area': -58170950.0,
 'Aspect Ratio': -0.5625,
 'Brightness': -3936.938375108506,
 'Mean Blue Relative Intensity': 40.25033453310319,
 'Mean Green Relative Intensity': 29.615994457610054,
 'Mean Red Relative Intensity': 19.04523517348781,
 'RMS Contrast': -2008.6969795533714}

In [10]:
drifts=[]
for key in drift_score.keys():
    drifts.append(drift_score[key])
mx,mn=max(drifts),min(drifts)
new_drifts={}
for key in drift_score.keys():
    new_drifts[key]=(drift_score[key]-mn)/(mx-mn)
new_drifts

{'Area': 0.0,
 'Aspect Ratio': 0.999999298398835,
 'Brightness': 0.9999316293449274,
 'Mean Blue Relative Intensity': 1.0,
 'Mean Green Relative Intensity': 0.9999998171882578,
 'Mean Red Relative Intensity': 0.9999996354695138,
 'RMS Contrast': 0.9999647771628906}

## Image property drift with deepcheck

In [11]:
import os,argparse
from skimage import io, transform
from PIL import Image
import pandas as pd
import numpy as np

import torch
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms

import deepchecks
from deepchecks.vision import VisionData
from deepchecks.vision.checks import ImagePropertyDrift

from typing import Any, Dict, Mapping, Optional, Sequence, Union

import torch
from ignite.metrics import Metric
from torch import nn

from deepchecks.core.check_result import CheckResult
from deepchecks.core.checks import DatasetKind, ModelOnlyBaseCheck, SingleDatasetBaseCheck, TrainTestBaseCheck
from deepchecks.utils.ipython import ProgressBarGroup
from deepchecks.vision import deprecation_warnings  # pylint: disable=unused-import # noqa: F401
from deepchecks.vision._shared_docs import docstrings
from deepchecks.vision.batch_wrapper import Batch
from deepchecks.vision.context import Context
from deepchecks.vision.utils.vision_properties import STATIC_PROPERTIES_FORMAT
from deepchecks.vision.vision_data import VisionData

In [12]:
train_data_dir = train_dir
test_data_dir = test_dir
recursive=True
class DatasetLoader(Dataset):
    def __init__(self, root):
        self.root = root
        img_paths = []
        img_labels = []
        label = root.split('/')[-1]       
        if recursive :
            for filename in os.listdir(root):
                if filename.split('.')[1] not in ['png','jpg','jpeg'] : 
                    continue
                img_paths.append(os.path.join(root,filename))
                img_labels.append(label)
            img_paths=img_paths[:101]
        else :
            categories = os.listdir(root)
            for cat_index, cat in enumerate(categories):
                directory = os.path.join(root,cat)
                for filename in os.listdir(directory):
                    if filename.split('.')[1] not in ['png','jpg','jpeg'] : 
                        continue
                    img_paths.append(os.path.join(directory,filename))
                    img_labels.append(cat_index)


        self.images_filepaths = img_paths
        self.labels = img_labels

    def image_from_path(self,path) :
        trans = transforms.ToTensor()
        return trans(Image.open(path))
    def __getitem__(self, idx):
        return self.image_from_path(self.images_filepaths[idx]), self.labels[idx]
    def __len__(self):
        return len(self.images_filepaths)

class DeepCheckData(VisionData):
    def batch_to_images(self, batch):
        imgs = batch[0].detach().numpy().transpose((0, 2, 3, 1))
        return imgs*255

In [13]:
train_dataset = DatasetLoader(train_data_dir)
val_dataset = DatasetLoader(test_data_dir)

train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True,generator=torch.Generator())
test_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=True,generator=torch.Generator())

train_ds = DeepCheckData(train_dataloader)
test_ds = DeepCheckData(test_dataloader)

check = ImagePropertyDrift()#.add_condition_drift_score_less_than(0.1)
result = check.run(train_ds, test_ds)
result.value





{'Aspect Ratio': 0,
 'Area': 1.0,
 'Brightness': 0.9701847949224576,
 'RMS Contrast': 0.7912360467984437,
 'Mean Red Relative Intensity': 0.8091872211723896,
 'Mean Green Relative Intensity': 0.8833973986096024,
 'Mean Blue Relative Intensity': 0.5636379491147916}

In [1]:
from scipy.stats import wasserstein_distance
drift_score={}
margin_quantile_filter=0.025
for prop in properties:
    dist1,dist2=train_features[prop],test_features[prop]
    dist1_qt_min, dist1_qt_max = np.quantile(dist1, [margin_quantile_filter, 1 - margin_quantile_filter])
    dist2_qt_min, dist2_qt_max = np.quantile(dist2, [margin_quantile_filter, 1 - margin_quantile_filter])
    dist1 = dist1[(dist1_qt_max >= dist1) & (dist1 >= dist1_qt_min)]
    dist2 = dist2[(dist2_qt_max >= dist2) & (dist2 >= dist2_qt_min)]

    val_max = np.max([np.max(dist1), np.max(dist2)])
    val_min = np.min([np.min(dist1), np.min(dist2)])
    if val_max == val_min:
        drift_score[prop]=0
    else:
        dist1 = (dist1 - val_min) / (val_max - val_min)
        dist2 = (dist2 - val_min) / (val_max - val_min)
        drift_score[prop]=wasserstein_distance(dist1,dist2)
drift_score

NameError: name 'properties' is not defined