## Image property drift

In [3]:
%matplotlib inline
# !pip install --upgrade plotly 
# deepchecks for tabular data:
!pip install deepchecks --upgrade -qq
# for installing deepchecks including the computer vision subpackage (note - Pytorch should be installed separately):
!pip install "deepchecks[vision]" --upgrade -qq
!pip freeze > requirements/deepchecks-requirements.txt

In [44]:
import os
from skimage import io, transform
from PIL import Image
# from deepchecks.vision import Batch, Context, TrainTestCheck

import torch
from torch.utils.data import Dataset,DataLoader
import torchvision.transforms as transforms

import deepchecks
from deepchecks.vision import VisionData
from deepchecks.vision.checks import ImagePropertyDrift


def image_property_drift(train_data_dir,test_data_dir,hash_size=8,recursive=True,show=False) :
    res = dict()   
    class DatasetLoader(Dataset):
        def __init__(self, root):
            self.root = root
            img_paths = []
            img_labels = []
            label = root.split('/')[-1]       
            if recursive :
                for filename in os.listdir(root):
                    if filename.split('.')[1] not in ['png','jpg','jpeg'] : 
                        continue
                    img_paths.append(os.path.join(root,filename))
                    img_labels.append(label)
            else :
                categories = os.listdir(root)
                for cat_index, cat in enumerate(categories):
                    directory = os.path.join(root,cat)
                    for filename in os.listdir(directory):
                        if filename.split('.')[1] not in ['png','jpg','jpeg'] : 
                            continue
                        img_paths.append(os.path.join(directory,filename))
                        img_labels.append(cat_index)
            

            self.images_filepaths = img_paths
            self.labels = img_labels
            self.transform = transforms.Compose([transforms.Resize(255),
                                                 transforms.CenterCrop(224),
                                                transforms.ToTensor()])

        def image_from_path(self,path) :
            image = Image.open(path) 
            if self.transform:
                image = self.transform(image)
            return image
        def __getitem__(self, idx):
            return self.image_from_path(self.images_filepaths[idx]), self.labels[idx]
        def __len__(self):
            return len(self.images_filepaths)
        
    class DeepCheckData(VisionData):
        def batch_to_images(self, batch):
            imgs = batch[0].detach().numpy().transpose((0, 2, 3, 1))
            return imgs*255
        
    if recursive :
        labels = os.listdir(train_data_dir)
        for label in labels :
            train_dataset = DatasetLoader(train_data_dir+label)
            val_dataset = DatasetLoader(test_data_dir+label)

            train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True,generator=torch.Generator())
            test_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True,generator=torch.Generator())

            train_ds = DeepCheckData(train_dataloader)
            test_ds = DeepCheckData(test_dataloader) 

            check = ImagePropertyDrift() #hash_size=hash_size
            result = check.run(train_ds, test_ds)
            
            # result = check.run(train_ds, test_ds)
#             image_leakage_paths = []
#             for (train_idx,test_idx) in result.value :
#                 image_leakage_paths.append((train_ds.data_loader.dataset.images_filepaths[train_idx],
#                                            test_ds.data_loader.dataset.images_filepaths[test_idx]))

#             res[label] = image_leakage_paths
    else :
        train_dataset = DatasetLoader(train_data_dir)
        val_dataset = DatasetLoader(test_data_dir)

        train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True,generator=torch.Generator())
        test_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True,generator=torch.Generator())

        train_ds = DeepCheckData(train_dataloader)
        test_ds = DeepCheckData(test_dataloader) 
        
        check = ImagePropertyDrift() #hash_size=hash_size
        result = check.run(train_ds, test_ds)
       
        image_leakage_paths = []
#         print(train_ds.data_loader.dataset.images_filepaths[0])
#         for (train_idx,test_idx) in result.value :
#             image_leakage_paths.append((train_ds.data_loader.dataset.images_filepaths[train_idx],
#                                         test_ds.data_loader.dataset.images_filepaths[test_idx]))
#         res['data'] = image_leakage_paths
    
#     if show :
#         for i in res :
#             print(i)
#             j = res[i]
#             transformer = transforms.Compose([transforms.Resize(255),
#                                                 transforms.CenterCrop(224)])


#             for (train_path,test_path) in j :
#                 print(train_path,test_path)
#                 train_image = Image.open(train_path) 
#                 test_image = Image.open(test_path) 
#                 train_image = transformer(train_image)
#                 test_image = transformer(test_image)
#                 train_image.show()
                # test_image.show()
    
    return res,result
    
train_data_dir = "/project/datasets/DataSets/train/"
test_data_dir = "/project/datasets/DataSets/val/"
_,result=image_property_drift(train_data_dir,test_data_dir,hash_size=8,recursive=False,show=True)
print("final result",image_property_drift(train_data_dir,test_data_dir,hash_size=8,recursive=True,show=True))

























final result ({}, Image Property Drift: {'Aspect Ratio': 0, 'Area': 0, 'Brightness': 0.017514123167762095, 'RMS Contrast': 0.03504947980271776, 'Mean Red Relative Intensity': 0.01921503969967702, 'Mean Green Relative Intensity': 0.06605062142233545, 'Mean Blue Relative Intensity': 0.03661109238926372})


In [45]:
result.show()
# https://docs.deepchecks.com/en/stable/checks_gallery/vision/train_test_validation/plot_image_property_drift.html

VBox(children=(HTML(value='<h4><b>Image Property Drift</b></h4>'), HTML(value='<p>    Calculate drift between …

In [24]:
!jupyter labextension list

JupyterLab v3.3.1
/usr/local/share/jupyter/labextensions
        jupyterlab-plotly v5.5.0 [32menabled[0m [32mOK[0m
        @jupyter-widgets/jupyterlab-manager v3.0.1 [32menabled[0m [32mOK[0m (python, jupyterlab_widgets)



In [13]:
dir(result)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_ipython_display_',
 '_repr_html_',
 '_repr_json_',
 '_repr_mimebundle_',
 'check',
 'conditions_results',
 'display',
 'display_check',
 'from_json',
 'get_check_id',
 'get_header',
 'get_metadata',
 'have_conditions',
 'have_display',
 'header',
 'html_serializer',
 'ipython_serializer',
 'passed_conditions',
 'priority',
 'process_conditions',
 'reduce_output',
 'save_as_html',
 'show',
 'show_in_iframe',
 'show_in_window',
 'show_not_interactive',
 'to_json',
 'to_wandb',
 'to_widget',
 'value',
 'widget_serializer']

In [33]:
!python image_drift.py -r -c
# !python3 untitled.py

Validating Input:
Validating Input:
|█████| 1/1 [Time: 00:00]
Ingesting Batches - Train Dataset:
|                                            [A
Ingesting Batches - Train Dataset:
|████████████████████████████████████████████[A
Ingesting Batches - Train Dataset:
|████████████████████████████████████████████[A
Ingesting Batches - Train Dataset:
|████████████████████████████████████████████[A
Ingesting Batches - Train Dataset:
|████████████████████████████████████████████[A
Ingesting Batches - Train Dataset:
|████████████████████████████████████████████[A

Ingesting Batches - Test Dataset:
|                                             [A[A

Ingesting Batches - Test Dataset:
|█████████████████████████████████████████████[A[A

Ingesting Batches - Test Dataset:
|█████████████████████████████████████████████[A[A


Computing Check:
|     | 0/1 [Time: 00:00][A[A[A


Computing Check:
|█████| 1/1 [Time: 00:00][A[A[A


Computing Check:
                                           