In [1]:
import pickle
import pandas as pd

from PIL import Image
from pathlib import Path
from torchvision import transforms
from tqdm import tqdm

from utils.ScrambleImage import ScrambleImage, channels_first, channels_last

# Create datasets

In [2]:
save_folder = Path("data/")

Load `pickle` file with the images that are in the training dataset of regular ImageNet

In [4]:
with open(save_folder / 'imgn_pict.pkl', 'rb') as f:
    imgn_pict = pickle.load(f)

In [5]:
len(imgn_pict)

4312

The classes we are going to analyze

In [6]:
imgn_folders = set(img_path.parent.stem for img_path in imgn_pict)
imgn_folders

{'n01440764',
 'n02102040',
 'n02979186',
 'n03000684',
 'n03028079',
 'n03394916',
 'n03417042',
 'n03425413',
 'n03445777',
 'n03888257'}

Create a list of dictionaries that combine the path and the class of a file

In [7]:
imgn_class = [{"class":img_path.parent.stem, "path":img_path} for img_path in imgn_pict]
imgn_class[:2]

[{'class': 'n03394916',
  'path': PosixPath('data/imagenette2/train/n03394916/n03394916_23108.JPEG')},
 {'class': 'n03394916',
  'path': PosixPath('data/imagenette2/train/n03394916/n03394916_59626.JPEG')}]

For convenience sake let's create a dataframe

In [8]:
data = pd.DataFrame(imgn_class)
data[:5]

Unnamed: 0,class,path
0,n03394916,data/imagenette2/train/n03394916/n03394916_231...
1,n03394916,data/imagenette2/train/n03394916/n03394916_596...
2,n03394916,data/imagenette2/train/n03394916/n03394916_552...
3,n03394916,data/imagenette2/train/n03394916/n03394916_329...
4,n03394916,data/imagenette2/train/n03394916/n03394916_619...


### Randomly choose 3 images per class

In [8]:
grouped = data.groupby("class")
grouped = grouped.apply(lambda x: x.sample(n=3, random_state=1))

grouped.reset_index(level=0, drop=True, inplace=True)
grouped.reset_index(drop=True, inplace=True)
grouped[:5]

Unnamed: 0,class,path
0,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...
1,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...
2,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...
3,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...
4,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...


____________

## 1. Create scrambled image files

Create top folder

In [11]:
folder_scr = save_folder / "imagenette2_scr"
folder_scr.mkdir(exist_ok=True)

Create folder for each class

In [118]:
for img_class in imgn_folders:
    folder_scr_class = folder_scr / img_class
    folder_scr_class.mkdir(exist_ok=True)

Resize, crop and normalize image accoring to standard ImageNet values

In [17]:
transform = transforms.Compose([
 transforms.Resize(256),
 transforms.CenterCrop(224),
])

transform_normalize = transforms.Normalize(
     mean=[0.485, 0.456, 0.406],
     std=[0.229, 0.224, 0.225]
 )

How many different variants of patches could be created?

In [120]:
for i in range(1, 113):
    if 224%i == 0:
        devide = (224 / i)**2
        print(f"We can use size '{i}' and have '{devide}' equal patches")

We can use size '1' and have '50176.0' equal patches
We can use size '2' and have '12544.0' equal patches
We can use size '4' and have '3136.0' equal patches
We can use size '7' and have '1024.0' equal patches
We can use size '8' and have '784.0' equal patches
We can use size '14' and have '256.0' equal patches
We can use size '16' and have '196.0' equal patches
We can use size '28' and have '64.0' equal patches
We can use size '32' and have '49.0' equal patches
We can use size '56' and have '16.0' equal patches
We can use size '112' and have '4.0' equal patches


Let's choose 4 distinct values for the block size

In [121]:
block_sz = [28, 32, 56, 112]

In [122]:
var = list(range(0,3))

#### Create dataset

Create new column for the indices to store the order of the scrambled files

In [123]:
grouped["scrambled_indices"] = None

Loop over the dataframe index and create a scrambled image for each `Path` object. 

In [124]:
for index, _class, path, indices in tqdm(grouped.itertuples()):
    # load image
    img = Image.open(path)
    # apply transformation
    trf_img = transform(img)
    # convert to numpy array
    trf_arr = np.array(trf_img)
    # convert to channels first
    trf_arr = channels_first(trf_arr)
    
    # also save unmodified image
    save_path = folder_scr / f"{_class}/{path.stem}_original.jpg"
    trf_img.save(save_path)
    
    for block_size in block_sz:
        temp_ind = {}
        
        for variant in var:
            scr = ScrambleImage(trf_arr, block_size)
            scr.scramble()
            ind, scr_arr = scr.export()
            temp_ind[variant] = ind
            
            scr_arr = channels_last(scr_arr)
            scr_img = Image.fromarray(scr_arr)
            save_path = folder_scr / f"{_class}/{path.stem}_{block_size}_{variant}.jpg"
            
            scr_img.save(save_path)
        
        grouped.at[index, "scrambled_indices"] = temp_ind

In [125]:
grouped[:5]

Unnamed: 0,class,path,scrambled_indices
0,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: [2, 3, 1, 0], 1: [0, 3, 1, 2], 2: [3, 0, 1..."
1,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: [1, 3, 0, 2], 1: [1, 3, 0, 2], 2: [2, 3, 1..."
2,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: [0, 1, 2, 3], 1: [1, 3, 0, 2], 2: [2, 1, 0..."
3,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: [2, 0, 1, 3], 1: [0, 2, 3, 1], 2: [2, 1, 3..."
4,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: [1, 3, 2, 0], 1: [2, 3, 0, 1], 2: [3, 0, 2..."


#### Save dataset to disk

In [126]:
grouped.to_csv(folder_scr / "scramble.csv")

____

## 2.Create patches dataset

In [9]:
from utils.PatchImage import PatchImage

Load `scramble.csv` to get the sampled image paths

In [10]:
grouped = pd.read_csv(folder_scr / "scramble.csv", index_col=0)
grouped[:5]

Unnamed: 0,class,path,scrambled_indices
0,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([2, 3, 1, 0]), 1: array([0, 3, 1, 2]..."
1,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([1, 3, 0, 2]), 1: array([1, 3, 0, 2]..."
2,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([0, 1, 2, 3]), 1: array([1, 3, 0, 2]..."
3,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([2, 0, 1, 3]), 1: array([0, 2, 3, 1]..."
4,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([1, 3, 2, 0]), 1: array([2, 3, 0, 1]..."


In [19]:
grouped["path"] = grouped["path"].apply(lambda x: Path(x))

Create top folder

In [11]:
folder_ptd = save_folder / "imagenette2_ptd"
folder_ptd.mkdir(exist_ok=True)

Create folder for each class

In [12]:
for img_class in imgn_folders:
    folder_ptd_class = folder_ptd / img_class
    folder_ptd_class.mkdir(exist_ok=True)

Define the patch sizes that are applied to each image

In [13]:
patch_sizes = range(10, 80, 10)

#### Create the dataset

In [20]:
for index, _class, path, indices in tqdm(grouped.itertuples()):
    # load image
    img = Image.open(path)
    # apply transformation
    trf_img = transform(img)
    
    # also save unmodified image
    save_path = folder_ptd / f"{_class}/{path.stem}_original.jpg"
    trf_img.save(save_path)
    
    for patch_size in patch_sizes:
        patch_img = PatchImage(trf_img, patch_size)
        
        for i, patch in enumerate(patch_img.patch()):
            save_path = folder_ptd / f"{_class}/{path.stem}_{patch_size}_{i}.jpg"
            patch.save(save_path)

30it [00:16,  1.79it/s]


____

## 3.Create smoothed dataset

For this process to work, we need some additional files from [this](https://github.com/zhufeida/Benchmark_EPS) repository

In [7]:
! wget -q -c https://raw.githubusercontent.com/zhufeida/Benchmark_EPS/master/Resnet_tl/module.py -P utils/ 

In [6]:
! wget -q https://github.com/zhufeida/Benchmark_EPS/raw/master/Resnet_tl/checkpoint_delta/model-345002.data-00000-of-00001 -P data/checkpoint_delta

In [4]:
! wget -q https://github.com/zhufeida/Benchmark_EPS/raw/master/Resnet_tl/checkpoint_delta/model-345002.index -P data/checkpoint_delta

In [5]:
! wget -q https://github.com/zhufeida/Benchmark_EPS/raw/master/Resnet_tl/checkpoint_delta/model-345002.meta -P data/checkpoint_delta

In [3]:
import tensorflow as tf
import numpy as np
import imageio

from utils.module import SRGAN_delta
from PIL import Image
from PIL import ImageChops
from PIL import ImageOps

#### 1. Build the `tensorflow` graph

In [42]:
tf.reset_default_graph()
test_input = tf.placeholder(tf.float32,[1, None, None,3], name='test')
test_input_scale = test_input/127.5-1
test_output = SRGAN_delta(test_input_scale,is_train=False,reuse=False)
saver = tf.train.Saver()
sess = tf.Session()

Instructions for updating:
Colocations handled automatically by placer.


#### 2. Load the checkpoint from the pretrained model

In [43]:
saver.restore(sess, save_folder / 'checkpoint_delta/model-345002')

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoint_delta/model-345002


In [44]:
folder_smt = save_folder / "imagenette2_smt"
folder_smt.mkdir(exist_ok=True)

Create folder for each class

In [45]:
for img_class in imgn_folders:
    folder_smt_class = folder_smt / img_class
    folder_smt_class.mkdir(exist_ok=True)

In [46]:
grouped = pd.read_csv(folder_scr / "scramble.csv", index_col=0)
grouped[:5]

Unnamed: 0,class,path,scrambled_indices
0,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([2, 3, 1, 0]), 1: array([0, 3, 1, 2]..."
1,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([1, 3, 0, 2]), 1: array([1, 3, 0, 2]..."
2,n01440764,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([0, 1, 2, 3]), 1: array([1, 3, 0, 2]..."
3,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([2, 0, 1, 3]), 1: array([0, 2, 3, 1]..."
4,n02102040,/home/malte/Dokumente/Masterarbeit/data/imagen...,"{0: array([1, 3, 2, 0]), 1: array([2, 3, 0, 1]..."


In [47]:
grouped["path"] = grouped["path"].apply(lambda x: Path(x))

In [48]:
def smooth_image_array(img_arr):
    im = np.float32(img_arr)
    batch_images = [im]
    test_output_eval = sess.run(test_output,feed_dict={test_input: batch_images}) 
    im_out = test_output_eval[0]
    
    im_out = (np.float64(im_out) + 1) / 2 * 255
    im_out[im_out > 255] = 255
    im_out[im_out < 0] = 0
    
    return im_out / 255

In [49]:
smooth_op = list(range(1, 6))
smooth_op

[1, 2, 3, 4, 5]

In [50]:
for index, _class, path, indices in tqdm(grouped.itertuples()):
    # load image    
    im = imageio.imread(path)
    # also save unmodified image
    save_path = folder_smt / f"{_class}/{path.stem}_original.jpg"
    imageio.imwrite(save_path, im)
    
    # repeat smoothening 'smooth_op' times
    for op in smooth_op:
        if op == 1:
            smt_arr = im
        else:
            smt_arr = imageio.imread(save_path)
        smt_arr = smooth_image_array(smt_arr)
        save_path = folder_smt / f"{_class}/{path.stem}_{op}.jpg"
        imageio.imwrite(save_path, smt_arr)



Also create difference dataset that consists of the parts that are discarded in the smoothening process.

### 3.1 Create dataset that consists of the parts that are discarded in the smoothening process and their inverses

In [51]:
folder_smt_diff = save_folder / "imagenette2_smtdiff"
folder_smt_diff.mkdir(exist_ok=True)

folder_smt_diff_inv = save_folder / "imagenette2_smtdiffinv"
folder_smt_diff_inv.mkdir(exist_ok=True)

Create folder for each class

In [52]:
for img_class in imgn_folders:
    folder_smt_diff_class = folder_smt_diff / img_class
    folder_smt_diff_class.mkdir(exist_ok=True)

In [53]:
for img_class in imgn_folders:
    folder_smt_diff_inv_class = folder_smt_diff_inv / img_class
    folder_smt_diff_inv_class.mkdir(exist_ok=True)

In [54]:
smt_orig = sorted(folder_smt.rglob('*_original.jpg'))
smt_one = sorted(folder_smt.rglob('*_1.jpg'))

In [55]:
for orig, one in tqdm(zip(smt_orig, smt_one):
    orig_img = Image.open(orig)
    one_img = Image.open(one)
    
    subfolder = orig.parent.stem
    diff = ImageChops.difference(orig_img, one_img)
    diff.save(folder_smt_diff / orig.parent.stem / f"{orig.stem}_diff.jpg")
    invert_diff = ImageOps.invert(diff)
    invert_diff.save(folder_smt_diff_inv / orig.parent.stem / f"{orig.stem}_diff_inv.jpg")

#### Tranform images

Up to now the images still have the full size and are not regularized with the ImageNet presets.

Thus, the images are transformed.

In [66]:
def transform_img(img_folder):
    for img_pth in img_folder.rglob('*.jpg'):
        # load image
        img = Image.open(img_pth)
        # apply transformation
        trf_img = transform(img)
        trf_img.save(img_pth)

In [67]:
for img_folder in [folder_smt, folder_smt_diff, folder_smt_diff_inv]:
    transform_img(img_folder)