In [72]:
from concurrent.futures import ProcessPoolExecutor
# from loky import ProcessPoolExecutor  # for Windows users

def parallel(func, iterable):
    """Apply `func` to every item of `iterable` using a pool of worker processes.

    A pool is created for this call only and is shut down before returning, so
    no worker processes are left behind.

    Args:
        func: Picklable callable that takes a single item.
        iterable: Items to process; each one is sent to a worker process.

    Returns:
        list: The results, in the same order as `iterable`.

    Raises:
        Exception: Any exception raised by `func` in a worker is re-raised
            here while the results are collected.
    """
    # Leaving the `with` block shuts the pool down, so the lazy iterator
    # returned by `map` has to be fully consumed while the pool is still alive.
    with ProcessPoolExecutor() as executor:
        return list(executor.map(func, iterable))

In [73]:
from pathlib import Path
import os

In [74]:
# Lower-case file suffixes (leading dot included) that are treated as images.
image_file_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')

def is_image_path_valid(path: Path) -> bool:
    """Return True if `path` is an existing file with a known image suffix.

    The suffix is compared case-insensitively, so ``photo.JPG`` is accepted
    just like ``photo.jpg``.

    Args:
        path: Candidate file location.

    Returns:
        bool: True only for regular files whose suffix is listed in
        `image_file_extensions`.
    """
    return path.is_file() and path.suffix.lower() in image_file_extensions

def verify_image(fn):
    """Confirm that `fn` can be opened and decoded as an image.

    Args:
        fn: Path (or anything `Image.open` accepts) of the file to check.

    Returns:
        bool: True if the image was opened and loaded without an error,
        False if any exception was raised along the way.
    """
    try:
        # The context manager closes the image even when decoding fails.
        with Image.open(fn) as im:
            # Request a small (32x32) draft decode before loading.
            # NOTE(review): presumably a speed-up that only some decoders
            # honour -- confirm against the Pillow documentation.
            im.draft(im.mode, (32, 32))
            im.load()
        return True
    # `Exception` instead of a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate and a long verification run can be stopped.
    except Exception:
        return False

def load_image_file(path):
    """Open the image stored at `path` and return the resulting image object."""
    image = Image.open(path)
    return image

In [75]:
def load_images_recursively(root_dir: Path):
    """Collect image paths from `root_dir` and from its immediate sub-folders.

    Only one level of nesting is visited: files directly in `root_dir` and
    files directly inside each of its sub-directories. Every accepted image is
    paired with the stem of the directory that contains it.

    Args:
        root_dir: Directory to scan.

    Returns:
        tuple: ``(images, label2image)`` -- two parallel lists holding the
        image paths and the containing directory's stem for each of them.
    """
    images, label2image = [], []

    for entry_name in os.listdir(root_dir):
        entry = root_dir / entry_name

        # A sub-directory contributes its own children; any other entry is
        # treated as a single candidate located directly in `root_dir`.
        if entry.is_dir():
            parent, children = entry, os.listdir(entry)
        else:
            parent, children = root_dir, [entry_name]

        for child_name in children:
            candidate = parent / child_name
            if is_image_path_valid(candidate):
                images.append(candidate)
                label2image.append(parent.stem)

    return images, label2image

In [76]:
from glob import glob
from pathlib import Path

In [77]:
# One Path per entry found directly under ./data/new_image_crops.
sample_paths = list(map(Path, glob("./data/new_image_crops/*")))

In [78]:
len(sample_paths)

595

In [79]:
import numpy as np

In [80]:
from PIL import Image
import numpy as np

# Per-channel mean that is subtracted from every pixel of a loaded image.
mean_rgb = (131.0912, 103.8827, 91.4953)

def load_image_for_feature_extraction(path='', shape=None):
    '''
    Load an image, resize its short side to 224 px, centre-crop it and
    subtract `mean_rgb`.

    Referenced from VGGFace2 Paper:
    Q. Cao, L. Shen, W. Xie, O. M. Parkhi, and A. Zisserman, “VGGFace2: A dataset for recognising faces across pose and age,” arXiv:1710.08092 [cs], May 2018

    Args:
        path: Location of the image file (anything `Image.open` accepts).
        shape: Crop size as (height, width, ...); only the first two entries
            are used. Defaults to (224, 224) when None.

    Returns:
        numpy array holding the centre crop (height, width, 3) of the resized
        RGB image with `mean_rgb` subtracted.
    '''
    short_size = 224.0
    # `shape=None` (the default) used to fail on `crop_size[0]`; fall back to a
    # square crop matching the resized short side instead.
    crop_size = (int(short_size), int(short_size)) if shape is None else shape

    # `convert` returns an independent image, so the source file can be closed
    # as soon as the conversion is done.
    with Image.open(path) as source:
        im_shape = np.array(source.size)    # in the format of (width, height, *)
        img = source.convert('RGB')

    # Scale so that the shorter side becomes `short_size`, keeping aspect ratio.
    ratio = float(short_size) / np.min(im_shape)
    img = img.resize(size=(int(np.ceil(im_shape[0] * ratio)),   # width
                           int(np.ceil(im_shape[1] * ratio))),  # height
                     resample=Image.BILINEAR)

    x = np.array(img)  # image has been transposed into (height, width)
    newshape = x.shape[:2]
    # Centre crop: split the excess evenly between both sides of each axis.
    h_start = (newshape[0] - crop_size[0])//2
    w_start = (newshape[1] - crop_size[1])//2
    x = x[h_start:h_start+crop_size[0], w_start:w_start+crop_size[1]]

    # normalize colors to prevent overfitting on color differences
    x = x - mean_rgb

    # only the transformed image is returned
    return x

In [81]:
# Smoke test: preprocess one image from the first sample folder and display
# the shape of the resulting array.
im = load_image_for_feature_extraction(sample_paths[0] / "0.jpg", shape=(224,224,3))
im.shape

(224, 224, 3)

In [82]:
import warnings
# Crop shape handed to load_image_for_feature_extraction for every image.
image_size = (224,224,3)

def generate_batch(batch_size=16, shuffle=True):
    """Yield batches of preprocessed image pairs taken from `sample_paths`.

    Every entry of `sample_paths` is expected to be a directory holding exactly
    two files. Entries that are not directories, or that hold a different
    number of files, are skipped with a warning.

    Args:
        batch_size: Number of sample folders considered per batch.
        shuffle: Visit the folders in random order when True, otherwise in the
            order of `sample_paths`.

    Yields:
        tuple: ``(images, image2idx)`` where `images` stacks the loaded images
        (two consecutive rows per accepted folder) and `image2idx` holds, for
        each row, the index of its folder in `sample_paths`. Batches in which
        every folder was skipped are not yielded.
    """
    total_samples = len(sample_paths)

    if shuffle:
        idx = np.random.permutation(total_samples)
    else:
        idx = np.arange(total_samples)

    # Converted once up front instead of once per batch.
    all_paths = np.array(sample_paths)

    for ndx in range(0, total_samples, batch_size):
        # Slicing past the end is clamped, so the last batch is simply shorter.
        batch_idx = idx[ndx: ndx + batch_size]
        batch_paths = all_paths[batch_idx]

        batch_images = []
        batch_image2idx = []

        for nid, path in zip(batch_idx, batch_paths):
            if not path.is_dir():
                warnings.warn(f"{path} is not a directory")
                continue

            # os.listdir returns entries in arbitrary order; sorting keeps the
            # first/second image assignment identical on every pass.
            sub_image_paths = sorted(os.listdir(path))

            if len(sub_image_paths) != 2:
                warnings.warn(f"{path} has {len(sub_image_paths)} files")
                continue

            for name in sub_image_paths:
                batch_images.append(load_image_for_feature_extraction(path / name, image_size))
            batch_image2idx.extend([nid, nid])

        # np.stack raises on an empty list, so a batch with no usable folder is
        # dropped rather than crashing the whole run.
        if not batch_images:
            continue

        yield np.stack(batch_images), np.stack(batch_image2idx)

In [83]:
import joblib

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
lasso_model = joblib.load("./saved_model/lasso.joblib")

In [84]:
from saved_model.prepare_resnet50 import prepare_resnet_model

# NOTE(review): the ".pkl" suffix suggests pickled weights -- presumably only
# safe for trusted files; confirm how prepare_resnet_model reads them.
resnet_model = prepare_resnet_model("./saved_model/resnet50_ft_weight.pkl")

In [85]:
import torch
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def full_lime_pipeline(x):
    """Run a batch of images through `resnet_model` and then `lasso_model`.

    Args:
        x: numpy array of images laid out as (n, height, width, channels).

    Returns:
        Whatever `lasso_model.predict` returns for the extracted features.
    """
    x = torch.Tensor(x.transpose(0, 3, 1, 2))  # nx3x224x224
    x = x.to(device)
    # Inference only: disabling autograd avoids building a graph that is
    # thrown away immediately.
    with torch.no_grad():
        features = resnet_model(x)
    return lasso_model.predict(features.detach().cpu().numpy())

In [86]:
import pandas as pd

# One row per sample folder; prediction columns are added in the next cell.
results = pd.DataFrame({'sample_paths': sample_paths})

In [87]:
# Pre-create the prediction columns filled with NaN so that rows which never
# receive a prediction remain detectable via isna().
results['lasso_results_1'] = np.nan
results['nn_results_1'] = np.nan
results['lasso_results_2'] = np.nan
results['nn_results_2'] = np.nan

In [88]:
from tqdm import tqdm
import math

In [89]:
# Number of sample folders handed to the models per batch.
batch_size = 4
# Ceiling division: how many batches are needed to cover every sample folder
# (used as the progress-bar total).
len_of_generator = -(-len(sample_paths) // batch_size)

In [90]:
# Illustration of the pair lookup used by the prediction loops: each sample
# index appears twice in a row, and `return_index=True` gives the position of
# its first occurrence. A literal array is used so the cell also runs on a
# fresh kernel, before any batch has been generated.
np.unique(np.array([592, 592, 593, 593]), return_index=True)

(array([592, 593]), array([0, 2]))

In [91]:
# Run the ResNet + lasso pipeline over every batch (fixed order, no shuffling)
# and store both predictions of each sample folder in `results`.
for batch_images, batch_image2idx in tqdm(generate_batch(shuffle=False, batch_size=batch_size), total=len_of_generator):
    lasso_predictions = full_lime_pipeline(batch_images)
    
    # Each sample index occurs twice in a row; `unique_idx` is the position of
    # the first of the two images belonging to that sample.
    unique_samples, unique_idx = np.unique(batch_image2idx, return_index=True)
    
    for sample_id, index in zip(unique_samples, unique_idx):
        # `index` is the first image of the pair, `index + 1` the second one.
        results.loc[[sample_id],'lasso_results_1'] = lasso_predictions[index]
        results.loc[[sample_id],'lasso_results_2'] = lasso_predictions[index + 1]

100%|█████████████████████████████████████████| 149/149 [00:06<00:00, 21.52it/s]


In [92]:
results

Unnamed: 0,sample_paths,lasso_results_1,nn_results_1,lasso_results_2,nn_results_2
0,data/new_image_crops/haircut face befoe after ...,0.0,,1.0,
1,data/new_image_crops/beard before after 3_28,0.0,,0.0,
2,data/new_image_crops/beard before after face c...,1.0,,1.0,
3,data/new_image_crops/beard before after 2_20,0.0,,0.0,
4,data/new_image_crops/makeup before after 4_15,1.0,,0.0,
...,...,...,...,...,...
590,data/new_image_crops/drag queen face before af...,0.0,,0.0,
591,data/new_image_crops/makeup before after chine...,0.0,,0.0,
592,data/new_image_crops/makeup before after 3_24,0.0,,0.0,
593,data/new_image_crops/hairdoo before and after ...,0.0,,0.0,


In [93]:
(results['lasso_results_1'] != results['lasso_results_2']).sum()

78

In [94]:
results['lasso_results_1'].isna().sum()

0

In [95]:
from saved_model.binary_classifier import load_pretrained_classifier

binary_classifier = load_pretrained_classifier('./saved_model/weights-2.pth')

In [96]:
def full_nn_pipeline(x):
    """Run a batch of images through `resnet_model` and `binary_classifier`.

    Args:
        x: numpy array of images laid out as (n, height, width, channels).

    Returns:
        numpy array with the classifier's sigmoid outputs rounded to 0 or 1.
    """
    x = torch.Tensor(x.transpose(0, 3, 1, 2))  # nx3x224x224
    x = x.to(device)
    # Inference only: disabling autograd avoids building a graph that is
    # thrown away immediately.
    with torch.no_grad():
        x = resnet_model(x)
        x = torch.sigmoid(binary_classifier(x))
        x = torch.round(x)
    return x.detach().cpu().numpy()

In [97]:
# Quick check of the NN pipeline. `batch_images` is whatever the last
# iteration of the lasso prediction loop left behind, so that loop must have
# run first.
full_nn_pipeline(batch_images).squeeze()

array([1., 1., 1., 1., 0., 1.], dtype=float32)

In [98]:
# Same traversal as the lasso loop, but with the ResNet + binary classifier
# pipeline; fills the nn_results_* columns of `results`.
for batch_images, batch_image2idx in tqdm(generate_batch(shuffle=False, batch_size=batch_size), total=len_of_generator):
    nn_predictions = full_nn_pipeline(batch_images)
    
    # Each sample index occurs twice in a row; `unique_idx` is the position of
    # the first of the two images belonging to that sample.
    unique_samples, unique_idx = np.unique(batch_image2idx, return_index=True)
    
    for sample_id, index in zip(unique_samples, unique_idx):
        # `index` is the first image of the pair, `index + 1` the second one.
        results.loc[[sample_id],'nn_results_1'] = nn_predictions[index]
        results.loc[[sample_id],'nn_results_2'] = nn_predictions[index + 1]
    

100%|█████████████████████████████████████████| 149/149 [00:06<00:00, 24.36it/s]


In [99]:
results

Unnamed: 0,sample_paths,lasso_results_1,nn_results_1,lasso_results_2,nn_results_2
0,data/new_image_crops/haircut face befoe after ...,0.0,1.0,1.0,1.0
1,data/new_image_crops/beard before after 3_28,0.0,1.0,0.0,1.0
2,data/new_image_crops/beard before after face c...,1.0,1.0,1.0,1.0
3,data/new_image_crops/beard before after 2_20,0.0,1.0,0.0,1.0
4,data/new_image_crops/makeup before after 4_15,1.0,1.0,0.0,1.0
...,...,...,...,...,...
590,data/new_image_crops/drag queen face before af...,0.0,1.0,0.0,0.0
591,data/new_image_crops/makeup before after chine...,0.0,1.0,0.0,1.0
592,data/new_image_crops/makeup before after 3_24,0.0,1.0,0.0,1.0
593,data/new_image_crops/hairdoo before and after ...,0.0,1.0,0.0,1.0


In [100]:
lasso_1, lasso_2 = results['lasso_results_1'], results['lasso_results_2']

In [101]:
(lasso_1 != lasso_2).sum()

78

In [102]:
(results['nn_results_1'] != results['nn_results_2']).sum()

201

In [103]:
results['lasso_results_1'].isna().sum()

0

In [104]:
# to_csv does not create missing folders, so make sure ./results exists first.
os.makedirs("./results", exist_ok=True)
results.to_csv("./results/batch_preds.csv")

In [105]:
len(results)

595