In [1]:
from pathlib import Path
import os
from tqdm import tqdm
from glob import glob
from pathlib import Path
import numpy as np
import pandas as pd
import torch
from fastai.data.all import *
from fastai.vision.all import *

In [2]:
image_file_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')

def is_image_path_valid(path: Path):
    return path.is_file() and path.suffix in image_file_extensions

def load_image_file(path):
    return Image.open(path)

In [3]:
from utils import load_images_recursively

In [4]:
image_dir = Path("./data/new_crops")

In [5]:
# fns = get_image_files(image_dir / 'before')
# failed = verify_images(fns)

In [6]:
def load_image_paths(path: Path):
    fns = get_image_files(path)
    
    return fns

In [7]:
before_paths = load_image_paths(image_dir / 'before')
after_paths = load_image_paths(image_dir / 'after')

In [12]:
from PIL import Image
import numpy as np

mean_rgb = (131.0912, 103.8827, 91.4953)
image_shape = (224,224,3)

def load_image_for_feature_extraction(path='', shape=image_shape):
    '''
    Referenced from VGGFace2 Paper:
    Q. Cao, L. Shen, W. Xie, O. M. Parkhi, and A. Zisserman, “VGGFace2: A dataset for recognising faces across pose and age,” arXiv:1710.08092 [cs], May 2018
    '''
    short_size = 224.0
    crop_size = shape
    img = Image.open(path)
    im_shape = np.array(img.size)    # in the format of (width, height, *)
    img = img.convert('RGB')

    ratio = float(short_size) / np.min(im_shape)
    img = img.resize(size=(int(np.ceil(im_shape[0] * ratio)),   # width
                           int(np.ceil(im_shape[1] * ratio))),  # height
                     resample=Image.BILINEAR)

    x = np.array(img)  # image has been transposed into (height, width)
    newshape = x.shape[:2]
    h_start = (newshape[0] - crop_size[0])//2
    w_start = (newshape[1] - crop_size[1])//2
    x = x[h_start:h_start+crop_size[0], w_start:w_start+crop_size[1]]
    
    # normalize colors to prevent overfitting on color differences 
    x = x - mean_rgb
    
    # returns transformed image, and original image
    return x

In [13]:
import joblib

lasso_model = joblib.load("./saved_model/lasso.joblib")

In [14]:
from saved_model.prepare_resnet50 import prepare_resnet_model

resnet_model = prepare_resnet_model("./saved_model/resnet50_ft_weight.pkl")

In [15]:
from saved_model.binary_classifier import load_pretrained_classifier

binary_classifier = load_pretrained_classifier('./saved_model/weights-2.pth')

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def full_lime_pipeline(x):
    x = torch.Tensor(x.transpose(0, 3, 1, 2))  # nx3x224x224
    x = x.to(device)
    x = resnet_model(x).detach().cpu().numpy()
    return lasso_model.predict(x)

In [17]:
def full_nn_pipeline(x):
    x = torch.Tensor(x.transpose(0, 3, 1, 2))  # 1x3x224x224
    x = x.to(device)
    x = resnet_model(x)
    x = torch.sigmoid(binary_classifier(x))
    x = torch.round(x)
    return x.detach().cpu().numpy()

In [18]:
results = pd.DataFrame({'sample_paths': before_paths})

In [19]:
results['lasso_results_1'] = np.nan
results['nn_results_1'] = np.nan
results['lasso_results_2'] = np.nan
results['nn_results_2'] = np.nan

In [20]:
from tqdm import tqdm
import math

In [21]:
np.expand_dims(load_image_for_feature_extraction(before_paths[0]), 0).shape

(1, 224, 224, 3)

In [22]:
def preprocess(path):
    return np.expand_dims(load_image_for_feature_extraction(path), 0)

for i, (before_path, after_path) in tqdm(enumerate(zip(before_paths, after_paths)), total=len(before_paths)):
    if before_path.stem != after_path.stem:
        print(f"Before and after don't match for index {i}, before: {before_path}, after: {after_path}")
        break
        
    results.loc[i,'lasso_results_1'] = full_lime_pipeline(preprocess(before_path))
    results.loc[i,'lasso_results_2'] = full_lime_pipeline(preprocess(after_path))
    results.loc[i,'nn_results_1'] = full_nn_pipeline(preprocess(before_path)).squeeze()
    results.loc[i,'nn_results_2'] = full_nn_pipeline(preprocess(after_path)).squeeze()

100%|███████████████████████████████████████| 1292/1292 [00:48<00:00, 26.59it/s]


In [83]:
results

Unnamed: 0,sample_paths,lasso_results_1,nn_results_1,lasso_results_2,nn_results_2
0,data/new_crops/before/haircut face before after 3_89.jpg,1.0,1.0,0.0,1.0
1,data/new_crops/before/makeup before after 3_108.jpg,0.0,0.0,0.0,1.0
2,data/new_crops/before/makeup before after 3_55.jpg,0.0,0.0,0.0,0.0
3,data/new_crops/before/makeup before after arabic_51.jpg,1.0,1.0,1.0,1.0
4,data/new_crops/before/beard before after 2_30.jpg,0.0,0.0,0.0,0.0
...,...,...,...,...,...
1287,data/new_crops/before/makeup before after 3_39.jpg,1.0,0.0,1.0,1.0
1288,data/new_crops/before/glasses before after 1_11.jpg,0.0,0.0,0.0,0.0
1289,data/new_crops/before/beard before after 1_32.jpg,0.0,0.0,0.0,0.0
1290,data/new_crops/before/haircut face before after 1_69.jpg,0.0,0.0,0.0,1.0


In [38]:
total = len(results)
print(f"Total images: {total}")

lasso_1, lasso_2 = results['lasso_results_1'], results['lasso_results_2']

lasso_flipped = (lasso_1 != lasso_2).sum()
print(f"Predictions that flipped between liberal and conservative using logistic regression model: {lasso_flipped}")

nn_flipped = (results['nn_results_1'] != results['nn_results_2']).sum()
print(f"Predictions that flipped between liberal and conservative using neural net: {nn_flipped}")

results['lasso_results_1'].isna().sum()

lasso_lib_to_con = len(results[(results['lasso_results_1'] == 0) & (results['lasso_results_2'] == 1)])
print(f"Predictions that flipped from liberal to conservative using logistic regression model: {lasso_lib_to_con}")

lasso_con_to_lib = len(results[(results['lasso_results_1'] == 1) & (results['lasso_results_2'] == 0)])
print(f"Predictions that flipped from conservative to liberal using logistic regression model: {lasso_con_to_lib}")

nn_lib_to_con = len(results[(results['nn_results_1'] == 0) & (results['nn_results_2'] == 1)])
print(f"Predictions that flipped from liberal to conservative using neural network: {nn_lib_to_con}")

nn_con_to_lib = len(results[(results['nn_results_1'] == 1) & (results['nn_results_2'] == 0)])
print(f"Predictions that flipped from conservative to liberal using neural network: {nn_con_to_lib}")

Total images: 1292
Predictions that flipped between liberal and conservative using logistic regression model: 200
Predictions that flipped between liberal and conservative using neural net: 428
Predictions that flipped from liberal to conservative using logistic regression model: 92
Predictions that flipped from conservative to liberal using logistic regression model: 108
Predictions that flipped from liberal to conservative using neural network: 231
Predictions that flipped from conservative to liberal using neural network: 197


In [3]:
results_path = Path("./results/batch_preds.csv")

In [105]:
results.to_csv(results_path)

In [6]:
pred_results = pd.read_csv(results_path, index_col=0)

In [7]:
pred_results

Unnamed: 0,sample_paths,lasso_results_1,nn_results_1,lasso_results_2,nn_results_2
0,data/new_crops/before/haircut face before after 3_89.jpg,1.0,1.0,0.0,1.0
1,data/new_crops/before/makeup before after 3_108.jpg,0.0,0.0,0.0,1.0
2,data/new_crops/before/makeup before after 3_55.jpg,0.0,0.0,0.0,0.0
3,data/new_crops/before/makeup before after arabic_51.jpg,1.0,1.0,1.0,1.0
4,data/new_crops/before/beard before after 2_30.jpg,0.0,0.0,0.0,0.0
...,...,...,...,...,...
1287,data/new_crops/before/makeup before after 3_39.jpg,1.0,0.0,1.0,1.0
1288,data/new_crops/before/glasses before after 1_11.jpg,0.0,0.0,0.0,0.0
1289,data/new_crops/before/beard before after 1_32.jpg,0.0,0.0,0.0,0.0
1290,data/new_crops/before/haircut face before after 1_69.jpg,0.0,0.0,0.0,1.0


In [22]:
lasso_1 = pred_results['lasso_results_1']
lasso_2 = pred_results['lasso_results_2']
nn_1 = pred_results['nn_results_1']
nn_2 = pred_results['nn_results_2']
fns = pred_results['sample_paths']

In [23]:
# extract filename without extension from path
fns = fns.map(lambda fn: Path(fn).stem)

In [40]:
categories = fns.map(lambda fn: fn.split()[0])

In [31]:
def get_cat(stem):
    cat_dict = {
        'makeupe': 'makeup',
        'hiardoo': 'hairdoo',
        'hairdoocut': 'haircut'
    }
    
    first_word = stem.split()[0]
    
    if first_word in cat_dict.keys():
        return cat_dict[stem]
    else:
        return first_word

In [41]:
categories.unique()

array(['haircut', 'makeup', 'beard', 'images', 'makeupe', 'drag',
       'hiardoo', 'hairdoo', 'glasses', 'hairdoocut'], dtype=object)

In [42]:
categories = categories.map(get_cat)

In [49]:
idx = np.where(categories == 'images')[0]

In [54]:
categories[idx] = fns[idx].map(lambda s: s.split()[2])

In [56]:
cat_names = categories.unique()
cat_names

array(['haircut', 'makeup', 'beard', 'drag', 'hairdoo', 'glasses'],
      dtype=object)

In [63]:
haircut_preds = pred_results[categories == 'haircut']

In [66]:
print(f"Number of images before and after haircuts: {len(haircut_preds)}")

Number of images before and after haircuts: 287


In [72]:
print(f"Haircut before predicted liberal: {len(haircut_preds[haircut_preds['lasso_results_1'] == 0])}")

Haircut before predicted liberal: 234


In [73]:
print(f"Haircut before predicted conservative: {len(haircut_preds[haircut_preds['lasso_results_1'] == 1])}")

Haircut before predicted conservative: 53


In [128]:
def get_blank_df():
    df = pd.DataFrame(index=cat_names)
    df['total'] = np.nan
    df['before liberal'] = np.nan
    df['before conservative'] = np.nan
    df['after liberal'] = np.nan
    df['after conservative'] = np.nan
    
    return df

In [129]:
def get_analysis(df: pd.DataFrame = None, before_colname = "", after_colname = ""):
    
    if df == None:
        df = get_blank_df()
        
    for cat in cat_names:
        pred_slice = pred_results[categories == cat]
        total = len(pred_slice)
        before_lib = len(pred_slice[pred_slice[before_colname] == 0])
        before_con = len(pred_slice[pred_slice[before_colname] == 1])
        after_lib = len(pred_slice[pred_slice[after_colname] == 0])
        after_con = len(pred_slice[pred_slice[after_colname] == 1])

        df.loc[[cat], ['total']] = total
        df.loc[[cat], ['before liberal']] = before_lib
        df.loc[[cat], ['before conservative']] = before_con
        df.loc[[cat], ['after liberal']] = after_lib
        df.loc[[cat], ['after conservative']] = after_con

    total = df.iloc[:6].sum()
    total.name = 'total'
    df.append(total.transpose())

    return df

In [130]:
lasso_analysis_df = get_analysis(before_colname="lasso_results_1", after_colname="lasso_results_2")

In [131]:
lasso_analysis_df

Unnamed: 0,total,before liberal,before conservative,after liberal,after conservative
haircut,287.0,234.0,53.0,236.0,51.0
makeup,386.0,332.0,54.0,336.0,50.0
beard,357.0,320.0,37.0,339.0,18.0
drag,168.0,146.0,22.0,141.0,27.0
hairdoo,68.0,60.0,8.0,59.0,9.0
glasses,26.0,24.0,2.0,21.0,5.0


In [123]:
lasso_analysis_df.to_csv('./results/lasso_retest_analysis.csv')

In [132]:
nn_analysis_df = get_analysis(before_colname="nn_results_1", after_colname="nn_results_2")

In [133]:
nn_analysis_df

Unnamed: 0,total,before liberal,before conservative,after liberal,after conservative
haircut,287.0,142.0,145.0,134.0,153.0
makeup,386.0,205.0,181.0,206.0,180.0
beard,357.0,175.0,182.0,152.0,205.0
drag,168.0,76.0,92.0,69.0,99.0
hairdoo,68.0,24.0,44.0,27.0,41.0
glasses,26.0,15.0,11.0,15.0,11.0


In [134]:
nn_analysis_df.to_csv('./results/nn_retest_analysis.csv')