# TODO
- Speed up feature extraction with multiprocessing


In [11]:
from collections import defaultdict
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
from skimage import feature
from PIL import Image as IMG
import numpy as np
import pandas as pd 
import operator
import cv2
import os 
import gc
from IPython.core.display import HTML 
from IPython.display import Image
from myutils import timer

# global vars
## directory holding the input images; change this path to match your environment
## (it is concatenated directly with file names, so keep the trailing slash)
images_path = './hoge/fuga/'

## const.
## directory that save_features() writes the feature files into
## (also concatenated directly with the file name)
features_path = '../features/'



In [12]:
# define functions

def load_image(img, usecv2=False):
    """Load one image from ``images_path``.

    Args:
        img: File name of the image, relative to ``images_path``.
        usecv2: When truthy the file is read with ``cv2.imread``; otherwise
            it is opened with PIL (``IMG.open``).

    Returns:
        The loaded image (whatever ``cv2.imread`` / ``IMG.open`` returned),
        or ``None`` when the file could not be opened.
    """
    path = images_path + img
    # Pre-bind the result so a failed open yields None instead of an
    # UnboundLocalError on the return statement.
    im = None
    try:
        im = cv2.imread(path) if usecv2 else IMG.open(path)
    except Exception:
        print('Cannot open img: ', path)
    else:
        # cv2.imread reports an unreadable file by returning None rather
        # than raising, so surface that case with the same message.
        if im is None:
            print('Cannot open img: ', path)
    return im

def crop_horizontal(im):
    """Split an image into its upper and lower halves.

    The two halves are analysed separately by the caller because a single
    average over the complete image may give biased results.

    Args:
        im: Object exposing ``size`` as ``(width, height)`` and a
            ``crop(box)`` method taking ``(left, upper, right, lower)``.

    Returns:
        Tuple ``(upper_half, lower_half)`` as returned by ``im.crop``.
    """
    width, height = im.size
    # True division on purpose: the midpoint is handed to crop() unrounded.
    middle = height / 2
    upper_half = im.crop((0, 0, width, middle))
    lower_half = im.crop((0, middle, width, height))
    return upper_half, lower_half

def color_analysis(img):
    """Measure how much of an image is near-white and near-black.

    Only the 25 most frequent pixel values are considered. Among those, a
    value counts as dark when each of its first three channels is <= 20 and
    as light when each is >= 240.

    Args:
        img: Object whose ``getdata()`` yields one hashable value per pixel,
            either a tuple of channels or a bare number for single-band
            images.

    Returns:
        Tuple ``(light_percent, dark_percent)``: shares (0-100, rounded to
        two decimals) of the counted pixels that are light / dark.

    Raises:
        ZeroDivisionError: If the image contains no pixels.
    """
    # Local import: Counter is not among the file-level imports.
    from collections import Counter

    pixel_limit = 25
    # most_common() orders ties the same way as a stable descending sort.
    top_colors = Counter(img.getdata()).most_common(pixel_limit)

    light_shade = dark_shade = shade_count = 0
    for color, count in top_colors:
        try:
            channels = color[:3]  # ignore anything past the third channel
        except (TypeError, IndexError):
            # Single-band images yield bare numbers instead of tuples.
            # NOTE(review): for palette ('P') images this number is a palette
            # index, not an intensity - convert such images upstream.
            channels = (color,)
        if all(value <= 20 for value in channels):  # dull: too much darkness
            dark_shade += count
        if all(value >= 240 for value in channels):  # bright: too much whiteness
            light_shade += count
        shade_count += count

    light_percent = round((float(light_shade) / shade_count) * 100, 2)
    dark_percent = round((float(dark_shade) / shade_count) * 100, 2)
    return light_percent, dark_percent

def perform_color_analysis(img, flag):
    """Return the dark or light pixel percentage of one image.

    The image is loaded, split into an upper and a lower half, each half is
    run through ``color_analysis`` and the two results are averaged.

    Args:
        img: Image file name, passed to ``load_image``.
        flag: ``'black'`` selects the dark percentage, ``'white'`` the light
            percentage.

    Returns:
        The selected percentage, or ``None`` when the analysis raised or
        ``flag`` is neither ``'black'`` nor ``'white'``.
    """
    image = load_image(img)
    upper_half, lower_half = crop_horizontal(image)

    try:
        upper_light, upper_dark = color_analysis(upper_half)
        lower_light, lower_dark = color_analysis(lower_half)
    except Exception as e:
        print('Calculation error')
        return None

    averaged_light = (upper_light + lower_light) / 2
    averaged_dark = (upper_dark + lower_dark) / 2

    # Drop the image objects and force a collection before returning.
    del image, upper_half, lower_half
    gc.collect()

    if flag == 'black':
        return averaged_dark
    if flag == 'white':
        return averaged_light
    return None

def average_pixel_width(img):
    """Return the percentage of pixels that Canny marks as edges.

    Args:
        img: Image file name, passed to ``load_image`` (PIL path).

    Returns:
        ``100 * edge_pixels / (width * height)`` as a float, where the edge
        map comes from ``feature.canny`` with ``sigma=3`` on the image
        converted to mode ``'L'``.
    """
    image = load_image(img)
    width, height = image.size
    grayscale = np.asarray(image.convert(mode='L'))
    edge_mask = feature.canny(grayscale, sigma=3)
    edge_fraction = float(np.sum(edge_mask)) / (width * height)
    return edge_fraction * 100

def get_dominant_color(img):
    """Return the centre of the most populated k-means colour cluster.

    The pixels are clustered into five colours with ``cv2.kmeans`` (random
    initial centres, 10 attempts, at most 200 iterations or epsilon 0.1).

    Args:
        img: Image file name, passed to ``load_image`` with ``usecv2=True``.

    Returns:
        A length-3 ``uint8`` array holding the dominant colour. The channel
        order is that of the loaded image; ``cv2.imread`` yields B, G, R.
    """
    im = load_image(img, usecv2=True)
    # One row per pixel; cv2.kmeans requires float32 samples.
    pixels = np.float32(im).reshape((-1, 3))

    n_colors = 5
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, labels, centroids = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)

    palette = np.uint8(centroids)
    # Count pixels per label. minlength keeps position i tied to label i
    # even if a label never occurs, so the argmax indexes the palette directly.
    label_counts = np.bincount(labels.ravel(), minlength=n_colors)
    return palette[np.argmax(label_counts)]

def get_average_color(img):
    """Return the mean value of every channel of an image.

    Args:
        img: Image file name, passed to ``load_image`` with ``usecv2=True``.

    Returns:
        List with one mean per channel, in the channel order of the loaded
        array (``cv2.imread`` yields B, G, R).
    """
    pixels = load_image(img, usecv2=True)
    channel_count = pixels.shape[-1]
    channel_means = []
    for channel in range(channel_count):
        channel_means.append(pixels[:, :, channel].mean())
    return channel_means

def getSize(img):
    """Return the size in bytes of the image file ``images_path + img``."""
    return os.stat(images_path + img).st_size

def getDimensions(img):
    """Return the ``size`` attribute of the image opened via ``load_image``.

    The caller reads element 0 as the width and element 1 as the height.
    """
    return load_image(img).size

def get_blurrness_score(img):
    """Return the variance of the Laplacian of the grayscale image.

    Args:
        img: Image file name, passed to ``load_image`` with ``usecv2=True``.

    Returns:
        Variance of the 64-bit float Laplacian response.
        NOTE(review): variance of the Laplacian is normally used as a focus
        measure where lower values mean more blur - confirm consumers read
        this column that way despite its name.
    """
    bgr_image = load_image(img, usecv2=True)
    grayscale = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian.var()

def get_imagefeatures(features, imgcol, prefix=''):
    """Compute per-image features and add them as columns to ``features``.

    Every new column is named ``prefix + '_' + <feature>``. The columns added
    are: dullness, whiteness, average_pixel_width, dominant_red/green/blue,
    average_red/green/blue, image_size, width, height and blurrness.

    Args:
        features: DataFrame with one row per image; modified in place.
        imgcol: Name of the column holding the image file names.
        prefix: Prefix for the new column names.

    Returns:
        The same ``features`` DataFrame, with the feature columns added.
    """
    with timer('Image features extraction'):
        prefix = prefix + '_'
        images = features[imgcol]

        features[prefix+'dullness'] = images.apply(lambda x: perform_color_analysis(x, 'black'))
        features[prefix+'whiteness'] = images.apply(lambda x: perform_color_analysis(x, 'white'))
        features[prefix+'average_pixel_width'] = images.apply(average_pixel_width)

        # Both colour helpers read the image with cv2.imread, which stores
        # channels as B, G, R: index 2 is red and index 0 is blue.
        dominant_color = images.apply(get_dominant_color)
        features[prefix+'dominant_red'] = dominant_color.apply(lambda x: x[2]) / 255
        features[prefix+'dominant_green'] = dominant_color.apply(lambda x: x[1]) / 255
        features[prefix+'dominant_blue'] = dominant_color.apply(lambda x: x[0]) / 255

        average_color = images.apply(get_average_color)
        features[prefix+'average_red'] = average_color.apply(lambda x: x[2]) / 255
        features[prefix+'average_green'] = average_color.apply(lambda x: x[1]) / 255
        features[prefix+'average_blue'] = average_color.apply(lambda x: x[0]) / 255

        features[prefix+'image_size'] = images.apply(getSize)

        dimensions = images.apply(getDimensions)
        features[prefix+'width'] = dimensions.apply(lambda x: x[0])
        features[prefix+'height'] = dimensions.apply(lambda x: x[1])

        features[prefix+'blurrness'] = images.apply(get_blurrness_score)

    gc.collect()
    return features

def save_features(features, filename, imgcol, deleteimgpath=True):
    """Write the feature table to ``features_path + filename`` as Feather.

    Args:
        features: DataFrame to save. When ``deleteimgpath`` is truthy the
            ``imgcol`` column is dropped from it in place before saving.
        filename: Name of the output file inside ``features_path``.
        imgcol: Name of the image-path column to drop.
        deleteimgpath: Whether to drop ``imgcol`` before saving.
    """
    if deleteimgpath:
        features.drop(imgcol, axis=1, inplace=True)
    # Create the output directory on first use so the write does not fail
    # on a fresh checkout.
    os.makedirs(features_path, exist_ok=True)
    filepath = features_path + filename
    features.to_feather(filepath)


In [None]:
# Example: Calculate features
# Build a one-column frame from the file names found in images_path, then
# let get_imagefeatures() add the feature columns (named 'debug_<feature>').
imgs = os.listdir(images_path)
features = pd.DataFrame()
features['imagepath'] = imgs

features = get_imagefeatures(features, imgcol='imagepath', prefix='debug')

gc.collect()
features.head()

In [None]:
# Example: Save features
# Writes the frame to features_path + 'debug.feather'; with deleteimgpath=True
# the 'imagepath' column is first dropped from `features` in place.
save_features(features, filename='debug.feather', imgcol='imagepath', deleteimgpath=True)
