In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import skimage
from skimage import color, io
from skimage.filters import threshold_otsu
import itertools
from PIL import Image
from matplotlib import cm

In [23]:
def print_cuts(orig_image, pix_mean, cuts):
    '''
    Plot an image, its per-column profile and the proposed cuts side by side.

    Parameters
    ----------
    orig_image : array-like
        Image to display; drawn with a gray colormap in the first and last panel.
    pix_mean : sequence of float
        Per-column values plotted in the middle panel.
    cuts : iterable of int
        x coordinates at which a red vertical line is drawn over the image.
    '''
    fig = plt.figure(figsize=(20, 2.5))
    ax_orig = fig.add_subplot(1, 3, 1)
    ax_profile = fig.add_subplot(1, 3, 2)
    #the cuts panel shares both axes with the first panel so the two image views stay aligned
    ax_cuts = fig.add_subplot(1, 3, 3, sharex=ax_orig, sharey=ax_orig)

    #show the image that was passed in, not whatever global happens to be called `image`
    ax_orig.imshow(orig_image, cmap=plt.cm.gray)
    ax_orig.set_title('Original')
    ax_orig.axis('off')

    ax_profile.plot(pix_mean)
    ax_profile.set_title('Mean Pixel Count')

    ax_cuts.imshow(orig_image, cmap=plt.cm.gray)
    ax_cuts.set_title('Cuts')
    for x in cuts:
        ax_cuts.axvline(x, color='r')

In [24]:
def get_cuts(image, print_result=False, frame_frac=0.05, edge_frac=0.15,
             min_gap_frac=0.15, max_ink=0.04):
    '''
    Get an image and return the set of x coordinates at which the image could be cut
    without going through any pictorial elements.

    Parameters
    ----------
    image : array-like
        Image as a 2-D (single channel) or 3-D (with channels) array.
    print_result : bool
        When True, plot the gray scale image, the column profile and the cuts.
    frame_frac : float
        Fraction of the height ignored at the top and at the bottom (image frame).
    edge_frac : float
        Fraction of the width at the left and right edges in which no cut is placed.
    min_gap_frac : float
        Minimal distance between two cuts, as a fraction of the width.
    max_ink : float
        Largest share of black pixels a column may have to be considered a cut.

    Returns
    -------
    set of int
        x coordinates of the selected cuts (possibly empty).
    '''
    #convert to gray scale (only when there are channels) and separate black from white pixels
    image = np.asarray(image)
    if image.ndim == 3:
        image = color.rgb2gray(image)
    thresh = threshold_otsu(image)
    binary = image > thresh

    #share of black pixels in every column, ignoring the top and bottom frame
    H = binary.shape[0]
    W = binary.shape[1]
    take = int(H * frame_frac)
    pix = np.invert(binary)[take:H - take, :].mean(axis=0)

    left_edge = int(W * edge_frac)
    right_edge = int(W - (W * edge_frac))
    min_gap = int(W * min_gap_frac)

    #visit columns from the emptiest one upwards; a stable sort keeps ties in left-to-right order
    cuts = set()
    for k in np.argsort(pix, kind='stable'):
        if pix[k] > max_ink:
            break
        k = int(k)

        #not in edges
        if k < left_edge or k > right_edge:
            continue

        #local minimum: every accepted cut is at least as empty as this column, so a
        #column closer than min_gap to one of them (on EITHER side) is dropped
        if any(abs(k - cut) < min_gap for cut in cuts):
            continue

        cuts.add(k)

    if print_result:
        print_cuts(image, pix, cuts)

    return cuts

In [25]:
def get_pic_permutations(image, cuts):
    '''
    Split the image width into tiles at the given x coordinates and return every
    possible ordering of those tiles.

    Each ordering is a dictionary of tile#: (tile_begin, tile_end), whose key order is
    the order of the tiles in that permutation. The first ordering returned is the
    original (identity) one. Without any cut, a single ordering holding one tile that
    spans the whole width is returned.
    '''
    last_col = image.shape[1] - 1
    if len(cuts) == 0:
        return [{0 : (0, last_col)}]

    #every tile ends on a cut (or on the last column) and the next one starts right after it
    ends = sorted(cuts)
    begins = [0] + [cut + 1 for cut in ends]
    ends = ends + [last_col]
    tiles = dict(enumerate(zip(begins, ends)))

    #one dictionary per ordering of the tile numbers
    return [{tile: tiles[tile] for tile in order}
            for order in itertools.permutations(tiles)]

In [26]:
def create_permutated_images(image, permutations, print_permutations=True):
    '''
    Get an image and a list of permutations.
    Returns a list of tuples (permutation name, permutated image).

    Parameters
    ----------
    image : array
        Image data, converted with Image.fromarray.
    permutations : list of dict
        Each dict maps tile# to (tile_begin, tile_end), both inclusive x coordinates;
        the key order is the order in which the tiles are laid out from left to right.
        The first entry is taken to be the original ordering and is skipped.
    print_permutations : bool
        When True, plot the original image followed by every permutated image.

    Returns
    -------
    list of (list, image)
        The permutation name is the list of tile numbers in their new order.
        Empty when there is at most one permutation.
    '''
    #no possible permutations for image
    if len(permutations) <= 1:
        return list()

    orig_image = Image.fromarray(image)
    full_height = orig_image.height

    #create permutated images, skip first one which is identity
    permutated_images = []
    for ordering in permutations[1:]:
        permutated = Image.new(mode="RGB", size=orig_image.size)
        x_offset = 0
        for tile_begin, tile_end in ordering.values():
            #tile bounds are inclusive while the right/lower edges of a crop box are
            #exclusive, hence tile_end + 1 and the full height
            region = orig_image.crop((tile_begin, 0, tile_end + 1, full_height))
            #paste tile in target location
            permutated.paste(region, (x_offset, 0))
            x_offset += region.width
        permutated_images.append((list(ordering.keys()), permutated))

    if print_permutations:
        ncols = len(permutated_images) + 1
        #squeeze=False always gives a 2-D array of axes; its single row holds the panels
        fig, axes = plt.subplots(ncols=ncols, figsize=(20, 2.5), squeeze=False)
        panels = axes[0]

        #print original
        panels[0].imshow(orig_image, cmap=plt.cm.gray)
        panels[0].set_title('Original')

        for panel, (name, permutated) in zip(panels[1:], permutated_images):
            panel.set_title(str(name))
            panel.imshow(permutated, cmap=plt.cm.gray)

    return permutated_images

The cell below should be run once to create the cache of image permutations.

In [30]:
label_file = '/Users/atad/Desktop/DIGANES/diganes/diganens.csv'
pic_dir = '/Users/atad/Desktop/DIGANES/pics'

labels = pd.read_csv(label_file)

#label rows of the generated images; they are concatenated to `labels` once, after the
#loop (DataFrame.append no longer exists in pandas 2 and re-copies the frame on every call)
permutated_rows = []

nof_rows = len(labels)
for idx in range(nof_rows):
    df_row = labels.loc[idx]
    if df_row["category"] != 'drawing':
        continue

    orig_img_name = df_row["img_name"]
    orig_img_path = os.path.join(pic_dir, orig_img_name)

    #find all possible cuts for image and create permutations
    orig_image = io.imread(orig_img_path)
    cuts = get_cuts(orig_image, print_result=False)
    permutations = get_pic_permutations(orig_image, cuts)
    permutated_images = create_permutated_images(orig_image, permutations, print_permutations=False)

    #record a label row for every permutated image and write the image to disk
    for tile_order, permutated in permutated_images:
        new_image_name = 'per_' + '_'.join(str(x) for x in tile_order) + '_' + orig_img_name
        row_copy = df_row.copy()
        row_copy["img_name"] = new_image_name
        permutated_rows.append(row_copy)
        permutated.save(os.path.join(pic_dir, new_image_name))

if permutated_rows:
    #ignore_index gives the added rows fresh labels instead of repeating the label of
    #the row they were copied from
    labels = pd.concat([labels, pd.DataFrame(permutated_rows)], ignore_index=True)

#writing the extended labels to disk is left disabled, as it was
#labels.to_csv('per_diganens.csv', index=False)



# Resize images to reduce run time

In [3]:
label_file = '/Users/atad/Desktop/DIGANES/diganes/diganens.csv'
pic_dir = '/Users/atad/Desktop/DIGANES/pics'
resized_dir = '/Users/atad/Desktop/DIGANES/resized_pics'


def resize_image(src_path, dst_path, size=(224, 224)):
    '''Resize the image at src_path to `size` and save the result at dst_path.'''
    #the with-block closes the source image even when resizing or saving fails
    with Image.open(src_path) as img:
        res_img = img.resize(size)
        try:
            res_img.save(dst_path)
        finally:
            res_img.close()


def is_permutation_of(per_name, img_name):
    '''
    Tell whether per_name has the form per_<tile#>_<tile#>..._<img_name>.

    Plain string comparison is used so that characters such as "." in the file name
    are not interpreted as a pattern, and so that "1.jpg" does not match "..._21.jpg".
    '''
    prefix = 'per_'
    suffix = '_' + img_name
    if not (per_name.startswith(prefix) and per_name.endswith(suffix)):
        return False
    tile_ids = per_name[len(prefix):len(per_name) - len(suffix)].split('_')
    return all(tile_id.isdigit() for tile_id in tile_ids)


#make sure the target directory exists before anything is saved into it
os.makedirs(resized_dir, exist_ok=True)

labels = pd.read_csv(label_file)

permutations = pd.DataFrame([x for x in os.listdir(pic_dir) if
                             (x.startswith('per') and x.endswith(".jpg"))], columns=['Filename'])

nof_rows = len(labels)
for idx in range(nof_rows):
    if labels.loc[idx, "category"] != 'drawing':
        continue

    img_name = labels.loc[idx, "img_name"]

    #resize original image
    resize_image(os.path.join(pic_dir, img_name), os.path.join(resized_dir, img_name))

    #resize the permutated images created from this original
    is_match = np.array([is_permutation_of(name, img_name) for name in permutations['Filename']],
                        dtype=bool)
    per_match = permutations.loc[is_match]

    for per_img_name in per_match['Filename']:
        resize_image(os.path.join(pic_dir, per_img_name), os.path.join(resized_dir, per_img_name))

# Convert images to true black and white

In [12]:
image_dir = '/Users/atad/Desktop/DIGANES/resized_pics'
bw_dir = '/Users/atad/Desktop/DIGANES/bw_pics'

#make sure the target directory exists before anything is saved into it
os.makedirs(bw_dir, exist_ok=True)

for filename in os.listdir(image_dir):
    img_path = os.path.join(image_dir, filename)
    #skip hidden entries (e.g. .DS_Store) and sub-directories: they cannot be read as images
    if filename.startswith('.') or not os.path.isfile(img_path):
        continue

    image = io.imread(img_path)
    #only images with a channel axis need a gray scale conversion
    if image.ndim == 3:
        image = color.rgb2gray(image)

    #pixels brighter than the Otsu threshold become white, all others black
    thresh = threshold_otsu(image)
    binary = image > thresh
    #NOTE(review): the output keeps the input file name; if that is a lossy format the
    #saved pixels may not be strictly two-valued - confirm whether that matters downstream
    io.imsave(os.path.join(bw_dir, filename), skimage.img_as_ubyte(binary))