# Show images from IMDB dataset for labelling and cleaning

Source: https://stackoverflow.com/questions/19471814/display-multiple-images-in-one-ipython-notebook-cell

In [None]:
#use the native Tk backend
%matplotlib tk
import matplotlib.pyplot as plt

import os
import numpy as np
import pandas as pd
import PIL

import pyperclip
#sudo pip3 install pyperclip

import tkinter as tk

In [None]:
# Directory holding the images to review. It is listed with `ls` below and joined
# with each file name inside image_grid_plot. Machine-specific: adjust before running.
image_path = '/home/krittametht/dataset/agegender_imdb/annotations/gender/train/f'

In [None]:
# List the image directory through the IPython shell escape (notebook-only syntax);
# `filenames` receives the lines printed by `ls`.
filenames = ! ls $image_path
filenames_df = pd.DataFrame(filenames)
print("Total # files:", len(filenames))

# Sort by the integer image id taken from the filename (format: "%d.jpg").
# The text before the first "." is cast to int so the order is numeric rather than
# lexicographic; a name whose first piece is not an integer makes astype(int) fail.
sorted_index = filenames_df[0].str.rsplit('.').str[0].astype(int).sort_values().index
filenames_df = filenames_df.reindex(index=sorted_index)
# Plain list in the same (numerically sorted) row order as filenames_df.
filenames = filenames_df[0].tolist()

In [None]:
def index_of(filenames_series, filename):
    """Return the zero-based position of the first name starting with ``filename + "."``.

    The position refers to the row order of ``filenames_series``, not to its
    index labels. Raises ``ValueError`` when no entry matches.
    """
    prefix = filename + "."
    is_match = filenames_series.str.startswith(prefix)
    # Convert to a plain list so the result is a position, independent of labels.
    flags = is_match.tolist()
    return flags.index(True)

In [None]:
# Report progress: how many files in the sorted listing come up to and including
# the image id `to_filename_n`.
to_filename_n = 140057
to_filename = str(to_filename_n)
# index_of returns a zero-based position, so +1 turns it into a count.
count = index_of(filenames_df[0], to_filename) + 1
print("count up to {}: ".format(to_filename), count)
print("Total # files:", len(filenames))
# Share of the listing covered by that count, as a percentage with two decimals.
print("labelled (%): {:.2f}%".format(count/len(filenames)*100))

In [None]:
# Preview the first ten rows of the sorted listing (displayed as the cell output).
filenames_df.head(10)
# Swap in the line below to preview the last ten rows instead.
# filenames_df.tail(10)

In [None]:
def loop_each_n_items(l, n, start_index=0, end_index=0):
    """Yield consecutive slices of ``l`` containing at most ``n`` items each.

    Args:
        l: sliceable sequence to split into batches.
        n: batch size; also the step between batch start positions.
        start_index: position of the first item of the first batch.
        end_index: exclusive upper bound for batch start positions. ``0``
            (the default) means "not given":
              * with ``start_index == 0`` the whole sequence is walked,
                including a final batch shorter than ``n``;
              * with a non-zero ``start_index`` exactly one batch is yielded.

    Yields:
        ``l[i:i + n]`` for every batch start ``i``.
    """
    if end_index != 0:
        stop = end_index
    elif start_index == 0:
        # Stop at len(l), not len(l) - n + 1: the old bound silently dropped the
        # trailing partial batch and yielded nothing when len(l) < n.
        stop = len(l)
    else:
        stop = start_index + n
    for i in range(start_index, stop, n):
        yield l[i:i + n]


def get_display_resolution():
    """Query the screen through a temporary Tk root window.

    Returns:
        A tuple ``(width_pixel, height_pixel, dpi)`` where ``dpi`` is derived
        from the screen width in pixels and its reported width in millimetres.
    """
    mm_per_inch = 25.4
    probe = tk.Tk()
    px_wide = probe.winfo_screenwidth()
    px_high = probe.winfo_screenheight()
    inches_wide = probe.winfo_screenmmwidth() / mm_per_inch
    dots_per_inch = px_wide / inches_wide
    # The root window was only needed for the queries above; close it.
    probe.destroy()
    return px_wide, px_high, dots_per_inch

In [None]:
def onclick(event):
    """Mouse-press handler: copy the clicked subplot's image id to the clipboard.

    Clicks that land outside every axes are ignored. The id is the subplot
    title up to (not including) its first ".".
    """
    clicked_axes = event.inaxes
    if clicked_axes is None:
        return

    # The title is the image file name; keep only the part before the extension.
    image_id, _, _ = clicked_axes.get_title().partition(".")
    pyperclip.copy(image_id)

In [None]:
# Source: https://nbviewer.jupyter.org/gist/minrk/7076095

def image_grid_plot(list_filenames, figsize=(20,20), columns=5):
    """Show images in a grid on a single figure, one titled subplot per image.

    Args:
        list_filenames: iterable of batches, each an iterable of file names
            located under the module-level ``image_path``. All batches are
            drawn on the same figure, in order.
        figsize: figure size forwarded to ``plt.figure``.
        columns: number of grid columns.

    Returns:
        The created figure, with ``onclick`` connected to mouse-press events.
    """
    # Flatten every batch. Previously each batch overwrote the last, so only the
    # final one was drawn, and an empty input raised NameError.
    paths = [
        os.path.join(image_path, fn)
        for sel_filenames in list_filenames
        for fn in sel_filenames
    ]
    # Ceiling division: the grid needs an integer row count (true division gave
    # a float) and no spare empty row when the images fill the last row exactly.
    rows = (len(paths) + columns - 1) // columns

    fig = plt.figure(figsize=figsize)
    for i, path in enumerate(paths):
        sub = fig.add_subplot(rows, columns, i + 1)
        # Title is the bare file name; onclick derives the image id from it.
        sub.set_title(os.path.basename(path))
        # imshow converts the image to an array right away, so the file handle
        # can be released as soon as the call returns.
        with PIL.Image.open(path) as image:
            sub.imshow(image)
        sub.set_axis_off()  # hide the subplot's axes

    # Call onclick whenever a mouse button is pressed on the figure.
    fig.canvas.mpl_connect('button_press_event', onclick)
    # Remove the margins around and between the subplots.
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1, wspace=0.0)
    return fig

In [None]:
# Image id of the first file to display, converted to its position in the sorted listing.
start_index_n = 60462
start_index = index_of(filenames_df[0], str(start_index_n))

# Number of images per figure and number of grid columns.
plot_batch_size = 12
columns = 4

# Make the figure half the screen's width and the full screen height;
# pixel sizes are divided by dpi to get the size passed as figsize.
screen_width, screen_height, dpi = get_display_resolution()
figsize = (screen_width/2/dpi, screen_height/dpi)

# The explicit end index limits the generator to a single batch of up to plot_batch_size files.
fig = image_grid_plot(loop_each_n_items(filenames, plot_batch_size, start_index, start_index+plot_batch_size), figsize=figsize, columns=columns)

# Move the figure's upper-left corner to the horizontal middle of the screen, at the top edge.
plt.get_current_fig_manager().window.geometry("+" + str(int(screen_width/2)) + "+0")