# manual categorization of pictures

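Click on pictures in a grid to mark them for moving from folder *move_from* to folder *move_to*; clicking a marked picture again unmarks it.  
Use the up/down arrow keys to switch between grid panels.  
This makes manual selection of pictures easier.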

tested in Python version 3.7.7 on Windows 10

**important:**  
- if a file with the same name already exists in folder *move_to*, the file is not moved; it is deleted from *move_from* instead
- files are not moved until the pygame window is closed

**TODO:**  
- think of way to save progress in case of crash
- allow video categorization
- specify package requirements

**user parameters:**

In [None]:
# folder containing the pictures to be sorted
# (every file in it is loaded as an image when the grid is set up):
move_from = "../../AI stuff/instagram/newly_scraped"

# folder that the clicked (selected) pictures are moved into when the window is closed:
move_to = "../../AI stuff/instagram/nonpics"


gridlen = 300   # edge length in px of one square grid cell; each picture's longer side is scaled to this
n_cols = 4      # number of columns in grid
n_rows = 3      # number of rows in grid

In [None]:
# imports

import sys
import PIL
from PIL import Image
import os
import hashlib
import cv2
from pathlib import Path
from tqdm import tqdm
import pygame
from pygame.locals import *

In [None]:
# utility functions

def get_hash_from_img(img):
    """Return the hexadecimal MD5 digest of the bytes given by ``img.tobytes()``."""
    raw_bytes = img.tobytes()
    return hashlib.md5(raw_bytes).hexdigest()

def get_hash_from_imgfile(imgfile):
    """Return the hexadecimal MD5 digest of the decoded pixel data of an image file.

    The image is opened in a ``with`` block so its file handle is closed
    before returning; callers rename or delete the file right afterwards,
    which fails on Windows while the file is still open.

    Args:
        imgfile: path of the image file (anything ``PIL.Image.open`` accepts).

    Returns:
        The digest as a hex string.
    """
    with Image.open(imgfile) as img:
        return get_hash_from_img(img)

def get_img_from_vidfile(vidfile):
    """Return the first frame of a video file as read by OpenCV.

    The capture is always released so the video file is not left open;
    callers rename or delete the file right afterwards.

    Args:
        vidfile: path of the video file (converted with ``str`` for OpenCV).

    Returns:
        The frame returned by ``VideoCapture.read()``. The success flag of
        ``read()`` is not checked here, so whatever OpenCV returns for an
        unreadable video (expected to be ``None``) is passed on unchanged.
    """
    vidcap = cv2.VideoCapture(str(vidfile))
    try:
        _, image = vidcap.read()
    finally:
        vidcap.release()
    return image

def get_hash_from_vid(vidfile):
    """Return the MD5 hex digest of a video's first frame.

    The frame from ``get_img_from_vidfile`` is wrapped in a PIL image and
    hashed with ``get_hash_from_img``.
    """
    first_frame = Image.fromarray(get_img_from_vidfile(vidfile))
    return get_hash_from_img(first_frame)
    
def get_hash_filename(any_file):
    """Build the content-hash based path for a picture or video file.

    Files with an ``.mp4`` extension (matched case-insensitively) are hashed
    via their first video frame; everything else is hashed as an image.

    Args:
        any_file: path of the file to hash (``str`` or ``Path``).

    Returns:
        A ``Path`` in the same folder as *any_file*, named
        ``<md5 hex digest><original extension>``.
    """
    any_file = Path(any_file)
    ext = any_file.suffix
    # compare case-insensitively so e.g. ".MP4" is not opened as an image
    if ext.lower() == ".mp4":
        hashname = get_hash_from_vid(any_file)
    else:
        hashname = get_hash_from_imgfile(any_file)
    # keep the extension exactly as it was spelled in the original name
    return any_file.parent / (hashname + ext)

def rename_file_with_hash(any_file):
    """Rename a file to its content-hash name, dropping it if that name is taken.

    Args:
        any_file: path of the file to rename (``str`` or ``Path``).

    If the file already carries its hash name nothing is done. This keeps
    the "name is taken" branch below from ever deleting the only copy.
    """
    any_file = Path(any_file)
    new_filename = get_hash_filename(any_file)
    if new_filename == any_file:
        # already named after its hash; nothing to rename
        return
    try:
        os.rename(any_file, new_filename)
    except FileExistsError:
        # raised on Windows when the target exists: another file has the
        # same hash, so this one must be a duplicate -> remove it
        os.remove(any_file)

def rename_files_with_hash(path):
    """Apply ``rename_file_with_hash`` to every entry of folder *path*, with a progress bar."""
    folder = Path(path)
    entries = [folder / name for name in os.listdir(path)]
    for entry in tqdm(entries):
        rename_file_with_hash(entry)

def move_file(f, dest):
    """Move file *f* into folder *dest*, keeping its name.

    If *dest* already contains a file with that name, the existing file is
    kept and *f* is deleted instead (it is treated as a duplicate). The
    check is done explicitly because ``os.rename`` only raises
    ``FileExistsError`` on Windows; on POSIX it would silently overwrite
    the existing file.

    Args:
        f: path of the file to move (``str`` or ``Path``).
        dest: path of the destination folder (``str`` or ``Path``).
    """
    f = Path(f)
    target = Path(dest) / f.name
    if target.exists():
        if os.path.samefile(f, target):
            # source and destination are the same file; deleting it would lose it
            return
        # must be double, remove
        os.remove(f)
        return
    try:
        os.rename(f, target)
    except FileExistsError:
        # target appeared between the check and the rename (Windows): treat as double
        os.remove(f)
    
def move_to_main_folder(move_from, move_to):
    """Move every entry of folder *move_from* into folder *move_to*.

    Each entry is handed to ``move_file``, which decides what happens when
    a file of the same name already exists in *move_to*.

    Args:
        move_from: path of the source folder.
        move_to: path of the destination folder.
    """
    # os.listdir returns a complete list up front, so moving files while
    # looping does not disturb the iteration
    for name in os.listdir(move_from):
        move_file(Path(move_from) / name, move_to)

In [None]:
# setup pygame pictures

# setup variables
path = move_from
files = [Path(path) / entry for entry in os.listdir(path)]

# load every file as a pygame surface at its original size
pygame_loaded_pics = [pygame.image.load(str(pic_file)) for pic_file in tqdm(files)]

# scale each surface so that its longer edge becomes gridlen px (aspect ratio kept);
# pygame_pics[i] is the scaled version of files[i]
pygame_pics = []
for surface in tqdm(pygame_loaded_pics):
    pic_w, pic_h = surface.get_rect().size
    shrink = gridlen / max(pic_w, pic_h)
    target_size = (int(pic_w * shrink), int(pic_h * shrink))
    pygame_pics.append(pygame.transform.scale(surface, target_size))

**block below runs the pygame window**

In [None]:
window_dims = [gridlen*n_cols, gridlen*n_rows]   # window [width, height] in px
n_pics_per_panel = n_cols*n_rows                 # pictures shown at once
current_ind = 0                                  # index of the panel currently shown

# number of panels = ceil(len(files) / n_pics_per_panel), so no empty extra
# panel appears when the pictures fill the last panel exactly;
# at least 1 so that clicked[current_ind] exists even for an empty folder
total_panels = max(1, -(-len(files) // n_pics_per_panel))

# clicked[p] holds the in-panel indices of the pictures selected on panel p
clicked = [[] for _ in range(total_panels)]
pbar = tqdm(total=total_panels)

def get_col_row_from_ind(ind):
    """Return ``[col, row]`` of the grid cell with in-panel index *ind* (cells are numbered row by row)."""
    return [ind % n_cols, int(ind / n_cols)]

def get_ind_from_coords(x, y):
    """Return the in-panel index of the grid cell that contains window pixel (*x*, *y*)."""
    grid_col, grid_row = int(x / gridlen), int(y / gridlen)
    return grid_row * n_cols + grid_col

def get_pics_from_clicked():
    """Return the entries of ``files`` for all selected pictures, panel by panel.

    ``clicked[panel]`` stores in-panel indices; the index into ``files`` is
    ``panel * n_pics_per_panel + in-panel index``.
    """
    return [
        files[panel * n_pics_per_panel + slot]
        for panel, slots in enumerate(clicked)
        for slot in slots
    ]

# open window
pygame.init()
fps = 60
fpsClock = pygame.time.Clock()
width, height = window_dims
screen = pygame.display.set_mode((width, height))

running = True
try:
    while running:
        screen.fill((0, 0, 0))

        # draw the current panel; selected pictures are not drawn, so their
        # cell stays black
        for pic_ind in range(n_pics_per_panel):
            im_ind = current_ind*n_pics_per_panel + pic_ind
            col, row = get_col_row_from_ind(pic_ind)
            if pic_ind not in clicked[current_ind] and im_ind < len(pygame_pics):
                screen.blit(pygame_pics[im_ind], (gridlen*col, gridlen*row))

        # process events
        for event in pygame.event.get():
            if event.type == pygame.MOUSEBUTTONUP:
                # use the position stored in the event, not the current
                # mouse position, which may have moved on since the click
                x, y = event.pos
                if not (0 <= x < width and 0 <= y < height):
                    # released outside the grid: no cell to toggle
                    continue
                ind = get_ind_from_coords(x, y)
                if ind in clicked[current_ind]:
                    # clicked again = remove that element again from clicked list
                    clicked[current_ind].remove(ind)
                elif current_ind*n_pics_per_panel + ind < len(files):
                    # only cells that actually show a picture can be selected
                    clicked[current_ind].append(ind)
            elif event.type == pygame.QUIT:
                # leave the loop; cleanup and moving happen below
                running = False
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_DOWN and current_ind < total_panels - 1:
                    current_ind += 1
                    pbar.update(1)
                if event.key == pygame.K_UP and current_ind > 0:
                    current_ind -= 1
                    pbar.update(-1)

        pygame.display.update()
        # cap the loop at `fps` iterations per second instead of busy-spinning
        fpsClock.tick(fps)
finally:
    # close the window and the progress bar even if the loop raised
    pygame.quit()
    pbar.close()

# window was closed normally: now move the selected pictures
for selected_file in get_pics_from_clicked():
    move_file(selected_file, move_to)