In [1]:
import cv2 as cv
import os, shutil
from os.path import join, exists
import numpy as np

CATS_DIR = 'cats'               # input directory with the original pictures
RESIZED_DIR = 'cats_resized'    # output directory for the squared pictures
IMG_SIZE_PX = 750   # px
BLACK_THRESHOLD = 50            # gray values <= this are treated as black


def crop_to_square(image, black_threshold=BLACK_THRESHOLD):
    """Crop a BGR image to a centred square that excludes the black bands.

    Starting from the vertical centre of the image, the content band ends at
    the closest fully black row above and below (a row is black when none of
    its pixels is brighter than `black_threshold`). If a side has no black
    row, the band extends to that image border. The largest square that fits
    in the band, centred both vertically and horizontally, is returned.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image as returned by `cv.imread`.
    black_threshold : int
        Gray level above which a pixel counts as content.

    Returns
    -------
    numpy.ndarray
        Square slice of `image` (side = min(band height, image width)).

    Raises
    ------
    ValueError
        If the rows next to the centre are black, so the band is empty.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    height, width = gray.shape
    row_is_black = ~(gray > black_threshold).any(axis=1)

    half = height // 2
    black_above = np.flatnonzero(row_is_black[:half])
    black_below = np.flatnonzero(row_is_black[half:])

    # `top` is the first content row (one past the last black row above the
    # centre) and `bottom` is exclusive (the first black row below the centre),
    # so no black row ends up inside the crop.
    top = int(black_above[-1]) + 1 if black_above.size else 0
    bottom = int(black_below[0]) + half if black_below.size else height

    band_height = bottom - top
    side = min(band_height, width)
    if side <= 0:
        raise ValueError('no non-black rows around the image centre')

    # Offsets computed from the side itself so the crop is exactly side x side,
    # even when `side` is odd.
    y0 = top + (band_height - side) // 2
    x0 = (width - side) // 2
    return image[y0:y0 + side, x0:x0 + side]


# list all files in the directory
files = sorted(os.listdir(CATS_DIR))

assert all(file.endswith('.PNG') for file in files), 'Not all files are PNGs'

print(f'Found {len(files)} cats')

# create a new, empty directory for the resized images
if exists(RESIZED_DIR):
    shutil.rmtree(RESIZED_DIR)
os.makedirs(RESIZED_DIR)

for file in files:
    path = join(CATS_DIR, file)
    print(path, end='\r')

    cat = cv.imread(path)
    if cat is None:  # cv.imread signals failure by returning None
        raise IOError(f'Could not read image: {path}')

    try:
        squared_cat = crop_to_square(cat)
    except ValueError as err:
        raise ValueError(f'{path}: {err}') from err

    hs, ws = squared_cat.shape[:2]  # numpy shape is (height, width)
    assert ws == hs, f'Image is not square: {ws}x{hs}'

    # standardize the size
    squared_cat = cv.resize(squared_cat, (IMG_SIZE_PX, IMG_SIZE_PX))

    # save the image
    if not cv.imwrite(join(RESIZED_DIR, file), squared_cat):
        raise IOError(f'Could not write image: {join(RESIZED_DIR, file)}')

# Terminate the '\r' progress line so the next print does not overwrite it.
print()


#######################################################################################
CAT_DIR = 'cats_resized'
PAGES_DIR = 'pages'
#clear the pages directory
if exists(PAGES_DIR):
    shutil.rmtree(PAGES_DIR)
os.makedirs(PAGES_DIR)
files = sorted(os.listdir(CAT_DIR))

A4_H = 297.0        # mm #dont change this
A4_W = 210.0        # mm #dont change this

IMG_SIZE_MM = 50    # mm
L_MARGIN = 19.0     # mm
R_MARGIN = 13.2     # mm
T_MARGIN = 36.7     # mm
B_MARGIN = 36.7     # mm
SEPARATION = 0.0    # mm

GRID_H = 5
GRID_W = 3
CATS_PER_PAGE = GRID_H * GRID_W

pm = IMG_SIZE_PX / IMG_SIZE_MM # pixels per mm

# Size in pixels of the printable block: margins + grid of images + gaps.
h_pix = pm*T_MARGIN + GRID_H*IMG_SIZE_PX + (GRID_H-1)*pm*SEPARATION + pm*B_MARGIN
w_pix = pm*L_MARGIN + GRID_W*IMG_SIZE_PX + (GRID_W-1)*pm*SEPARATION + pm*R_MARGIN

# Grow the shorter dimension so the page has the A4 aspect ratio; the block
# above is then centred in the resulting (nh_pix x nw_pix) canvas.
a4_ratio = A4_H / A4_W
img_ratio = h_pix / w_pix
if a4_ratio > img_ratio:
    nh_pix, nw_pix = w_pix * a4_ratio, w_pix
else:
    nh_pix, nw_pix = h_pix, h_pix / a4_ratio

h_pix, w_pix, nh_pix, nw_pix = int(h_pix), int(w_pix), int(nh_pix), int(nw_pix)

# Top-left corner of the first image of the grid.
grid_start_x = int((nw_pix - w_pix)/2 + pm*L_MARGIN)
grid_start_y = int((nh_pix - h_pix)/2 + pm*T_MARGIN)

n_pages = (len(files) + CATS_PER_PAGE - 1) // CATS_PER_PAGE  # ceil division
print(f'n_pages: {n_pages}')

# Build and write one page at a time: a page is a large array, so keeping all
# of them in memory (or allocating a scratch page per image) is wasteful.
for p in range(n_pages):
    page = np.full((nh_pix, nw_pix, 3), 255, dtype='uint8')  # white canvas

    page_files = files[p*CATS_PER_PAGE:(p + 1)*CATS_PER_PAGE]
    for slot, file in enumerate(page_files):
        row, col = divmod(slot, GRID_W)
        print(f'page: {p}, row: {row}, col: {col}     ', end='\r')

        x = int(grid_start_x + col*(IMG_SIZE_PX + pm*SEPARATION))
        y = int(grid_start_y + row*(IMG_SIZE_PX + pm*SEPARATION))

        img_path = join(CAT_DIR, file)
        img = cv.imread(img_path)
        if img is None:  # cv.imread signals failure by returning None
            raise IOError(f'Could not read image: {img_path}')
        if img.shape[:2] != (IMG_SIZE_PX, IMG_SIZE_PX):
            raise ValueError(
                f'{img_path}: expected {IMG_SIZE_PX}x{IMG_SIZE_PX} px, '
                f'got {img.shape[1]}x{img.shape[0]}'
            )
        page[y:y+IMG_SIZE_PX, x:x+IMG_SIZE_PX] = img

    # NOTE: page numbers are not zero-padded, so a plain lexicographic sort of
    # these names puts page_10 before page_2; consumers must sort numerically.
    page_path = join(PAGES_DIR, f'page_{p}.PNG')
    if not cv.imwrite(page_path, page):
        raise IOError(f'Could not write page: {page_path}')

# Terminate the '\r' progress line so later output is not overwritten.
print()


Found 367 cats
n_pages: 25
page: 24, row: 2, col: 0     

In [2]:
#convert pages to pdf
from fpdf import FPDF

PAGES_DIR = 'pages'
PDF_DIR = 'pdf'

# Size of the PDF page the images are stretched to (A4 portrait, in mm).
A4_W_MM = 210
A4_H_MM = 297


def page_sort_key(filename):
    """Return a sort key that orders 'page_<n>.<ext>' names by the integer n.

    A plain string sort places 'page_10.PNG' before 'page_2.PNG', which would
    shuffle the PDF as soon as there are more than ten pages. Names without a
    trailing decimal number are placed after the numbered ones, ordered
    alphabetically.
    """
    stem = os.path.splitext(filename)[0]
    suffix = stem.rpartition('_')[2]
    if suffix.isdecimal():
        return (0, int(suffix), filename)
    return (1, 0, filename)


#clear the pdf directory
if exists(PDF_DIR):
    shutil.rmtree(PDF_DIR)
os.makedirs(PDF_DIR)

files = sorted(os.listdir(PAGES_DIR), key=page_sort_key)
files = [join(PAGES_DIR, file) for file in files]

pdf = FPDF()
for n, file in enumerate(files):
    print(f'adding page {n+1}/{len(files)}', end='\r')
    pdf.add_page()
    # Place the page image at the top-left corner, covering the whole sheet.
    pdf.image(file, 0, 0, A4_W_MM, A4_H_MM)
print()  # terminate the '\r' progress line
pdf.output(join(PDF_DIR, 'cats.pdf'), "F")

adding page 25/25

''