In [1]:
import os
import cv2
from src.preprocessing import make_square
from tqdm.notebook import tqdm
import random
import json

## No need to run again

In [None]:
filenames = os.listdir("data/cfp")

random.seed(42)
random.shuffle(filenames)
filenames[:10]

In [None]:
original_shuffled_map = {}

for i in range(len(filenames)):
    original_shuffled_map[filenames[i]] = f"SHUF{str(i).zfill(5)}"

with open("original_shuffled_map.json", "w") as fp:
    json.dump(original_shuffled_map,fp) 

original_shuffled_map['DEV13781.jpg']

## Run this

In [2]:
CUTOFF_THRESHOLD = 10
OUT_PATH = f'data/shuffled_square_75'

In [3]:
with open('original_shuffled_map.json') as fp:
    original_shuffled_map = json.load(fp)

In [4]:
if not os.path.isdir(OUT_PATH):
    print(f'{OUT_PATH} does not exist, creating dir')
    os.mkdir(OUT_PATH)

In [5]:
from multiprocessing import Pool

def _make_shuffled_square_img(filename):
    try:
        file = f"data/cfp/{filename}"
        img = cv2.imread(file)
        square_img, cutting, padding = make_square(img, CUTOFF_THRESHOLD)
        new_name = original_shuffled_map[filename]
        # cv2.imwrite(f"{OUT_PATH}/{new_name}.jpg", square_img, [int(cv2.IMWRITE_JPEG_QUALITY), 75])        
        return (filename, new_name, cutting, padding, square_img.shape[0], img.shape)

    except Exception as e:
        print(filename, e)
        
        
l_files = os.listdir("data/cfp")

In [6]:
with Pool() as pool:
    op_metadata = list(tqdm(pool.imap(_make_shuffled_square_img, l_files), total=len(l_files)))    
print('Finished.')

  0%|          | 0/15000 [00:00<?, ?it/s]

Finished.


In [None]:
op_metadata = []
for filename in tqdm(l_files):
    r = _make_shuffled_square_img(filename)
    op_metadata.append(r)

In [7]:
import pandas as pd

df_img_info = pd.DataFrame.from_records([{
        'orig_file' : opdata[0],
        'new_file' : opdata[1],
        'delta_x' : opdata[3][0] - opdata[2][0],
        'delta_y' : opdata[3][2] - opdata[2][2],
        'orig_crop_side' : opdata[4],
        'side' : opdata[4],
        'scaling' : 1.0,
    } for opdata in op_metadata if opdata is not None])

df_img_info.to_csv(os.path.join(OUT_PATH, 'img_info.csv'))
df_img_info

Unnamed: 0,orig_file,new_file,delta_x,delta_y,orig_crop_side,side,scaling
0,DEV01452.jpg,SHUF02915,-88,-904,3152,3152,1.0
1,DEV08405.jpg,SHUF04819,171,-144,2262,2262,1.0
2,DEV10680.jpg,SHUF00929,-138,-650,2797,2797,1.0
3,DEV13443.jpg,SHUF14397,168,-152,2256,2256,1.0
4,DEV08466.jpg,SHUF05169,195,-130,2334,2334,1.0
...,...,...,...,...,...,...,...
14995,DEV05006.jpg,SHUF00628,-34,-56,1871,1871,1.0
14996,DEV04121.jpg,SHUF09316,-131,-875,2974,2974,1.0
14997,DEV14861.jpg,SHUF04089,-91,-624,2193,2193,1.0
14998,DEV02406.jpg,SHUF09274,196,-128,2336,2336,1.0
