In [None]:
import sys
sys.path.append("../input/cleanfid20211014")

import os
import glob
from pathlib import Path

from tqdm import tqdm

import torch

from cleanfid import resize
from cleanfid.resize import FolderResizer
from cleanfid.utils import EXTENSIONS


# --- What to resize ---------------------------------------------------------
# Every image is resized to each of these edge lengths, i.e. to (size, size).
SIZES = [224, 384, 512]
# Extension handed to the resizer for the written output files.
OUTPUT_EXTENSION = ".png"
# Directory that contains the input folders listed in FOLDERS.
BASE = "../input/petfinder-pawpularity-score/"
# All images found in these sub-folders of BASE are resized.
FOLDERS = ["test", "train"]
# None means no limit. Set to 10 or 30 (or similar) to try just a few images.
LIMIT = None

# --- How to run it ----------------------------------------------------------
BATCH_SIZE = 64
# Set to the number of CPU cores; Kaggle currently allows 4.
NUM_WORKERS = 4
USE_CUDA = False

# Shouldn't need to change the following if running this on Kaggle.
OUTFOLDER = "/kaggle/working"

In [None]:
# CUDA availability is only queried when the USE_CUDA switch is turned on;
# otherwise the CPU is selected unconditionally.
use_cuda = torch.cuda.is_available() if USE_CUDA else False
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

# Keyword arguments unpacked into the torch DataLoader built in process_dir.
params = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

## Some code adapted from: https://github.com/GaParmar/clean-fid
def process_dir(files, size, new_folder):
    """Run every file in *files* through a square bicubic PIL resizer.

    Args:
        files: Image paths handed to ``FolderResizer``.
        size: Edge length; the resizer is built for an output of
            ``(size, size)``.
        new_folder: Directory passed to ``FolderResizer`` as ``outpath``.

    Returns:
        None. The loader is iterated only for its side effects.
    """
    resizer = resize.make_resizer("PIL", True, "bicubic", (size, size))
    resize_dataset = FolderResizer(
        files=files,
        outpath=new_folder,
        fn_resize=resizer,
        output_ext=OUTPUT_EXTENSION,
    )
    loader = torch.utils.data.DataLoader(resize_dataset, **params)

    # NOTE(review): presumably FolderResizer resizes and saves each image when
    # an item is fetched, which is why the batches are simply discarded --
    # confirm against the cleanfid source.
    for _batch in tqdm(loader):
        pass

# Resize the images of every input folder to every configured size.
for folder in FOLDERS:
    # Recursively collect the files under BASE/<folder>, one glob per extension.
    source_root = os.path.join(BASE, folder)
    files = []
    for ext in EXTENSIONS:
        pattern = os.path.join(source_root, f"**/*.{ext}")
        files.extend(glob.glob(pattern, recursive=True))
    files.sort()

    # Optionally keep only the first LIMIT files (useful for a quick trial run).
    if LIMIT is not None:
        files = files[:LIMIT]

    for size in SIZES:
        # Each (folder, size) pair gets its own output directory, e.g. "train224".
        new_folder = os.path.join(OUTFOLDER, f"{folder}{size}")
        os.makedirs(new_folder, exist_ok=True)

        print(f"Found {len(files)} images for folder {new_folder}")
        process_dir(files, size, new_folder)