# Utilities
> Useful functions and utilities

In [1]:
# default_exp utils

## Image

### Cleaning up error images

In [2]:
# export
from pathlib import Path
from tqdm.notebook import tqdm
import os
import logging
from typing import Callable, List
from PIL import Image

In some cases, a dataset contains corrupted ("error") images that will interrupt model training. We can use `clean_error_img` to remove such images from the image folder, including its sub-folders.

In [3]:
# export
def check_img(
    img: Path,
    formats: List[str] = [".jpg", ".jpeg", ".png", ".bmp"],
) -> None:
    """
    Check a single image file and delete it when it cannot be opened.

    - img: path of the file to check (anything accepted by ``Path``)
    - formats: lower-case suffixes (leading dot included) that count
        as images; a path with any other suffix is left untouched.
        The default list is only read, never mutated, in this function.

    Returns None. When opening the file fails, the file is unlinked and
    a warning is logged. Directories and missing paths are never removed.
    """
    img = Path(img)
    # check if this path is an image; whatever follows a "?" in the
    # suffix (e.g. "pic.jpg?raw=true") is ignored for the comparison
    if img.suffix.lower().split("?")[0] not in formats:
        return

    try:
        # try to open that image; the context manager closes the file
        # handle right away instead of leaving it open until GC
        # NOTE(review): Image.open only identifies the file lazily, pixel
        # data is not decoded here, so truncated data may go unnoticed
        with Image.open(img):
            pass
    except Exception:
        # is_file() rather than exists(): a directory that merely carries
        # an image suffix cannot be unlinked and must be left alone
        if img.is_file():
            img.unlink()
            logging.warning(f"removed error img: {img}")


def clean_error_img(
    path: Path,
    progress: bool = True,
) -> None:
    """
    Walk an image directory and run ``check_img`` on every file in it,
    descending into sub-directories recursively.

    - path: the directory to scan (anything accepted by ``Path``)
    - progress: wrap each directory listing in a tqdm progress bar
        when True (the default); iterate silently when False

    Raises FileExistsError when ``path`` does not exist.
    NOTE(review): FileNotFoundError would describe this situation better;
    the type is kept as-is because callers may already catch it.
    """
    path = Path(path)

    # nothing to scan: fail early with a hint on where we looked
    if not path.exists():
        raise FileExistsError(
            f'path does not exists on:{path}, \n'
            f'    make sure there is a directory "{path.name}".\n'
            f'    under directory "{path.parent}"\n'
            '    '
        )

    # the listing is materialised only for the progress bar,
    # which is handed a list instead of a generator
    if progress:
        entries = tqdm(list(path.iterdir()), leave=False)
    else:
        entries = path.iterdir()

    for entry in entries:
        if not entry.is_dir():
            # plain file: check it as a single image
            check_img(entry)
            continue
        # sub folder: clean it with the same settings
        clean_error_img(entry, progress=progress)

## Testing

### test image cleaner

In [4]:
# hide
def testing_image_cleaner(func: Callable) -> None:
    path = Path("./test_remove")
    os.system(f"rm -rf {path}")
    path.mkdir()
    with open(path/f"file_1.JPEG", "w") as f:
        f.write("fake image")
        
    with open(path/f"file_1.jpg", "w") as f:
        f.write("fake image")
        
    with open(path/f"file_1.txt", "w") as f:
        f.write("not image")
        
    with open(path/f"file_2.png", "w") as f:
        f.write("fake image")
    
    sub1 = path/"sub1"
    sub2 = path/"sub2"
    sub1.mkdir()
    sub2.mkdir()
    sub3 = sub2/"sub3"
    sub3.mkdir()
    
    with open(sub1/"file3.BMP", "w") as f:
        f.write("fake image")
        
    with open(sub1/"file3 haha.txt", "w") as f:
        f.write("not image")
        
    with open(sub1/"😱file3 haha.jpg", "w") as f:
        f.write("fake image")
        
    with open(sub2/"file3.jpg", "w") as f:
        f.write("fake image")
    
    with open(sub2/"file4.jpeg", "w") as f:
        f.write("fake image")
        
    file5 = sub3/"file_5.jpeg"
    with open(file5, "w") as f:
        f.write("fake image")
        
    func(path)
    for p in [path, sub1, sub2, sub3]:
        print(list(p.iterdir()))
    os.system(f"rm -rf {path}")

In [5]:
# hide
# Run the cleaner on the fake fixture tree; in the printed listings only
# the .txt files and the sub-directories are expected to remain
testing_image_cleaner(clean_error_img)

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))



HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))



[PosixPath('test_remove/file_1.txt'), PosixPath('test_remove/sub1'), PosixPath('test_remove/sub2')]
[PosixPath('test_remove/sub1/file3 haha.txt')]
[PosixPath('test_remove/sub2/sub3')]
[]
