## Testing for unusable pictures

This notebook takes the images in my "purgatory" folder and tests whether they will be able to flow through my model.

In [None]:
!pip install filetype

Collecting filetype
  Downloading https://files.pythonhosted.org/packages/b4/6b/7bc015da1a576ac037582ae0c5acb675371de9e017e860931e97a428ee31/filetype-1.0.7-py2.py3-none-any.whl
Installing collected packages: filetype
Successfully installed filetype-1.0.7


In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Input, Dense, Conv2D, MaxPool2D, Dropout, Flatten, GlobalAveragePooling2D

import os
import concurrent.futures
import cv2
import filetype

In [None]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there.
# Data is saved in the test_data folder of my Drive; the link is in the README for anyone to access.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def worker(filepath, filename):
    """
    Check a single file and delete it when it cannot be used as an image.

    This is the unit of work that image_checker submits once per file.
    Possible outcomes:
      * the file type cannot be guessed from the header -> message printed, file deleted
      * the header identifies a non-image type          -> message printed, file kept
      * the header says image but OpenCV cannot read it -> message printed, file deleted
      * OpenCV reads the file                           -> message printed, file kept

    filepath: full path to the file being treated
    filename: name of the file being treated (only used in the "cannot guess" message)
    """
    kind = filetype.guess(filepath)  # tries to guess file type based on header
    if kind is None:
        print('Cannot guess file type for file', filename, 'Deleting it!')
        os.remove(filepath)  # deletes file
        return

    if 'image' not in kind.mime:
        # NOTE(review): non-image files are reported but NOT deleted - confirm this is intended.
        print(f"File {filepath} is not an image. File type: {kind.extension}")
        return

    # The header looks like an image, so try to actually decode it.
    # cv2.imread reports an unreadable file by returning None rather than raising,
    # so the result is checked explicitly. Only ordinary errors are treated as
    # "invalid image"; KeyboardInterrupt/SystemExit must not trigger a deletion.
    try:
        img = cv2.imread(filepath)
    except Exception:
        img = None

    if img is None:
        print(f'file {filepath} is not a valid image file ')
        os.remove(filepath)  # deletes file
    else:
        print(f'{filepath} is an image. File type:{kind.extension}')


def image_checker(folder_path, max_workers=10):
    """
    Run the image-checking worker on every file under folder_path, including sub directories.

    Files are checked in a thread pool so that one slow file (for example a slow
    read from Drive) does not hold up the others: files keep being handed to
    threads as long as one is idle.

    A single pool is shared across the whole directory walk instead of creating
    a new one per directory. If the worker raises for a file, the error is
    printed for that file and the remaining files are still processed.

    folder_path: root folder to scan
    max_workers: maximum number of threads checking files at the same time (default 10)
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        submitted = {}  # future -> path, so a failure can be reported against its file
        for subdir, _dirs, files in os.walk(folder_path):
            for filename in files:
                filepath = os.path.join(subdir, filename)
                submitted[executor.submit(worker, filepath, filename)] = filepath

        # Without this, an exception raised inside a worker would be silently discarded.
        for future in concurrent.futures.as_completed(submitted):
            error = future.exception()
            if error is not None:
                print(f'Checking {submitted[future]} failed: {error!r}')

In [None]:
# Absolute path under the '/content/drive' mount point, so the check does not depend on the current working directory.
image_checker('/content/drive/MyDrive/capstone_translator/purgatory')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
drive/MyDrive/capstone_translator/purgatory/fork/Image_44.jpg is an image. File type:jpg
drive/MyDrive/capstone_translator/purgatory/fork/Image_35.png is an image. File type:png
drive/MyDrive/capstone_translator/purgatory/fork/Image_37.png is an image. File type:png
drive/MyDrive/capstone_translator/purgatory/fork/Image_48.jpg is an image. File type:jpg
drive/MyDrive/capstone_translator/purgatory/fork/Image_42.png is an image. File type:png
drive/MyDrive/capstone_translator/purgatory/fork/Image_43.png is an image. File type:png
drive/MyDrive/capstone_translator/purgatory/fork/Image_40.png is an image. File type:png
drive/MyDrive/capstone_translator/purgatory/fork/Image_46.jpg is an image. File type:webp
drive/MyDrive/capstone_translator/purgatory/fork/Image_50.jpg is an image. File type:jpg
drive/MyDrive/capstone_translator/purgatory/fork/Image_53.jpg is an image. File type:jpg
drive/MyDrive/capstone_translator/purgatory/