In [6]:
!pip install Pillow



In [23]:
from google.colab import drive
import os
from PIL import Image
import numpy as np

In [8]:
# Mount Google Drive at /content/drive; the dataset paths defined in the next
# cell live under this mount point, so this must run before they are used.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
# Dataset root on the mounted drive, plus the folder holding the original
# images (raw) and the folder that receives the resized copies (processed).
path = "/content/drive/My Drive/01 - Courses/04 - Machine Learning Engineer Nanodegree/brain_tumor_detection/data"
raw, processed = (os.path.join(path, subdir) for subdir in ("raw", "processed"))

In [12]:
def preprocess_data(input_path, output_path, labels=['yes', 'no'], shape=(128, 128)):
  '''
  Resizes every image found in the label folders and saves an RGB copy.

  For each label, every file in <input_path>/<label> is opened, converted to
  RGB, resized to `shape` and written to <output_path>/<label> under the same
  file name. A file that cannot be processed does not abort the run; it is
  collected and listed in the summary printed at the end.

  Arguments:
    input_path (str): directory containing one sub-folder of input images per label
    output_path (str): directory that receives the processed images, one
      sub-folder per label (created if missing)
    labels (list): names of the class sub-folders to process
    shape (tuple): output image size handed to Image.resize (Pillow expects (width, height))
  Returns: None
  '''
  # Keyed by "<label>/<file name>": the same file name can exist in several
  # label folders, and keying by the bare name would let one failure silently
  # overwrite another.
  unsuccessful_files = {}

  os.makedirs(output_path, exist_ok=True)

  for folder_name in labels:
    folder_path = os.path.join(input_path, folder_name)
    output_folder = os.path.join(output_path, folder_name)
    os.makedirs(output_folder, exist_ok=True)

    # sorted() makes the processing (and error report) order deterministic
    for fname in sorted(os.listdir(folder_path)):
      fpath = os.path.join(folder_path, fname)
      opath = os.path.join(output_folder, fname)
      try:
        # The context manager closes the underlying file handle even when
        # conversion or saving fails.
        with Image.open(fpath) as img:
          # Convert before resizing: Pillow forces nearest-neighbour
          # resampling for palette ("P") and bilevel ("1") images, so resizing
          # first would degrade those scans.
          img.convert('RGB').resize(shape).save(opath)

      except Exception as e:
        # Best-effort batch job: remember the failure and keep going.
        unsuccessful_files[os.path.join(folder_name, fname)] = e

  if unsuccessful_files:
    print('Error processing the following files:\n')
    for index, (key, error) in enumerate(unsuccessful_files.items(), 1):
      print(f'{index}. {key} - {error}')
  else:
    print('Successfully processed all images.')

In [13]:
# Resize everything under the raw folder into the processed folder, using the
# function's default labels and output size.
preprocess_data(input_path=raw, output_path=processed)

Successfully processed all images.
