# Preprocessing
By Jorge Fuentes

The Allen Brain Analysis requires photos to be darkened, straightened, and cropped. These functions aim to achieve this programmatically.

Currently only works with .jpg images whose RGB values are in the range 0-255.


## Table of contents
1. [File Helpers](#file)
2. [Crop Image](#crop)
3. [Filter Image](#filter)
4. [Main](#main)

In [1]:
import numpy as np
import imageio
import os
from tqdm import tqdm
from scipy import ndimage

## File Helpers <a name="file"></a>
These functions build on the `os` module to navigate the file structure.

In [2]:
def mkdirsafe(newpath):
    """
    Ensures the directory exists, creating it (and any missing parents) if necessary
    :param str newpath: The path to the directory
    :raises FileExistsError: if newpath already exists but is not a directory
    """
    # exist_ok=True avoids the check-then-create race of testing os.path.exists first
    os.makedirs(newpath, exist_ok=True)

def filterdirectory(path, extension):
    """
    Find unhidden files with the extension in the path's directory.
    The comparison is case-insensitive on both the filename and the extension.
    :param str path: Path to directory
    :param extension: File extension on the end of filename (a str, or a tuple of str)
    :return: list of matching entry names (not full paths), in os.listdir order
    """
    # Filenames are lower-cased before comparison, so the extension must be too;
    # otherwise an argument such as ".JPG" could never match anything.
    if isinstance(extension, str):
        suffixes = (extension.lower(),)
    else:
        suffixes = tuple(ext.lower() for ext in extension)

    return [
        name for name in os.listdir(path)
        if not name.startswith('.') and name.lower().endswith(suffixes)
    ]


## Crop Image <a name="crop"></a>
testArr = np.array([[[0,255,255],[255,255,255],[0,255,255]],
                    [[255,255,255],[255,255,255],[255,255,255]], 
                    [[255,255,0],[255,255,255],[255,255,0]]], dtype="uint8")  # 3x3 RGB sample: pure white except the four corner pixels

In [3]:
def maskOfColor(color, img):
    """
    Build a boolean mask of the pixels whose first three channels equal color
    :param color: sequence holding the three channel values to look for
    :param numpy array img: picture array indexed as [row, column, channel]
    :return: boolean array indexed as [row, column], True where the pixel matches
    """
    # compare channel by channel, then keep only pixels where all three agree
    matches = [img[:, :, channel] == color[channel] for channel in range(3)]
    return matches[0] & matches[1] & matches[2]

In [4]:
def cropImage(picArr, bufferRatio):
    """
    It turns all pure white to pure black, then removes any pure black rows and columns,
        then adds a black buffer that is a bufferRatio fraction of the cropped picture.

    The input array is left unmodified; a new array is returned.

    :param numpy array uint8 picArr: picture array indexed as [row, column, channel]
    :param float bufferRatio: fraction of the cropped image's height/width to add to each side
    :return: the cropped and padded picture array
    """
    white = [255, 255, 255]
    black = [0, 0, 0]

    # Background = pixels that are pure black, or pure white (which is to be blanked).
    # Tracking it as a mask avoids overwriting the caller's array in place.
    background = maskOfColor(white, picArr) | maskOfColor(black, picArr)

    # remove all rows, then all columns, made up only of background
    keepRows = ~np.all(background, axis=1)
    cropped = picArr[keepRows]          # boolean indexing yields a copy
    background = background[keepRows]

    keepCols = ~np.all(background, axis=0)
    cropped = cropped[:, keepCols]
    background = background[:, keepCols]

    # turn the remaining white pixels black (black pixels are unaffected)
    cropped[background] = black

    # add rows and cols of black; both sizes are taken from the cropped picture
    rowPad = int(round(cropped.shape[0] * bufferRatio))
    colPad = int(round(cropped.shape[1] * bufferRatio))
    return np.pad(cropped, ((rowPad, rowPad), (colPad, colPad), (0, 0)), mode='constant')

## Filter Image<a name="filter"></a>

In [5]:
def filterImage(img):
    """
    Take a Gaussian filter of just the green part of the image to reduce noise
    :param numpy array uint8 img: picture array indexed as [row, column, channel]
    :return: array indexed as [row, column] holding the smoothed green channel
    """
    #only look at green values
    green = img[:, :, 1]
    #the Gaussian filter (sigma = 2) is used to reduce noise by blurring/smoothing the image
    #called via scipy.ndimage directly: the scipy.ndimage.filters namespace is deprecated
    smoothed = ndimage.gaussian_filter(green, 2)
    #TODO: add option to compress image
    return smoothed

## Main Code<a name="main"></a>
Change variables below to your values

In [7]:
# Directory holding the source .jpg images; the main cell below changes into it
path_to_images = "/Users/jfuentes/Projects/Allen-Brain-Analysis/Images"
# Fraction of the cropped height/width added as black padding on each side (passed to cropImage)
bufferRatio = 0.1

In [8]:
# Work from inside the image directory so the relative input/output paths resolve there
os.chdir(path_to_images)
directory = filterdirectory(path_to_images, ".jpg")
outputDir = 'preprocessed'
mkdirsafe(outputDir)
print(directory)

for picPath in tqdm(directory):
    # tqdm.write prints the filename without overlapping the progress bar
    tqdm.write(picPath)
    newPicPath = os.path.join(outputDir, picPath)

    # read -> crop away blank background -> smooth green channel -> save under preprocessed/
    picArr = imageio.imread(picPath)
    picArr = cropImage(picArr, bufferRatio)
    picArr = filterImage(picArr)
    imageio.imwrite(newPicPath, picArr)

  0%|          | 0/10 [00:00<?, ?it/s]

['362_593_1_2_ps.jpg', '398_593_1_11.jpg', '370_593_1_4_ps.jpg', '394_593_1_10.jpg', '374_593_1_5.jpg', '382_593_1_7.jpg', '1_test.jpg', '390_593_1_9.jpg', '386_593_1_8.jpg', '406_593_2_1.jpg']
362_593_1_2_ps.jpg


 10%|█         | 1/10 [00:06<00:55,  6.21s/it]

398_593_1_11.jpg


 20%|██        | 2/10 [00:11<00:45,  5.68s/it]

370_593_1_4_ps.jpg


 30%|███       | 3/10 [00:16<00:37,  5.33s/it]

394_593_1_10.jpg


 40%|████      | 4/10 [00:23<00:34,  5.79s/it]

374_593_1_5.jpg


 50%|█████     | 5/10 [00:29<00:29,  5.82s/it]

382_593_1_7.jpg


 60%|██████    | 6/10 [00:35<00:23,  5.84s/it]

1_test.jpg
390_593_1_9.jpg


 80%|████████  | 8/10 [00:41<00:10,  5.14s/it]

386_593_1_8.jpg


 90%|█████████ | 9/10 [00:47<00:05,  5.26s/it]

406_593_2_1.jpg


100%|██████████| 10/10 [00:55<00:00,  5.58s/it]
