# Preprocessing
By Jorge Fuentes

The Allen Brain Analysis requires photos to be darkened, straightened, and cropped. These functions aim to achieve this programmatically.

Currently only works with .jpg images whose RGB values are in the range 0-255.


## Table of contents
1. [File Helpers](#file)
2. [Crop Image](#crop)
3. [Filter Image](#filter)
4. [Main](#main)

In [8]:
import numpy as np
import imageio
import os
from tqdm import tqdm
from scipy import ndimage

## File Helpers <a name="file"></a>
These functions build on the os library to navigate the file structure.

In [9]:
def mkdirsafe(newpath):
    """
    Ensure the directory exists, creating it (and any missing parents) if necessary.

    The creation is done in a single call with ``exist_ok=True`` instead of a
    separate existence check, so there is no window in which another process
    can create the directory between the check and the creation.

    :param str newpath: The path to the directory
    :raises FileExistsError: if newpath already exists but is not a directory
    """
    os.makedirs(newpath, exist_ok=True)

def filterdirectory(path, extension):
    """
    Find unhidden entries with the extension in the path's directory.

    The match is case-insensitive on both sides, so ".jpg" and ".JPG" select
    the same entries. Names starting with '.' are skipped. No check is made
    that an entry is a regular file rather than a sub-directory.

    :param str path: Path to directory
    :param str extension: File extension on the end of filename
    :return: list of matching names (not full paths), in os.listdir order
    """
    # Normalise once so an upper-case extension still matches lower-cased names
    suffix = extension.lower()
    return [name for name in os.listdir(path)
            if not name.startswith('.') and name.lower().endswith(suffix)]


## Crop Image <a name="crop"></a>
# 3x3 RGB fixture: all white, except the top corners have red zeroed
# ([0,255,255]) and the bottom corners have blue zeroed ([255,255,0]).
testArr = np.full((3, 3, 3), 255, dtype="uint8")
testArr[0, [0, 2], 0] = 0
testArr[2, [0, 2], 2] = 0

In [10]:
def maskOfColor(color, img):
    """
    Build a 2-D boolean mask marking the pixels of img that equal color.

    Only channels 0, 1 and 2 of the last axis are compared.

    :param color: sequence whose first three items are the channel values to match
    :param numpy array img: picture array indexed as [row, col, channel]
    :return: boolean array, True where all three channels match
    """
    perChannel = [img[:, :, channel] == color[channel] for channel in range(3)]
    return perChannel[0] & perChannel[1] & perChannel[2]

In [11]:
def cropImage(picArr, bufferRatio):
    """
    Crop an RGB picture down to its content and surround it with a black buffer.

    Pure white pixels are treated as background and turned pure black. Every
    row and every column made up only of pure black pixels is then removed
    (anywhere in the picture, not only at the edges). Finally a black buffer
    is added on each side, sized as bufferRatio times the cropped height
    (top/bottom) and the cropped width (left/right).

    The input array is never written to: all changes are made on the cropped
    copy, so the caller's data is preserved and read-only arrays are accepted.

    :param numpy array uint8 picArr: picture array of shape (rows, cols, 3)
    :param float bufferRatio: fraction of the cropped image's height/width to add to each side
    :return: new array holding the cropped, buffered picture
    """
    picArr = np.asarray(picArr)

    # Background = pure white (to be blackened) or already pure black
    isWhite = np.all(picArr == 255, axis=2)
    isBlack = ~np.any(picArr, axis=2)
    background = isWhite | isBlack

    # A row/column is kept when at least one of its pixels is not background.
    # Removed rows are entirely background, so deciding the columns on the
    # full picture gives the same answer as deciding them after the row crop.
    keepRows = ~np.all(background, axis=1)
    keepCols = ~np.all(background, axis=0)

    # Boolean indexing copies, so 'cropped' is independent of the input
    cropped = picArr[keepRows][:, keepCols]
    #turn the remaining white pixels black
    cropped[isWhite[keepRows][:, keepCols]] = 0

    #add rows and cols of black
    rowPad = int(round(cropped.shape[0] * bufferRatio))
    colPad = int(round(cropped.shape[1] * bufferRatio))
    return np.pad(cropped, ((rowPad, rowPad), (colPad, colPad), (0, 0)), mode='constant')

## Filter Image<a name="filter"></a>

In [12]:
def filterImage(img, sigma=2):
    """
    Take a gaussian filter of just the green part of the image to reduce noise.

    :param numpy array uint8 img: picture array indexed as [row, col, channel];
        only channel 1 (green) is used
    :param sigma: standard deviation of the gaussian kernel passed to
        scipy.ndimage.gaussian_filter (default 2)
    :return: 2-D array holding the smoothed green channel
    """
    #only look at green values
    green = img[:, :, 1]
    #the gaussian filter is used to reduce noise by blurring/smoothing the image
    # (called through the public scipy.ndimage namespace; the old
    # scipy.ndimage.filters sub-module is deprecated)
    #TODO: add option to compress image
    return ndimage.gaussian_filter(green, sigma)

## Main Code<a name="main"></a>
Change variables below to your values

In [15]:
# Directory holding the .jpg images to preprocess; edit for your own machine.
path_to_images = "/Users/jfuentes/Projects/Allen-Brain-Analysis/idealImages/2495"
# Passed to cropImage: fraction of the cropped height/width added as a black
# buffer on each side.
bufferRatio = 0.1

In [16]:
# Work from inside the image directory so the relative paths below
# (the image names and 'preprocessed/') resolve against it.
# NOTE: this changes the working directory for the rest of the session.
os.chdir(path_to_images)
# Names of the unhidden .jpg entries directly inside path_to_images
directory = filterdirectory(path_to_images,".jpg")
# Output folder, created inside path_to_images because of the chdir above
mkdirsafe('preprocessed')
print(directory)

for picPath in tqdm(directory):
    print(picPath)
    # Output keeps the original file name, under preprocessed/
    newPicPath = "preprocessed/" + picPath 

    picArr = imageio.imread(picPath)
    # Blacken the white background, drop all-black rows/columns, add the buffer
    picArr = cropImage(picArr, bufferRatio)
    # Keep only the smoothed green channel, so the saved image is a 2-D array
    picArr = filterImage(picArr)
    imageio.imwrite(newPicPath, picArr)

  0%|          | 0/61 [00:00<?, ?it/s]

['165_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{4}.jpg', '185_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{6}.jpg', '250_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{4}.jpg', '405_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{6}.jpg', '374_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{3}.jpg', '305_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{11}.jpg', '125_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{1}.jpg', '105_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{3}.jpg', '153_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{14}.jpg', '230_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{11}.jpg', '285_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{10}.jpg', '226_out_{Wendy}{012115}{2495-2}{Jan 2

  2%|▏         | 1/61 [00:08<08:17,  8.30s/it]

185_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{6}.jpg


  3%|▎         | 2/61 [00:15<07:46,  7.90s/it]

250_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{4}.jpg


  5%|▍         | 3/61 [00:24<07:56,  8.21s/it]

405_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{6}.jpg


  7%|▋         | 4/61 [00:28<06:50,  7.20s/it]

374_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{3}.jpg


  8%|▊         | 5/61 [00:36<06:51,  7.35s/it]

305_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{11}.jpg


 10%|▉         | 6/61 [00:46<07:06,  7.75s/it]

125_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{1}.jpg


 11%|█▏        | 7/61 [00:51<06:36,  7.35s/it]

105_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{3}.jpg


 13%|█▎        | 8/61 [00:56<06:11,  7.01s/it]

153_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{14}.jpg


 15%|█▍        | 9/61 [01:02<05:58,  6.90s/it]

230_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{11}.jpg


 16%|█▋        | 10/61 [01:10<06:00,  7.08s/it]

285_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{10}.jpg


 18%|█▊        | 11/61 [01:23<06:17,  7.55s/it]

226_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{8}.jpg


 20%|█▉        | 12/61 [01:31<06:13,  7.63s/it]

314_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{8}.jpg


 21%|██▏       | 13/61 [01:42<06:16,  7.85s/it]

209_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{7}.jpg


 23%|██▎       | 14/61 [01:51<06:15,  7.98s/it]

197_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{4}.jpg


 25%|██▍       | 15/61 [02:01<06:11,  8.08s/it]

113_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{9}.jpg


 26%|██▌       | 16/61 [02:05<05:53,  7.86s/it]

193_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{0}.jpg


 28%|██▊       | 17/61 [02:12<05:43,  7.81s/it]

214_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{9}.jpg


 30%|██▉       | 18/61 [02:23<05:43,  7.98s/it]

190_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{15}.jpg


 31%|███       | 19/61 [02:31<05:35,  7.99s/it]

253_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{3}.jpg


 33%|███▎      | 20/61 [02:42<05:32,  8.12s/it]

158_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{2}.jpg


 34%|███▍      | 21/61 [02:49<05:23,  8.08s/it]

309_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{1}.jpg


 36%|███▌      | 22/61 [03:00<05:20,  8.21s/it]

265_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{6}.jpg


 38%|███▊      | 23/61 [03:11<05:17,  8.34s/it]

218_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{2}.jpg


 39%|███▉      | 24/61 [03:20<05:09,  8.37s/it]

205_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{10}.jpg


 41%|████      | 25/61 [03:29<05:01,  8.38s/it]

245_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{8}.jpg


 43%|████▎     | 26/61 [03:39<04:56,  8.46s/it]

362_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{2}.jpg


 44%|████▍     | 27/61 [03:50<04:50,  8.55s/it]

273_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{0}.jpg


 46%|████▌     | 28/61 [04:00<04:43,  8.59s/it]

370_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{0}.jpg


 48%|████▊     | 29/61 [04:10<04:36,  8.64s/it]

301_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{4}.jpg


 49%|████▉     | 30/61 [04:21<04:30,  8.71s/it]

173_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{8}.jpg


 51%|█████     | 31/61 [04:29<04:20,  8.68s/it]

149_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{10}.jpg


 52%|█████▏    | 32/61 [04:36<04:10,  8.64s/it]

354_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{5}.jpg


 54%|█████▍    | 33/61 [04:47<04:04,  8.72s/it]

257_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{11}.jpg


 56%|█████▌    | 34/61 [04:56<03:55,  8.73s/it]

330_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{7}.jpg




366_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{8}.jpg


 59%|█████▉    | 36/61 [05:22<03:43,  8.95s/it]

402_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{7}.jpg


 61%|██████    | 37/61 [05:26<03:31,  8.83s/it]

317_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{0}.jpg




277_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{7}.jpg


 64%|██████▍   | 39/61 [07:06<04:00, 10.93s/it]

222_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{5}.jpg


 66%|██████▌   | 40/61 [07:15<03:48, 10.90s/it]

261_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{1}.jpg


 67%|██████▋   | 41/61 [07:27<03:38, 10.92s/it]

137_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{7}.jpg


 69%|██████▉   | 42/61 [07:33<03:24, 10.79s/it]

358_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{4}.jpg


 70%|███████   | 43/61 [07:45<03:14, 10.81s/it]

269_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{9}.jpg


 72%|███████▏  | 44/61 [07:55<03:03, 10.81s/it]

281_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{5}.jpg


 74%|███████▍  | 45/61 [08:08<02:53, 10.85s/it]

121_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{13}.jpg


 75%|███████▌  | 46/61 [08:13<02:40, 10.72s/it]

234_out_{Wendy}{012115}{2495-3}{Jan 21 15, 165035}{ImmunoFluorescentHighRes}{2}.jpg


 77%|███████▋  | 47/61 [08:23<02:30, 10.72s/it]

322_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{5}.jpg


 79%|███████▊  | 48/61 [08:35<02:19, 10.75s/it]

289_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{3}.jpg


 80%|████████  | 49/61 [08:47<02:09, 10.77s/it]

177_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{3}.jpg


 82%|████████▏ | 50/61 [08:57<01:58, 10.75s/it]

349_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{1}.jpg


 84%|████████▎ | 51/61 [09:08<01:47, 10.76s/it]

181_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{12}.jpg


 85%|████████▌ | 52/61 [09:16<01:36, 10.69s/it]

338_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{9}.jpg


 87%|████████▋ | 53/61 [09:26<01:25, 10.69s/it]

133_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{5}.jpg


 89%|████████▊ | 54/61 [09:32<01:14, 10.60s/it]

342_out_{Wendy}{012115}{2495-5}{Jan 21 15, 165333}{ImmunoFluorescentHighRes}{9}.jpg


 90%|█████████ | 55/61 [09:44<01:03, 10.63s/it]

297_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{6}.jpg


 92%|█████████▏| 56/61 [09:56<00:53, 10.65s/it]

326_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{10}.jpg


 93%|█████████▎| 57/61 [10:08<00:42, 10.68s/it]

110_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{6}.jpg


 95%|█████████▌| 58/61 [10:12<00:31, 10.56s/it]

117_out_{Wendy}{012115}{2495-1}{Jan 21 15, 164730}{ImmunoFluorescentHighRes}{11}.jpg


 97%|█████████▋| 59/61 [10:17<00:20, 10.46s/it]

201_out_{Wendy}{012115}{2495-2}{Jan 21 15, 164904}{ImmunoFluorescentHighRes}{1}.jpg


 98%|█████████▊| 60/61 [10:25<00:10, 10.43s/it]

293_out_{Wendy}{012115}{2495-4}{Jan 21 15, 165204}{ImmunoFluorescentHighRes}{2}.jpg


100%|██████████| 61/61 [10:35<00:00, 10.42s/it]
