In [10]:
import os, matplotlib, json, gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import measure
from scipy import ndimage, misc, stats 
matplotlib.style.use('ggplot')

In [11]:
def filterdirectory(path,extension):
    """Return the visible entries of a directory whose names end with an extension.

    The comparison is case-insensitive on both sides, so ``".jpg"`` and
    ``".JPG"`` select the same entries. Names starting with ``.`` are skipped.
    The directory is not searched recursively.

    Parameters
    ----------
    path : str
        Directory to list.
    extension : str
        Suffix to match, e.g. ``".jpg"``.

    Returns
    -------
    list of str
        Bare entry names (no directory prefix), sorted so the result does not
        depend on the arbitrary order in which ``os.listdir`` reports entries.
    """
    # Lower-case the suffix once; the names are lower-cased for the comparison too.
    suffix = extension.lower()
    return sorted(
        name for name in os.listdir(path)
        if name.lower().endswith(suffix) and not name.startswith('.')
    )

In [12]:
def mkdirsafe (newpath):
    """Create the directory ``newpath`` (including missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Parameters
    ----------
    newpath : str
        Directory path to create.

    Raises
    ------
    FileExistsError
        If ``newpath`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(newpath, exist_ok=True)

In [13]:
#unessential
class detectionobject:
    """Label the regions of an array and record the one with the largest bounding box.

    Attributes set by ``__init__``:
      labeled            -- label array produced by ``detectlabels``
      objects            -- one slice tuple (bounding box) per label
      objectareas        -- bounding-box element count per object, same order
      largestobjectslice -- bounding box with the largest element count
      largestobjectarray -- ``array`` cropped to ``largestobjectslice``
    """

    def detectlabels (self,array):
        """Return the label array computed by ``skimage.measure.label`` for ``array``."""
        return measure.label(array)

    def detectobjects (self,labels):
        """Return, for each label, the slice tuple of the smallest box that contains it."""
        # ndimage.find_objects is the public name; the ndimage.measurements
        # sub-namespace is deprecated.
        return ndimage.find_objects(labels)

    def arealist (self,array,objects):
        """Return the bounding-box size of each object, in the same order.

        The size is the number of elements in the box, not the number of
        labelled pixels inside it.
        """
        return [array[obj].size for obj in objects]

    def __init__(self, array):
        """Build labels, bounding boxes, their sizes, and the largest box and its crop.

        Raises
        ------
        ValueError
            If no object is found in ``array``.
        """
        self.labeled = self.detectlabels(array)
        self.objects = self.detectobjects(self.labeled)
        self.objectareas = self.arealist(array, self.objects)
        if not self.objectareas:
            raise ValueError('no objects detected in array')
        # argmax returns the first index holding the maximum, i.e. the first
        # object with the largest bounding box.
        largest = int(np.argmax(self.objectareas))
        self.largestobjectslice = self.objects[largest]
        # actual array values inside that bounding box
        self.largestobjectarray = array[self.largestobjectslice]

In [14]:
def filterImage(imagearray, blur_sigma=2, strict_nstd=4.5, relaxed_nstd=3.5, min_pixels=30):
    """Smooth an image and return a boolean mask of its unusually bright pixels.

    The image is blurred with a Gaussian filter to reduce noise. A pixel is
    flagged when its smoothed value exceeds ``mean + strict_nstd * std`` of the
    smoothed image. If fewer than ``min_pixels`` pixels pass, the cut-off is
    lowered to ``mean + relaxed_nstd * std``.

    Parameters
    ----------
    imagearray : numpy.ndarray
        Image data to threshold.
    blur_sigma : scalar, optional
        Standard deviation passed to the Gaussian filter.
    strict_nstd : float, optional
        Number of standard deviations above the mean for the first attempt.
    relaxed_nstd : float, optional
        Number of standard deviations used when the first attempt flags too
        few pixels.
    min_pixels : int, optional
        Minimum number of flagged pixels for the strict mask to be accepted.

    Returns
    -------
    numpy.ndarray of bool
        Mask with the same shape as ``imagearray``.
    """
    # ndimage.gaussian_filter is the public name; ndimage.filters is deprecated.
    smoothed = ndimage.gaussian_filter(imagearray, blur_sigma)
    # Both thresholds use the same statistics, so compute them once.
    mean = smoothed.mean()
    std = smoothed.std()
    thresh = smoothed > (mean + strict_nstd*std)
    if thresh.sum() < min_pixels:  # too few hits: lower the standard
        thresh = smoothed > (mean + relaxed_nstd*std)
    return thresh

In [15]:
def filterObjects(labels,lower=20,upper=500):
    """Erase objects whose bounding box is too small or too large.

    An object is kept when the element count of its bounding box is strictly
    between ``lower`` and ``upper``. Every other object has its whole bounding
    box set to 0.

    Parameters
    ----------
    labels : numpy.ndarray
        Integer label array. It is modified in place.
    lower, upper : int, optional
        Exclusive bounds on the bounding-box element count.

    Returns
    -------
    labels : numpy.ndarray
        The same array object that was passed in, after erasing.
    keep_objects : list of tuple of slice
        Bounding boxes of the labels that remain.
    """
    # find_objects yields None for label numbers that do not occur. Those must
    # be skipped: indexing with None would address the entire array.
    rejected = [
        box for box in ndimage.find_objects(labels)
        if box is not None and not (lower < labels[box].size < upper)
    ]
    # NOTE(review): this clears the full bounding box, so pixels of a different
    # label that fall inside a rejected box are erased as well - confirm that
    # this is intended.
    for box in rejected:
        labels[box] = 0
    # Erased labels leave gaps in the numbering, which show up as None entries.
    keep_objects = [box for box in ndimage.find_objects(labels) if box is not None]
    return labels,keep_objects

In [16]:
#unessential
def AutoCrop(image, skip=1, min_fraction=0.20):
    """Optionally crop an image to its largest bright region.

    With the default ``skip=1`` the image is returned untouched and no
    detection work is done. Cropping is attempted only when ``skip == 0``.

    Parameters
    ----------
    image : numpy.ndarray
        Image array. When cropping is enabled it is summed over axis 2, so it
        needs at least three dimensions.
    skip : int, optional
        ``0`` enables cropping; any other value returns ``image`` unchanged.
    min_fraction : float, optional
        The crop is applied only if the largest detected bounding box covers
        more than this fraction of the image.

    Returns
    -------
    numpy.ndarray
        Either ``image`` itself or the slice of it given by the largest
        detected bounding box.
    """
    # Return before any labelling: the detection result is only used when
    # cropping is enabled.
    if skip != 0:
        return image
    # Sum over axis 2 and flag every pixel brighter than the average sum.
    channel_sum = image.sum(2)
    Sectionbinary = channel_sum > channel_sum.mean()
    detectedSection = detectionobject(Sectionbinary)
    coverage = float(np.array(detectedSection.objectareas).max()) / float(np.size(Sectionbinary))
    if coverage > min_fraction:
        return image[detectedSection.largestobjectslice]
    return image

In [32]:
def _zoom2Large (largearray,smallarray):
"""Zoom the small array to the size of the large array"""
        largearrayshape = np.float64(largearray.shape)
        smallarrayshape = np.float64 (smallarray.shape)
        zoomfactor = (largearrayshape [0]/smallarrayshape[0],largearrayshape[1]/smallarrayshape[1])
        zoomfactor = zoomfactor #delete this line
        zoomedsmall = ndimage.zoom(smallarray, order=0, zoom = zoomfactor)
        return zoomedsmall

In [33]:
def allencomparisonarray (sectionnumber, imagearray, Allen_detect_annotation_path='/home/dfpena/Documents/P56_Mouse_annotation/' ):
    """Load one section from the Allen annotation pickle and zoom it to the image size.

    Parameters
    ----------
    sectionnumber : int
        Row index into the pickled table.
    imagearray : numpy.ndarray
        Array whose first two dimensions give the target size.
    Allen_detect_annotation_path : str, optional
        Directory that contains ``Allen_detected_annotation.panda``. A trailing
        separator is optional.

    Returns
    -------
    numpy.ndarray
        The stored section array resized by ``_zoom2Large``.
    """
    # os.path.join works with or without a trailing separator on the directory.
    annotation_file = os.path.join(Allen_detect_annotation_path, 'Allen_detected_annotation.panda')
    # NOTE(security): unpickling can execute arbitrary code; only point this at
    # annotation files from a trusted source.
    # Row ``sectionnumber``, first column of the table holds the section array
    # (presumably int32 - TODO confirm against the file).
    Allendetected = pd.read_pickle(annotation_file).values[sectionnumber][0]
    return _zoom2Large(imagearray, Allendetected)

In [34]:
def process_images(ipath,iterator,allenlibpath):
    """Run the detection pipeline on one section image and save each stage.

    Four ``.npz`` files are written under ``arrays/`` in the current working
    directory, each named ``<iterator>_<section>_<stage>``: the loaded image
    (``croppedsection``), the bright-pixel mask (``filtercroppedsection``),
    the size-filtered labels (``labels``) and the resized Allen annotation
    (``AllenResized``).

    Parameters
    ----------
    ipath : str
        Path of the image. The file name must start with the section number
        followed by ``_``.
    iterator : int
        Running index used as a prefix of the output file names.
    allenlibpath : str
        Directory that holds the Allen annotation pickle.

    Raises
    ------
    ValueError
        If the file name does not start with an integer section number.
        Calls made further down the pipeline may raise it as well.
    """
    for folder in ('panda', '3d', 'arrays'):
        mkdirsafe(folder)
    # The section number is the part of the file name before the first '_'.
    # Use the basename so that underscores in directory names do not interfere.
    Sectionnumber = int(os.path.basename(ipath).split('_')[0])
    prefix = 'arrays/' + str(iterator) + '_' + str(Sectionnumber) + '_'

    # scipy.ndimage.imread was removed from SciPy; matplotlib's reader is
    # already imported by this notebook.
    # TODO: AutoCrop currently returns the image unchanged (skip defaults to 1).
    section = AutoCrop(plt.imread(ipath))
    np.savez(prefix + 'croppedsection', section)

    # Only channel index 1 is thresholded (could just use grayscale).
    mask = filterImage(section[:,:,1])
    np.savez(prefix + 'filtercroppedsection', mask)

    # Turn the boolean mask into a numbered label array, then drop objects
    # outside the size limits.
    labeled = detectionobject(mask).labeled
    labeled, objects = filterObjects(labeled)
    np.savez(prefix + 'labels', labeled)
    #mkdebug_fig('DebugImages',(str(Sectionnumber)+'_labeledfiltered'),labeled)

    Allen = allencomparisonarray(Sectionnumber, mask, allenlibpath)
    np.savez(prefix + 'AllenResized', Allen)

    # Disabled downstream steps, kept for reference (they were wrapped in a
    # try/except ValueError that printed 'No cells could be detected'):
    #     #mkdebug_fig('DebugImages',str(iterator) + '_' +(str(Sectionnumber)+'_Verification'),Allen,0.6)
    #     cellmap =enumerateRegions(objects,Allen,Sectionnumber,allenlibpath)
    #     cellmap.to_pickle('panda/'+ str(iterator) + '_' +str(Sectionnumber)+'.panda')
    #     np.savez('3d/'+str(iterator)+'_'+str(Sectionnumber),objects )
    #     print(iterator, ipath)
    #     return (iterator, ipath)

In [35]:
# Run configuration: edit these paths for the machine at hand.
path_to_images = 'C:/Users/Student/Desktop/LAB/scans/testbed/'
path_to_allenlib = 'C:/Users/Student/Desktop/LAB/Allen/'
path_to_figurearray= 'C:/Users/Student/Desktop/LAB/scans/testbed/arrays/'
animalname = '18'

# Fail fast if the Allen annotation file is missing, before any image is
# processed. An existence check is enough; the table itself is loaded by
# allencomparisonarray when it is needed.
annotation_file = os.path.join(path_to_allenlib, 'Allen_detected_annotation.panda')
if not os.path.isfile(annotation_file):
    raise FileNotFoundError(annotation_file)

# process_images writes its output folders relative to the working directory,
# so work from inside the image folder.
os.chdir(path_to_images)
directory = filterdirectory(os.curdir,".jpg")
print(directory)

# The running index becomes the prefix of every output file name.
for i, img in enumerate(directory):
    process_images(img, i, path_to_allenlib)

# Disabled follow-up steps, kept for reference:
# mkexcel(path_to_images +'panda', animalname)
# mkmpld3figs(path_to_images +'ExcelSheets',animalname)

# os.chdir(path_to_figurearray)
# load_functions_allcores(path_to_figurearray,clients[:])
# pickledirectory = filterdirectory(os.curdir,".npz")
# output = balanced.map(figure_from_pickle,pickledirectory)

['406_593_2_2.jpg', '408_593_2_3.jpg']
<class 'numpy.ndarray'> int32
<class 'numpy.ndarray'> int32


