In [25]:
# install Pillow instead of PIL
from PIL import Image, ImageEnhance
from numpy import array, dstack, transpose, uint8, zeros, log2, log


In [26]:
# convert data to uint8 with pixel values less than 255
def convertData(pixmap, mode='RGB'):
  
  if pixmap.max() > 255:
    pixmap *= 255.0 / pixmap.max()

  pixmap = array(pixmap, uint8)
  img = Image.fromarray(pixmap, mode)
  
  return img

In [27]:
# Load array file (file name. identifying name, defaule signal value)
def loadArrayFile(fileName, sampleName, default=0.0):

  fileObj = open(fileName, 'r')
  
  # empty sets for row and column identifiers
  rows = set()
  cols = set()
  dataDict = {}

  # loop through lines in file and split on whitespace
  for line in fileObj:
    row, col, value = line.split()
    
    # new dic for missing row identifiers
    if row not in dataDict:
      dataDict[row] = {}

    # signal added by ro/column identifier
    if col in dataDict[row]:
      print('Repeat entry found for element %d, %d' % (row, col))
      continue

    # text string converted to float
    dataDict[row][col] = float(value)
    
    rows.add(row)
    cols.add(col)
    
  # sort data by rows and columns
  rows = sorted(rows)
  cols = sorted(cols)
  
  nRows = len(rows)
  nCols = len(cols)

  # NumPy data array initialized
  dataMatrix = zeros((nRows, nCols), float)

  # data extracted from dataDict, replace missing values with default
  for i, row in enumerate(rows):
    for j, col in enumerate(cols):
      value = dataDict[row].get(col, default)
      dataMatrix[i,j] = value
  
  fileObj.close()

  # microarray object which is passed back from function
  return Microarray(sampleName, dataMatrix, rows, cols)

In [28]:
# load file, identifying name, number of rows/columns
def loadImage(fileName, sampleName, nRows, nCols=None):

  # sets default data columns to number of rows
  if not nCols:
    nCols = nRows
  
  # matrix with 3 colors, rows, columns. 
  dataMatrix = zeros((3, nRows, nCols), float)
  
  # image object
  img = Image.open(fileName)
  # numeric array
  pixmap = imageToPixmapRGB(img)
  
  # divide total image by #col and #rows
  height, width, depth = pixmap.shape
  
  # floating point grid sizes-precise values
  dx = width/float(nCols)
  dy = height/float(nRows)
  # integer grid sizes-fixed number of pixels
  xSize = 1 + (width-1)//nCols
  ySize = 1 + (height-1)//nRows

  # loop by row, calculate first and last pixel position
  for row in range(nRows):
    yStart = int(row*dy)
    yEnd   = yStart + ySize

    # loop by row, calculate range of pixels
    for col in range(nCols):
      xStart = int(col*dx)
      xEnd   = xStart + xSize

      # use row/colunm pixel location-data summed from width and height
      elementData = pixmap[yStart:yEnd,xStart:xEnd]
      dataMatrix[:,row, col] = elementData.sum(axis=(0,1))

  # microarray object which is passed back from function
  return Microarray(sampleName, dataMatrix)

In [29]:
# converts image data into NumPy array
def imageToPixmapRGB(img):
  
  img2 = img.convert('RGB')
  w, h = img2.size  
  data = img2.getdata()

  pixmap = array(data, float)
  pixmap = pixmap.reshape((h,w,3))
  
  return pixmap


In [30]:
# imports file. species file name, nickname, number of rows and columns
def loadArrayImage(fileName, sampleName, nRows, nCols=None):

  if not nCols:
    nCols = nRows
  
  dataMatrix = zeros((3, nRows, nCols), float)
  
  img = Image.open(fileName) # Auto file type
  pixmap = imageToPixmapRGB(img)
  
  height, width, depth = pixmap.shape
  
  dx = width/float(nCols)
  dy = height/float(nRows)
  xSize = 1 + (width-1)//nCols
  ySize = 1 + (height-1)//nRows

  for row in range(nRows):
    yStart = int(row*dy)
    yEnd   = yStart + ySize

    for col in range(nCols):
      xStart = int(col*dx)
      xEnd   = xStart + xSize

      elementData = pixmap[yStart:yEnd,xStart:xEnd]
      dataMatrix[:,row, col] = elementData.sum(axis=(0,1))

  
  return Microarray(sampleName, dataMatrix)

In [31]:
# object holding data in microarray   
class Microarray(object):

  # sample name and NumPy array
  def __init__(self, name, data, rowData=None, colData=None):

   # make copy of array
    self.name = name 
    data = array(data)
    
    # size of array axes
    shape = data.shape
    
    # 3 axes=data channels, row, column
    if len(shape) == 3:
      self.nChannels, self.nRows, self.nCols = shape
    
    # 2 axes=1 channel, row, column -> forces to have 3 channels
    elif len(shape) == 2:
      self.nRows, self.nCols = shape
      self.nChannels = 1
      data = array([data])

    # exception for incorrect # of channels
    else:
      raise Exception('Array data must have either 2 or 3 axes.')  

    # new data tied to microarray object, copy of original
    self.data = data
    self.origData = array(data)
  
    # row/column labels added to object  
    self.rowData = rowData or range(self.nRows)
    self.colData = colData or range(self.nCols)

  # reset data to original values
  def reset(self):
  
    self.data = array(self.origData)
    self.nChannels = len(self.data)

  # error if input does not equal existing data
  def checkDataSize(self, channelData):
    
    channelData = array(channelData)
    if channelData.shape != (self.nRows, self.nCols):
      msg = 'Attempt use data of wrong size'
      raise Exception(msg)
  
    return channelData

  # replace all data for array layer
  def replaceData(self, channelData, index=0):

    channelData = self.checkDataSize(channelData)
    self.data[index] = channelData

  # exporting array data
  def writeData(self, fileName, separator=' '):
  
    fileObj = open(fileName, 'w')
    
    # loops through array and converts identifiers as strings
    for i in range(self.nRows):
      rowName = str(self.rowData[i])
      
      for j in range(self.nCols):
        colName = str(self.colData[j])

        # get data
        values = self.data[:,i,j]

        # line of text: row, column, value
        lineData = [rowName, colName]
        lineData += ['%.3f' % (v,) for v in values]
        
        # joins strings
        line = separator.join(lineData)
        # write to file object
        fileObj.write(line + '\n') 
  
  # create picture  
  def makeImage(self, squareSize=20, channels=None):
    
    # extreme values and data range
    minVal = self.data.min()
    maxVal = self.data.max() 
    dataRange = maxVal - minVal  

    # subtract minVAl for 0(black), 255=brightest
    adjData = (self.data - minVal) * 255 / dataRange
    # converted to 8-bit
    adjData = array(adjData, uint8)
   
    # 1 channel:red,green,blue from data layer; else up to 3 layers
    if not channels:
      if self.nChannels == 1:
        channels = (0,0,0) # Greyscale
      else:
        channels = list(range(self.nChannels))[:3]

    # blank channels: append 0
    pixmap = []
    for i in channels:
      if i is None:
        pixmap.append(zeros((self.nRows, self.nCols), uint8))
      else:
        pixmap.append(adjData[i])
        
    # add zeros if channels<3
    while len(pixmap) < 3:
      pixmap.append(zeros((self.nRows, self.nCols), uint8))
     
    # 3 color layers stacked along depth axis; create image
    pixmap = dstack(pixmap)
    img = Image.fromarray(pixmap, 'RGB')

    # resize image to squares in each row/column
    width = self.nCols * squareSize
    height = self.nRows * squareSize
    img = img.resize((width, height))
    
    return img

  # clip lowest data value
  def clipBaseline(self, threshold=None, channels=None, defaultProp=0.2):
    
    if not channels:
      channels = range(self.nChannels)
    
    channels = [tuple(channels)]
    
    maxVal = self.data[channels].max()
    if threshold is None:
      limit = maxVal * defaultProp
    else:
      limit = threshold
    
    boolArray = self.data[channels] < limit
    indices = boolArray.nonzero()
        
    self.data[indices] = limit

    self.data[channels] -= limit
    self.data[channels] *= maxVal / (maxVal-limit)

  # set upper limit to 1.0
  def normaliseMax(self, scale=1.0, perChannel=True):
    
    if perChannel:
      for i in range(self.nChannels):
        self.data[i] = self.data[i] * scale / self.data[i].max()
    
    else:
      self.data = self.data * scale / self.data.max()

  # normalize data using log scale
  def normaliseLogMean(self):

    self.clipBaseline(threshold=0.0)
    for i in range(self.nChannels):
      self.data[i] = log( 1.0 + self.data[i] / self.data[i].mean() )


  # combine channels specified via indices
  def combineChannels(self, indexA, indexB, combFunc=None, replace=None):
    
    if not combFunc:
      import operator
      combFunc= operator.add 

    channelData = combFunc(self.data[indexA], self.data[indexB])

    if replace is None:
      self.addChannel(channelData)
      
    else:
      self.replaceData(channelData, replace)

  # measures distances between each row
  def __hierarchicalRowCluster(self, dataMatrix):
    
    n = len(dataMatrix[0])
    distanceMatrix = zeros((n, n), float)
    
    # loop: subtrate each row from whole array
    for channelData in dataMatrix:
      for i, row in enumerate(channelData):
        diffs = channelData - row
        # square differences
        sqDiffs = diffs * diffs
        # square root:  difference from row to all other rows
        sqDists = sqDiffs.sum(axis=1)
        # distances put in distanceMatrix
        distanceMatrix[i,:] += sqDists

   

    
  # Clusters rows and reorders array to row and column hierarchy
  def hierarchicalCluster(self):
     
    rows = self.__hierarchicalRowCluster(self.data)

    swapped = transpose(self.data, axes=(0,2,1))
    cols = self.__hierarchicalRowCluster(swapped)

    # reordered rows/columns used to make hierarchical data array
    data = self.data[:,rows] # Rearrange
    data = data[:,:,cols]
    
    data = array(data.tolist()) # to fix PIL.Image bug
    
    name = self.name + '-Sorted'
    rowData = [self.rowData[i] for i in rows]
    colData = [self.colData[j] for j in cols]

    sortedArray = Microarray[tuple(name, data, rowData, colData)]
    
    return sortedArray 


In [32]:
if __name__ == '__main__':
  

  # difference between red and green channel           
  # load sample file    
  imgFile = 'RedGreenArray.png'
  rgArray = loadArrayImage(imgFile, 'TwoChannel', 18, 17)
  # subtract red channel - green channel
  diff = rgArray.data[0]-rgArray.data[1]
  # create image of 20x20 pixel squares
  rgArray.makeImage(20).show()
  
  # stored in first two color channels
  rgArray.replaceData(diff, 0)
  # flip green channel sign
  rgArray.replaceData(-diff, 1)
  # clip at 0 to remove negative values
  rgArray.clipBaseline(threshold=0.0, channels=(0,1))
  # create image of 20x20 pixel squares
  rgArray.makeImage(20).show()
  
  # log2 of ratio of red and green channels
  def log2Ratio(data1, data2):
    # input first array
    data1 = array(data1) + 1e-3
    # input second arrayreplaceData
    data2 = array(data2) + 1e-3

    # output array
    return log2(data1/data2)

  imgFile = 'RedGreenArray.png'
  rgArray = loadArrayImage(imgFile, 'TwoChannel', 18, 17)
  # combine red and green channels
  rgArray.combineChannels(0, 1, combFunc=log2Ratio, replace=2)
  # log2 result in blue channel
  rgArray.normaliseMax(perChannel=True)
  
  rgArray.makeImage(20, channels=(2,2,2)).show()

  #G-test  
  def gScore(data1, data2):
    data1 = array(data1) + 1e-3
    data2 = array(data2) + 1e-3

    return data1 * log2(data1/data2)

  rgArray = loadArrayImage(imgFile, 'TwoChannel', 18, 17)
  rgArray.combineChannels(0, 1, combFunc=gScore, replace=2)
  rgArray.makeImage(20, channels=(2,2,2)).show()
  sortedArray = rgArray.hierarchicalCluster()
  sortedArray.makeImage(20).show()


  print(rgArray.rowData)
  print(sortedArray.rowData)

TypeError: 'NoneType' object is not iterable