# Week 4 Assignment - Christina Morgenstern

This notebook demonstrates the analysis of an array experiment in Python. Specifically, it shows the code for importing the colour channels of a scanned array image, combining the data from two channels, generating a result matrix from a mathematical operation, and normalizing the fluorescence intensity values.

In [21]:
# import libraries for assignment
from PIL import Image
#from IPython.core.display import Image, display
from Images import imageToPixmapRGB # this is a module from the book. The code needs to be in the same folder as this code.
from numpy import array, zeros, log2, append, log, sqrt, uint8, dstack

### 1. Extract array image data

In [22]:
def loadArrayImage(fileName, sampleName, nRows, nCols=None):
    """Load a scanned array image and total the signal of each grid element.

    The image is divided into an nRows x nCols grid. For every grid cell the
    pixel values are summed separately for each colour channel and stored in
    a (3, nRows, nCols) matrix.

    Args:
        fileName: Path of the image file, passed to Image.open.
        sampleName: Name given to the resulting Microarray object.
        nRows: Number of grid rows in the array image.
        nCols: Number of grid columns; defaults to nRows (square grid).

    Returns:
        A Microarray built from sampleName and the summed signal matrix.
    """

    if not nCols:
        nCols = nRows

    dataMatrix = zeros((3, nRows, nCols), float)

    img = Image.open(fileName)      # image object from the input file
    pixmap = imageToPixmapRGB(img)  # numeric (height, width, depth) array

    # NOTE(review): depth is assumed to be 3 (RGB) to match dataMatrix - confirm
    # against imageToPixmapRGB.
    height, width, depth = pixmap.shape

    # Fractional step between grid cells, so rounding does not accumulate
    dx = width / float(nCols)
    dy = height / float(nRows)

    # Whole-pixel cell size, rounded up; neighbouring cells may share an edge
    # pixel when the image size is not an exact multiple of the grid size.
    xSize = 1 + (width - 1) // nCols
    ySize = 1 + (height - 1) // nRows

    for row in range(nRows):
        yStart = int(row * dy)
        yEnd = yStart + ySize

        # The column loop must run inside the row loop so that every
        # (row, col) cell is visited, not only the cells of the last row.
        for col in range(nCols):
            xStart = int(col * dx)
            xEnd = xStart + xSize

            elementData = pixmap[yStart:yEnd, xStart:xEnd]
            # Collapse both pixel axes, leaving one total per colour channel
            dataMatrix[:, row, col] = elementData.sum(axis=(0, 1))

    return Microarray(sampleName, dataMatrix)

### 2. Define Microarray class and relevant associated class functions

In [23]:
class Microarray(object):
    """Microarray signal data stored as a (channel, row, column) array.

    Attributes:
        name: Sample name.
        data: Working data of shape (nChannels, nRows, nCols).
        origData: Copy of the data as supplied at construction.
        nChannels, nRows, nCols: Sizes of the three data axes.
        rowData, colData: Row and column labels (default: 0..n-1).
    """

    def __init__(self, name, data, rowData=None, colData=None):
        """Create a microarray from 2-D (single channel) or 3-D data.

        Args:
            name: Sample name.
            data: Array-like of shape (rows, cols) or (channels, rows, cols).
            rowData: Optional row labels; defaults to range(nRows).
            colData: Optional column labels; defaults to range(nCols).

        Raises:
            Exception: If data has neither 2 nor 3 axes.
        """

        self.name = name
        data = array(data)

        # Sizes of the axes decide how the input is interpreted
        shape = data.shape

        if len(shape) == 3:
            self.nChannels, self.nRows, self.nCols = shape

        elif len(shape) == 2:
            self.nRows, self.nCols = shape
            self.nChannels = 1
            data = array([data])  # promote to a one-channel 3-D array

        else:
            raise Exception ('Array data must have either 2 or 3 axes!!')

        self.data = data
        self.origData = array(data)  # independent copy of the original data

        # Associate row and column labels with the object
        self.rowData = rowData or range(self.nRows)
        self.colData = colData or range(self.nCols)

    def writeData(self, fileName, separator=' '):
        """Export the array data as text, one line per array element.

        Each line holds the row label, the column label and the value of every
        channel formatted to three decimal places, joined by separator.

        Args:
            fileName: Path of the text file to write (overwritten if present).
            separator: String placed between the fields of a line.
        """

        # The context manager guarantees the file is flushed and closed
        with open(fileName, 'w') as fileObj:

            for i in range(self.nRows):
                rowName = str(self.rowData[i])

                for j in range(self.nCols):
                    colName = str(self.colData[j])

                    values = self.data[:, i, j]

                    lineData = [rowName, colName]
                    lineData += ['%.3f' % (v,) for v in values]

                    fileObj.write(separator.join(lineData) + '\n')

    def makeImage(self, squareSize=20, channels=None):
        """Create a picture representing the microarray data.

        The data are scaled to 0..255 over the range of the whole array and
        mapped onto the red, green and blue layers of an image.

        Args:
            squareSize: Width and height in pixels of each array element.
            channels: Up to three channel indices for red, green and blue; a
                None entry gives an empty layer. By default a single channel
                is drawn as greyscale, otherwise the first three are used.

        Returns:
            A PIL image of size (nCols * squareSize, nRows * squareSize).
        """

        minVal = self.data.min()
        maxVal = self.data.max()
        dataRange = maxVal - minVal

        if dataRange > 0:
            adjData = (self.data - minVal) * 255 / dataRange
        else:
            # Constant data: avoid dividing by zero and draw a black image
            adjData = zeros(self.data.shape, float)

        adjData = array(adjData, uint8)

        if not channels:
            if self.nChannels == 1:
                channels = (0, 0, 0)  # greyscale

            else:
                channels = list(range(self.nChannels))[:3]

        pixmap = []

        for i in channels:

            if i is None:
                pixmap.append(zeros((self.nRows, self.nCols), uint8))

            else:
                pixmap.append(adjData[i])

        # Pad with empty layers so there are always three colour layers
        while len(pixmap) < 3:
            pixmap.append(zeros((self.nRows, self.nCols), uint8))

        pixmap = dstack(pixmap)
        img = Image.fromarray(pixmap, 'RGB')

        width = self.nCols * squareSize
        height = self.nRows * squareSize
        img = img.resize((width, height))

        return img

    # Channel handling: replacing, adding and combining data layers

    def checkDataSize(self, channelData):
        """Return channelData as an array after checking its size.

        Raises:
            Exception: If the shape is not (nRows, nCols).
        """

        channelData = array(channelData)
        if channelData.shape != (self.nRows, self.nCols):
            msg = 'Attempt to use data of wrong size'
            raise Exception (msg)

        return channelData

    def setChannel(self, channelData, index=0):
        """Replace all the data of the array layer at the given index."""

        channelData = self.checkDataSize(channelData)
        self.data[index] = channelData

    def addChannel(self, channelData):
        """Add an entirely new layer of array data after the existing data."""

        channelData = self.checkDataSize(channelData)

        # The new layer is 2-D while self.data is 3-D; wrap it so both have
        # the same number of axes, as append along an axis requires.
        self.data = append(self.data, array([channelData]), axis=0)
        self.nChannels += 1

    def combineChannels(self, indexA, indexB, combFunc=None, replace=None):
        """Combine two channels, selected by index, into one layer.

        Args:
            indexA: Index of the first channel.
            indexB: Index of the second channel.
            combFunc: Function taking the two channel arrays and returning
                the combined array; defaults to addition.
            replace: Index of the channel to overwrite with the result. When
                None the result is added as a new channel.
        """

        if not combFunc:
            import operator
            combFunc = operator.add

        channelData = combFunc(self.data[indexA], self.data[indexB])

        if replace is None:
            self.addChannel(channelData)

        else:
            self.setChannel(channelData, replace)

    # Matrix of differences

    def matrixDifferences(self, dataMatrix):
        """Return the summed squared differences between the rows of dataMatrix.

        Args:
            dataMatrix: Array indexed as (channel, row, column).

        Returns:
            An n x n float array, n being the number of rows of the first
            channel. Entry [i, j] is the squared distance between row i and
            row j, summed over all channels.
        """

        n = len(dataMatrix[0])
        differenceMatrix = zeros((n, n), float)

        for channelData in dataMatrix:
            for i, row in enumerate(channelData):
                diffs = channelData - row       # row i against every row
                sqDiffs = diffs * diffs
                sqDists = sqDiffs.sum(axis=1)   # one squared distance per row
                differenceMatrix[i, :] += sqDists

        return differenceMatrix

    # Normalisation of fluorescence intensity data

    def clipBaseline(self, threshold=None, channels=None, defaultProp=0.2):
        """Clip the base level of the data so that it doesn't fall below a threshold.

        Values below the threshold are raised to it. The threshold is then
        subtracted so that the baseline sits at zero, and the data are
        rescaled so that the original maximum is retained.

        Args:
            threshold: Lowest permitted value. When None it is defaultProp
                times the maximum of the selected channels.
            channels: Indices of the channels to clip; all channels when
                omitted. The maximum is taken over the selected channels.
            defaultProp: Proportion of the maximum used as the default
                threshold.
        """

        if not channels:
            channels = range(self.nChannels)

        channels = list(channels)

        # Indexing with a list yields a copy, so work on it and write it back
        subset = array(self.data[channels], float)
        maxVal = subset.max()

        if threshold is None:
            limit = maxVal * defaultProp
        else:
            limit = threshold

        subset[subset < limit] = limit
        subset = subset - limit

        # Restore the original maximum; skipped when every value was clipped,
        # which would otherwise divide by zero.
        if maxVal > limit:
            subset = subset * (maxVal / (maxVal - limit))

        self.data[channels] = subset

    def normaliseLogMean(self):
        """Normalise fluorescence intensity data on a logarithmic scale.

        Negative values are first clipped to zero, then every channel is
        replaced by log(1 + value / channel mean).
        """

        self.clipBaseline(threshold=0.0)
        for i in range(self.nChannels):
            self.data[i] = log(1.0 + self.data[i] / self.data[i].mean())
    
    
        

In [24]:
# File name of the test array image; it is later passed to loadArrayImage
imgFile = 'RedGreenArray.png'

In [25]:
# Read the test image as a grid of 18 rows by 17 columns, then display a
# preview in which every array element is drawn as a 25-pixel square
rgArray = loadArrayImage(imgFile, 'Test', nRows=18, nCols=17)
rgArray.makeImage(squareSize=25).show()

In [13]:
# Combination function handed to Microarray.combineChannels
def log2Ratio(data1, data2):
    """Return the base-2 logarithm of the element-wise ratio data1 / data2.

    A small constant (1e-3) is added to both inputs before dividing, so that
    zero values do not produce a division by zero or the logarithm of zero.
    """

    offset = 1e-3
    numerator = array(data1) + offset
    denominator = array(data2) + offset
    ratio = numerator / denominator

    return log2(ratio)

In [26]:
# Combine channels 0 and 1 (red and green) with the base-2 log ratio and
# store the result in channel 2 (blue)
rgArray.combineChannels(indexA=0, indexB=1, combFunc=log2Ratio, replace=2)

In [27]:
# Normalise the fluorescence intensities on a log scale, then display an
# image of the normalised values
rgArray.normaliseLogMean()
rgArray.makeImage(squareSize=25).show()

In [227]:
# Absolute difference between channels 0 and 1, obtained by squaring the
# difference and taking the square root. The positive magnitude is written to
# channel 0 and its negative to channel 1, baseline clipping at 0.0 is
# requested for both channels, and the result is displayed.
diff = rgArray.data[0] - rgArray.data[1]
SqDiff = sqrt(diff * diff)
rgArray.setChannel(SqDiff, index=0)
rgArray.setChannel(-SqDiff, index=1)
rgArray.clipBaseline(threshold=0.0, channels=(0, 1))
rgArray.makeImage(squareSize=20).show()