In [8]:
import numpy as np
import csv

class MedianPolish:
    """Fit an additive (row + column) model with Tukey's median polish.

    Attributes:
        tbl_org: the array passed to the constructor. It is stored by
            reference (not copied), so in-place changes made by the caller
            are visible here.
        tbl: floating-point working copy that holds the residuals after
            median_polish has run.
    """

    # Maps the ``method`` argument of median_polish to the numpy reduction
    # used to compute the row / column centers.
    _CENTER_FUNCS = {'median': np.median, 'average': np.average}

    def __init__(self, array):
        """Store ``array`` as tbl_org and a float working copy as tbl.

        Raises:
            TypeError: if ``array`` is not a numpy.ndarray.
        """
        if not isinstance(array, np.ndarray):
            raise TypeError('Expected the argument to be a numpy.ndarray.')
        self.tbl_org = array
        # The polish subtracts float medians in place, which numpy refuses
        # to do on an integer array; work on a float copy instead.
        self.tbl = self.tbl_org.astype(float)

    @staticmethod
    def csv_to_ndarray(fname):
        """Load a comma-delimited text source into a numpy ndarray.

        ``fname`` is handed unchanged to numpy.genfromtxt. Any error is
        reported on stdout and then re-raised unchanged.
        """
        try:
            return np.genfromtxt(fname, delimiter=",")
        except Exception:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Error loading file %s:" % fname)
            raise

    def median_polish(self, max_iterations, method='median'):
        """Run Tukey's median polish for additive models.

        Each iteration removes the row centers and then the column centers
        from the working table and accumulates them into the row and column
        effects.

        Args:
            max_iterations: number of row/column sweeps to perform.
            method: 'median' (default) for the classic polish, or 'average'
                to sweep out means instead.

        Returns:
            A tuple ``(grand_effect, col_effects, row_effects, tbl,
            tbl_org)``.

            The effects are NOT re-centered after each sweep, so, up to
            floating-point rounding,
            ``tbl_org == tbl + row_effects[:, np.newaxis] + col_effects``
            without adding ``grand_effect``. ``grand_effect`` is the running
            sum, over iterations, of the center of the accumulated row
            effects and of the accumulated column effects.

        Raises:
            ValueError: if ``method`` is neither 'median' nor 'average'.
        """
        try:
            center = self._CENTER_FUNCS[method]
        except KeyError:
            raise ValueError(
                "method must be 'median' or 'average', got %r" % (method,))

        # Start every run from the original data so that the freshly zeroed
        # effects below stay consistent with the residual table even when
        # the method is called more than once on the same instance.
        self.tbl = self.tbl_org.astype(float)

        grand_effect = 0
        # One accumulated effect per row (shape[0]) and per column (shape[1]).
        row_effects = np.zeros(shape=self.tbl.shape[0])
        col_effects = np.zeros(shape=self.tbl.shape[1])

        for _ in range(max_iterations):
            # axis=1 reduces across each row, axis=0 down each column.
            row_centers = center(self.tbl, axis=1)
            row_effects += row_centers
            grand_effect += center(row_effects)
            # np.newaxis turns the 1-D row centers into a column vector so
            # each row has its own center subtracted.
            self.tbl -= row_centers[:, np.newaxis]

            col_centers = center(self.tbl, axis=0)
            col_effects += col_centers
            # A 1-D array broadcasts as a row vector: one value per column.
            self.tbl -= col_centers
            grand_effect += center(col_effects)

        return grand_effect, col_effects, row_effects, self.tbl, self.tbl_org




if __name__ == "__main__":
    # Demo: median-polish a table loaded from CSV and derive scores from the
    # residuals scaled by their median absolute deviation.
    #
    # Data sets this script has been used with:
    #   * ArizonaTmp.csv - Arizona state temperatures, chapter 10 of
    #     Tukey, John W (1977). Exploratory Data Analysis. Addison-Wesley.
    #     ISBN 0-201-07616-0.
    #   * ripley.csv - volume of rubber, chapter 6 of William N. Venables and
    #     Brian D. Ripley (2002). Statistics Complements to Modern Applied
    #     Statistics with S, ISBN 0-387-95457-0 (that example multiplied the
    #     loaded table by 10000).
    data_path = '/Users/markfang/Dropbox/UCSD Grad work/RNA-Yeo Lab/Python scripts/ArizonaTmp.csv'

    # Hand the path itself to the loader: numpy then opens and closes the
    # file, so no handle is leaked and the 'rU' open mode (removed in
    # Python 3.11) is not needed.
    arr = MedianPolish.csv_to_ndarray(data_path)

    # Center the table on its overall mean before polishing.
    tbl_avg = np.average(arr)
    arr -= tbl_avg
    print(arr)
    mp = MedianPolish(arr)

    # First argument is the number of polish iterations to run.
    ge, ce, row_eff, resid, tbl_org = mp.median_polish(10, "median")

    # tbl_org is the same array object as arr, so adding the mean back in
    # place restores the values that were loaded from the file.
    row_eff_col = row_eff[:, np.newaxis]
    tbl_org += tbl_avg
    # Rebuild the residuals from the fitted effects to check that the row
    # and column effects reproduce the polished table.
    tbl_resid = tbl_org - (tbl_avg + ce + row_eff_col)

    print(tbl_org)
    print(row_eff_col)
    print(ce)
    print(tbl_resid)

    # Median absolute deviation of the residuals about their median.
    tbl_resid_minusmedians = tbl_resid - np.median(tbl_resid)
    print(tbl_resid_minusmedians)
    median_absdev = np.median(np.absolute(tbl_resid_minusmedians))
    print(median_absdev)
    # Score each cell as its residual in units of the median absolute
    # deviation.
    tbl_bscore = tbl_resid / median_absdev
    print(tbl_bscore)
    
    

[[  1.25238095  26.15238095  30.65238095]
 [ -0.54761905  24.35238095  29.75238095]
 [ -6.94761905  18.75238095  24.35238095]
 [-17.84761905   6.85238095  12.45238095]
 [-28.14761905  -5.54761905   0.25238095]
 [-35.54761905 -11.84761905  -6.84761905]
 [-38.64761905 -14.24761905  -8.64761905]]
[[ 65.2  90.1  94.6]
 [ 63.4  88.3  93.7]
 [ 57.   82.7  88.3]
 [ 46.1  70.8  76.4]
 [ 35.8  58.4  64.2]
 [ 28.4  52.1  57.1]
 [ 25.3  49.7  55.3]]
[[ 25.95238095]
 [ 24.15238095]
 [ 18.75238095]
 [  6.85238095]
 [ -5.34761905]
 [-11.84761905]
 [-14.24761905]]
[-24.7   0.    5.6]
[[  0.00000000e+00   2.00000000e-01  -9.00000000e-01]
 [ -1.42108547e-14   2.00000000e-01   0.00000000e+00]
 [ -1.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   1.42108547e-14]
 [  1.90000000e+00  -2.00000000e-01   0.00000000e+00]
 [  1.00000000e+00   0.00000000e+00  -6.00000000e-01]
 [  3.00000000e-01   0.00000000e+00   0.00000000e+00]]
[[  0.00000000e+00   2.00000000e-01  -9.00000