In [105]:
import pandas as pd
import numpy as np
from scipy import stats

# Create dummy dataframe: 10 rows x 3 columns (A, B, C) of standard-normal draws.
# A fixed seed on a local RandomState makes every Restart & Run All produce the
# same numbers without touching numpy's global random state.
SEED = 0
rng = np.random.RandomState(SEED)
df = pd.DataFrame(rng.randn(10, 3), columns=list('ABC'))
print(df)

          A         B         C
0 -0.485411  1.779982  0.264710
1 -0.748583  0.585372  0.175153
2  0.324866 -1.345469 -0.094793
3 -1.844300 -0.369252  0.766790
4  0.475629 -0.424034  1.223641
5  2.217760  1.351454 -0.672208
6  0.336121  0.149488  1.643870
7  0.146909 -0.731763 -0.643913
8 -1.622560  0.192459  1.458735
9  0.101350  1.923659  1.569085


In [106]:
# copy a column so we have one untouched column
# D is an independent copy of C: NaN values written into C later must not
# show up in D, so D can serve as the "no missing data" reference column.
df['D'] = df['C'].copy()
print(df)

          A         B         C         D
0 -0.485411  1.779982  0.264710  0.264710
1 -0.748583  0.585372  0.175153  0.175153
2  0.324866 -1.345469 -0.094793 -0.094793
3 -1.844300 -0.369252  0.766790  0.766790
4  0.475629 -0.424034  1.223641  1.223641
5  2.217760  1.351454 -0.672208 -0.672208
6  0.336121  0.149488  1.643870  1.643870
7  0.146909 -0.731763 -0.643913 -0.643913
8 -1.622560  0.192459  1.458735  1.458735
9  0.101350  1.923659  1.569085  1.569085


In [68]:
# add some NaN to make sure math is working
# .iat takes (row position, column position): one NaN each goes into
# columns A, B and C, while D is left complete for comparison.
df.iat[0, 0] = np.nan  # row 0, column A
df.iat[2, 1] = np.nan  # row 2, column B
df.iat[6, 2] = np.nan  # row 6, column C
print(df)

          A         B         C         D
0       NaN  1.484082  0.724668  0.724668
1 -1.287107  0.624174  1.229390  1.229390
2  0.332089       NaN -0.401652 -0.401652
3  0.192129 -1.191247  1.304241  1.304241
4 -1.008701  0.683243  0.674695  0.674695
5 -0.548580  0.753986 -0.635863 -0.635863
6  0.988166 -0.103369       NaN  0.210190
7 -1.336377 -1.071787  0.305901  0.305901
8  0.202379  0.352954 -0.553086 -0.553086
9 -0.344860 -0.630883 -0.258813 -0.258813


In [69]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
# The count row reports non-NaN entries only, so columns holding a NaN show
# one fewer than the untouched column D.
column_summary = df.describe()
column_summary

Unnamed: 0,A,B,C,D
count,9.0,9.0,9.0,10.0
mean,-0.312318,0.100128,0.265498,0.259967
std,0.803635,0.910923,0.757505,0.714397
min,-1.336377,-1.191247,-0.635863,-0.635863
25%,-1.008701,-0.630883,-0.401652,-0.365942
50%,-0.34486,0.352954,0.305901,0.258046
75%,0.202379,0.683243,0.724668,0.712175
max,0.988166,1.484082,1.304241,1.304241


In [70]:
# compute zscores
# For each source column add "<name>_zscore" = (x - mean) / std.
# pandas mean()/std() skip NaN, so a NaN cell only yields a NaN z-score for
# that cell. ddof=0 selects the population standard deviation (the same
# default scipy.stats.zscore uses).
# NOTE(review): stats.zscore(df) was not used here, presumably because its
# default nan_policy propagates NaN through the whole column -- confirm.
ZSCORE_SUFFIX = '_zscore'

# Only z-score the source columns. Skipping columns that already carry the
# suffix keeps this cell idempotent: re-running it recomputes the same
# columns instead of creating "A_zscore_zscore" and so on.
cols = [col for col in df.columns if not str(col).endswith(ZSCORE_SUFFIX)]
for col in cols:
    col_zscore = col + ZSCORE_SUFFIX  # name of the derived column
    df[col_zscore] = (df[col] - df[col].mean()) / df[col].std(ddof=0)
print(df)

          A         B         C         D  A_zscore  B_zscore  C_zscore  \
0       NaN  1.484082  0.724668  0.724668       NaN  1.611447  0.642931   
1 -1.287107  0.624174  1.229390  1.229390 -1.286553  0.610189  1.349644   
2  0.332089       NaN -0.401652 -0.401652  0.850506       NaN -0.934145   
3  0.192129 -1.191247  1.304241  1.304241  0.665783 -1.503651  1.454451   
4 -1.008701  0.683243  0.674695  0.674695 -0.919106  0.678967  0.572959   
5 -0.548580  0.753986 -0.635863 -0.635863 -0.311826  0.761338 -1.262088   
6  0.988166 -0.103369       NaN  0.210190  1.716415 -0.236948       NaN   
7 -1.336377 -1.071787  0.305901  0.305901 -1.351582 -1.364554  0.056573   
8  0.202379  0.352954 -0.553086 -0.553086  0.679311  0.294386 -1.146183   
9 -0.344860 -0.630883 -0.258813 -0.258813 -0.042950 -0.851174 -0.734141   

   D_zscore  
0  0.685666  
1  1.430384  
2 -0.976219  
3  1.540827  
4  0.611931  
5 -1.321798  
6 -0.073446  
7  0.067776  
8 -1.199660  
9 -0.765460  


In [104]:
# check for the min / max zscores for a given row
# Select the z-score columns by name rather than by position (the original
# sliced row[4:], which silently breaks if the number of source columns
# changes). min/max along axis=1 skip NaN, like Series.min()/max().
zscore_cols = [col for col in df.columns if str(col).endswith('_zscore')]
row_min = df[zscore_cols].min(axis=1)
row_max = df[zscore_cols].max(axis=1)
for index, lowest, highest in zip(df.index, row_min, row_max):
    print("Row {}, Min Zscore {: 1.3f}, Max Zscore {: 1.3f}".format(index, lowest, highest))
# To inspect a single row by position instead:
#   row = 6
#   print(df.iloc[row][zscore_cols].min(), df.iloc[row][zscore_cols].max())

Row 0, Min Zscore  0.643, Max Zscore  1.611
Row 1, Min Zscore -1.287, Max Zscore  1.430
Row 2, Min Zscore -0.976, Max Zscore  0.851
Row 3, Min Zscore -1.504, Max Zscore  1.541
Row 4, Min Zscore -0.919, Max Zscore  0.679
Row 5, Min Zscore -1.322, Max Zscore  0.761
Row 6, Min Zscore -0.237, Max Zscore  1.716
Row 7, Min Zscore -1.365, Max Zscore  0.068
Row 8, Min Zscore -1.200, Max Zscore  0.679
Row 9, Min Zscore -0.851, Max Zscore -0.043


In [107]:
# Show the current contents of the frame (print() works on Python 2 and 3).
print(df)

          A         B         C         D
0 -0.485411  1.779982  0.264710  0.264710
1 -0.748583  0.585372  0.175153  0.175153
2  0.324866 -1.345469 -0.094793 -0.094793
3 -1.844300 -0.369252  0.766790  0.766790
4  0.475629 -0.424034  1.223641  1.223641
5  2.217760  1.351454 -0.672208 -0.672208
6  0.336121  0.149488  1.643870  1.643870
7  0.146909 -0.731763 -0.643913 -0.643913
8 -1.622560  0.192459  1.458735  1.458735
9  0.101350  1.923659  1.569085  1.569085


In [138]:
# assuming you know the index this will find the column of the max value for that index
# The row is picked by position once and reused, so both methods look at
# exactly the same row (the original mixed positional df.iloc[1] with
# label-based df[col][1], which only agree for a default 0..n-1 index).
row_pos = 1
row = df.iloc[row_pos]

# Method one: idxmax() returns the column label of the row's largest value.
print("Method one = {}".format(row.idxmax()))

a = row.max()
print(a)

# Method two: scan the columns and report every column whose value in this
# row equals the maximum (ties are all printed).
cols = list(df.columns)
for col in cols:
    if a == row[col]:
        print("Method two = {}".format(col))


Method one = B
0.585371755535
Method two = B
