In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression, TransparentLinearRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale
from IPython.display import display, HTML
from ipy_table import *

In [2]:
# calculate # of instances between mean and splitting points for each feature
def get_num_instances(X, mean1, mean2):
    """Count, per feature, the rows lying strictly between two reference points.

    Parameters
    ----------
    X : 2-D array with one row per instance and one column per feature.
    mean1, mean2 : sequences indexed by feature. For each column the smaller
        entry acts as the lower bound and the larger one as the upper bound.

    Returns
    -------
    list holding one count per column of ``X``; values equal to either bound
    are not counted.
    """
    _, n_features = np.shape(X)
    counts = []
    for col in range(n_features):
        first, second = mean1[col], mean2[col]
        # Order the two reference points so the test works in either direction.
        low, high = (second, first) if first > second else (first, second)
        column = X[:, col]
        inside = (column > low) & (column < high)
        counts.append(inside.sum())
    return counts

In [3]:
# All information for test instances
class instance(object):
    """One test instance viewed through three fitted classifiers.

    Stores the same instance in three representations (original, ``ss`` and
    ``ig`` feature values) together with the classifier fitted on each
    representation, and caches every classifier's evidence totals and
    predicted probabilities for that instance.

    Each classifier must provide ``coef_``, ``intercept_``,
    ``predict_evidences(X)`` returning a ``(neg, pos)`` pair, and
    ``predict_proba(X)``.
    """

    def __init__(self, feature_name, ori_features, ss_features, ig_features, clf_ori, clf_ss, clf_ig):
        """Store the inputs and pre-compute evidences and probabilities.

        feature_name  -- feature names, one per feature value
        *_features    -- 1-D arrays holding the instance in each representation
        clf_*         -- classifier fitted on the matching representation
        """
        self.feature_name = feature_name
        self.ori_features = ori_features
        self.ss_features = ss_features
        self.ig_features = ig_features
        self.clf_ori = clf_ori
        self.clf_ss = clf_ss
        self.clf_ig = clf_ig

        # Unrounded values; get_information() rounds copies of these for display.
        self.neg_evi_ori, self.pos_evi_ori = self.evidence_prediction(self.clf_ori, self.ori_features)
        self.neg_evi_ss, self.pos_evi_ss = self.evidence_prediction(self.clf_ss, self.ss_features)
        self.neg_evi_ig, self.pos_evi_ig = self.evidence_prediction(self.clf_ig, self.ig_features)

        self.proba_ori = self.proba_prediction(self.clf_ori, self.ori_features)
        self.proba_ss = self.proba_prediction(self.clf_ss, self.ss_features)
        self.proba_ig = self.proba_prediction(self.clf_ig, self.ig_features)

    def get_information(self):
        """Build the comparison table for this instance.

        The table has a header row, one row per feature (name, the three
        feature values and the three ``weight * value`` products, rounded to
        2 decimals), followed by 'Bias', 'Neg_evi', 'Pos_evi' and 'Proba'
        summary rows. The assembled array is passed to ``make_table`` and that
        call's result is returned.

        All rounding is done on local copies, so the cached attributes keep
        their original values and repeated calls produce the same table.
        """
        header = [['Features', 'Ori_value', 'ss_value', 'ig_value','w * v_ori', 'w * v_ss', 'w * v_ig']]

        # Per-feature contribution (weight * value) under each representation.
        ori_evidence = np.around(np.multiply(self.ori_features, self.clf_ori.coef_).flatten(), decimals=2)
        ss_evidence = np.around(np.multiply(self.ss_features, self.clf_ss.coef_).flatten(), decimals=2)
        ig_evidence = np.around(np.multiply(self.ig_features, self.clf_ig.coef_).flatten(), decimals=2)

        ori_features_round = np.around(self.ori_features, decimals=2)
        ss_features_round = np.around(self.ss_features, decimals=2)
        ig_features_round = np.around(self.ig_features, decimals=2)

        # Display copies only: writing these back to self used to turn the
        # (1, n_classes) probability array into a 1-D array, so a second call
        # showed a single scalar in the 'Proba' row.
        neg_evi_ori = np.around(self.neg_evi_ori, decimals=2)
        pos_evi_ori = np.around(self.pos_evi_ori, decimals=2)
        neg_evi_ss = np.around(self.neg_evi_ss, decimals=2)
        pos_evi_ss = np.around(self.pos_evi_ss, decimals=2)
        neg_evi_ig = np.around(self.neg_evi_ig, decimals=2)
        pos_evi_ig = np.around(self.pos_evi_ig, decimals=2)

        proba_ori = np.around(self.proba_ori[0], decimals=3)
        proba_ss = np.around(self.proba_ss[0], decimals=3)
        proba_ig = np.around(self.proba_ig[0], decimals=3)

        intercept_ori = round(self.clf_ori.intercept_[0], 2)
        intercept_ss = round(self.clf_ss.intercept_[0], 2)
        intercept_ig = round(self.clf_ig.intercept_[0], 2)

        # One row per column of the final table; transposed below so that
        # each feature becomes a row.
        value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round,
                                ori_evidence, ss_evidence, ig_evidence])
        value_array = combine_table(header, value_array.T)

        # Summary rows use the first three value columns; the rest are padded with 'None'.
        bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None', 'None', 'None'])
        neg_list = np.array(['Neg_evi', neg_evi_ori[0], neg_evi_ss[0], neg_evi_ig[0], 'None', 'None', 'None'])
        pos_list = np.array(['Pos_evi', pos_evi_ori[0], pos_evi_ss[0], pos_evi_ig[0], 'None', 'None', 'None'])
        prob_list = np.array(['Proba', str(proba_ori), str(proba_ss), str(proba_ig), 'None', 'None', 'None'])

        other_array = np.array([bias_list, neg_list, pos_list, prob_list])

        result = np.concatenate((value_array, other_array), axis=0)

        return make_table(result)

    def evidence_prediction(self, clf, features):
        """Return ``clf.predict_evidences`` for ``features`` reshaped to one row."""
        features = features.reshape((1,-1))
        neg_evi, pos_evi = clf.predict_evidences(features)
        return neg_evi, pos_evi

    def proba_prediction(self, clf, features):
        """Return ``clf.predict_proba`` for ``features`` reshaped to one row."""
        features = features.reshape((1,-1))
        proba = clf.predict_proba(features)
        return proba

In [4]:
# combine header and table 
def combine_table(header, table):
    """Return ``header`` stacked on top of ``table`` (joined along axis 0)."""
    stacked = np.concatenate([header, table], axis=0)
    return stacked

In [5]:
def transform(X, mns, sstd, axis=0):
    """Return ``(X - mns) / sstd``.

    When ``axis`` is non-zero and ``mns`` has fewer dimensions than ``X``,
    both ``mns`` and ``sstd`` first get a new axis inserted at ``axis`` so
    they broadcast against ``X``; otherwise they are used as given.
    """
    data = np.asanyarray(X)
    # `axis and ...` short-circuits, so mns.ndim is only read for axis != 0.
    needs_expand = axis and mns.ndim < data.ndim
    if not needs_expand:
        return (data - mns) / sstd

    center = np.expand_dims(mns, axis=axis)
    spread = np.expand_dims(sstd, axis=axis)
    return (data - center) / spread

In [6]:
# Dataset configuration. Active dataset: breast-w
# source link: https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29
# relevant paper: Multisurface method of pattern separation for medical diagnosis applied to breast cytology

# NOTE(review): absolute, machine-specific directory; adjust before running elsewhere.
DATA_DIR = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\"

# file name -> (index of the class column, total number of columns, class labels)
DATASETS = {
    "diabetes.csv": (8, 9, ['tested_negative', 'tested_positive']),
    "breast-w.csv": (9, 10, ['benign', 'malignant']),
}

# Change this one name to switch datasets; everything below is derived from it.
dataset = "breast-w.csv"
class_index, num_cols, classes = DATASETS[dataset]

# Every column except the class column is read as a feature.
read_cols = [i for i in range(num_cols) if i != class_index]
file_path = DATA_DIR + dataset


In [7]:
# Feature columns of breast-w.csv, for reference:
# ['Clump_Thickness' 'Cell_Size_Uniformity' 'Cell_Shape_Uniformity' 'Marginal_Adhesion' 'Single_Epi_Cell_Size' 'Bare_Nuclei' 'Bland_Chromatin'
#  'Normal_Nucleoli' 'Mitoses']

# Read only the first line of the CSV to recover the column names.
with open(file_path, 'r') as f:
    # Strip the line terminator first so the last column name does not end in "\n".
    header = np.array(f.readline().rstrip('\r\n').split(','))

# Names of the feature columns only (the class column is not in read_cols).
feature_names = header[read_cols]

In [8]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of this data set:",np.shape(X)

ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.95, random_state=42)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print "# of test instances: ",len(y_test)

The shape of this data set: (699L, 9L)
# of test instances:  665


In [9]:
# Split the column indices into binary features (exactly two distinct values
# in X) and non-binary features (every other column).
num_features = X.shape[1]
binary = [col for col in range(num_features)
          if len(np.unique(X[:, col])) == 2]
non_binary = [col for col in range(num_features) if col not in binary]

In [10]:
# Build the three representations of the data used by the three classifiers:
# original values, standard-scaled (ss) values and IG-scaled (ig) values.

# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features
X_ss = np.copy(X)

# NOTE(review): scale() is applied to all rows of X (train and test together)
# before the split, whereas the IG scaler below is fit on the training rows
# only -- confirm that this difference is intended.
if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
# Binary columns are recoded so that 0 becomes -1; other values are left as they are.
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

# Indexing with index arrays yields copies, so the in-place writes below do
# not modify X_ig itself.
X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

# Fit the IG scaler on the training rows (using y_train) and transform them.
if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

# Same 0 -> -1 recoding of binary columns, training rows.
if len(binary) > 0: 
    
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
# Apply the already-fitted IG scaler to the test rows (no refit).
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
# Same 0 -> -1 recoding of binary columns, test rows.
if len(binary) > 0:
    print "binary features exist"
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test


In [11]:
# Per-feature reference points in the original (unscaled) feature space, and
# how many instances fall strictly between them.

# Mean of the non-binary columns only. Averaging all of X would yield
# num_feat values and fail on assignment into mean_X[non_binary] as soon as
# the data contains a binary column.
mean_feature_ss = np.mean(X[:, non_binary], axis=0)
# presumably the per-feature splitting points learned by the fitted IG scaler -- confirm in scale.py
split_feature_ig = scale_.mns

mean_X = np.zeros((num_feat, ))
split_X = np.zeros((num_feat, ))

# Binary columns keep 0 for both reference points.
mean_X[non_binary] = mean_feature_ss
mean_X[binary] = 0

split_X[non_binary] = split_feature_ig
split_X[binary] = 0

# Counts are computed with the unrounded reference points.
num_train = get_num_instances(X_train_ori, mean_X , split_X)
num_test = get_num_instances(X_test_ori, mean_X , split_X)
num_all = get_num_instances(X, mean_X , split_X)

# Rounded copies, used for display here and as interval bounds by later cells.
mean = np.around(mean_X, decimals=2)
splitting = np.around(split_X, decimals=2)

table1_header = [['Features', 'mean', 'splitting', '# of train', '# of test', '# of total']]

# One list per table column; transposed so that each feature becomes a row.
result1 = np.array([header[:class_index].tolist(), mean.tolist(), splitting.tolist(), num_train, num_test, num_all])
result1 = combine_table(table1_header,result1.T)

display(make_table(result1))

0,1,2,3,4,5
Features,mean,splitting,# of train,# of test,# of total
Clump_Thickness,4.42,6.5,10,154,164
Cell_Size_Uniformity,3.13,3.5,0,0,0
Cell_Shape_Uniformity,3.21,4.0,0,0,0
Marginal_Adhesion,2.81,1.5,3,55,58
Single_Epi_Cell_Size,3.22,2.5,6,66,72
Bare_Nuclei,3.54,1.5,3,55,58
Bland_Chromatin,3.44,3.5,0,0,0
Normal_Nucleoli,2.87,5.5,2,79,81
Mitoses,1.59,3.5,6,62,68


In [12]:
# fit train instances for each classifier
# One classifier per feature representation, all trained on the same labels.

clf_ori = TransparentLogisticRegression()
clf_ss = TransparentLogisticRegression()
clf_ig = TransparentLogisticRegression()

clf_ori.fit(X_train_ori, y_train)
clf_ss.fit(X_train_ss, y_train)
# Last expression of the cell: its return value is what the notebook displays.
clf_ig.fit(X_train_ig, y_train)

TransparentLogisticRegression(C=1.0, class_weight=None, dual=False,
               fit_intercept=True, intercept_scaling=1, max_iter=100,
               multi_class='ovr', n_jobs=1, penalty='l2',
               random_state=None, solver='liblinear', tol=0.0001,
               verbose=0, warm_start=False)

In [13]:
y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.896240601504
0.962406015038
0.962406015038


In [14]:
# Table of learned weights: the intercept ('Bias') followed by one coefficient
# per feature, for each of the three classifiers, rounded to 2 decimals.
ori_weight = np.around(np.concatenate((clf_ori.intercept_, clf_ori.coef_[0])), decimals=2)
ss_weight = np.around(np.concatenate((clf_ss.intercept_, clf_ss.coef_[0])), decimals=2)
ig_weight = np.around(np.concatenate((clf_ig.intercept_, clf_ig.coef_[0])), decimals=2)

table2_header = [['Features', 'ori_weight', 'ss_weight', 'ig_weight']]

# Row labels: 'Bias' first, then the feature names in column order.
features = ['Bias'] + header[:class_index].tolist()

# One list per table column; transposed so that each feature becomes a row.
table2 = np.array([features, ori_weight, ss_weight, ig_weight])
result2 = combine_table(table2_header, table2.T)

make_table(result2)

0,1,2,3
Features,ori_weight,ss_weight,ig_weight
Bias,-1.74,-0.66,-0.27
Clump_Thickness,-0.55,0.41,0.67
Cell_Size_Uniformity,0.84,0.64,0.62
Cell_Shape_Uniformity,0.05,0.77,0.82
Marginal_Adhesion,0.11,0.19,0.11
Single_Epi_Cell_Size,-0.12,0.81,0.8
Bare_Nuclei,0.33,0.5,0.52
Bland_Chromatin,0.4,0.93,0.97
Normal_Nucleoli,0.11,0.25,0.36


In [15]:
# Non-zero wherever the ss and ig classifiers predict different labels.
diff_decision = np.subtract(y_predict_ss, y_predict_ig)

# Absolute gap between the two classifiers' probabilities in column 0.
diff_proba = np.abs(y_pred_prob_ss[:, 0] - y_pred_prob_ig[:, 0])

# print len(np.where(diff_decision!=0)[0])
# print diff_proba

In [16]:
# Clump_Thickness

print "This is feature ", header[0]

current_feature = X_test_ori[:,0]

num_feature1 = num_test[0]

mean1 = mean[0]

split1 = splitting[0]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        
        print "There is no different decision made by 2 classifiers."
        
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        print "The # of the instances is ", len(index1_different_decision)
        
        for i in index1_different_decision:
                      
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.

    print "The most difference of probability between mean and splitting is following: "
    
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Clump_Thickness
This is the instances that IG and SS made different decisions between mean and splitting. 
The # of the instances is  1
The actual label of following instance is  0


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,6.0,0.56,-0.17,-3.32,0.23,-0.11
Cell_Size_Uniformity,3.0,-0.04,-0.16,2.52,-0.03,-0.1
Cell_Shape_Uniformity,3.0,-0.07,-0.32,0.16,-0.05,-0.26
Marginal_Adhesion,3.0,0.07,0.41,0.34,0.01,0.04
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,6.0,1.05,0.98,2.4,0.98,0.95
Normal_Nucleoli,1.0,-0.61,-1.14,0.11,-0.15,-0.41
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The most difference of probability between mean and splitting is following: 
The most difference of probability is  0.170628888321


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,5.0,0.21,-0.5,-2.77,0.09,-0.33
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,6.0,0.94,0.64,0.33,0.72,0.52
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,2.0,-0.55,-0.22,-0.24,-0.44,-0.18
Bare_Nuclei,10.0,1.79,1.55,3.33,0.9,0.81
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,1.0,-0.61,-1.14,0.11,-0.15,-0.41
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The acutal label is  1


In [17]:
# Cell_Size_Uniformity

print "This is feature ", header[1]
current_feature = X_test_ori[:,1]

num_feature1 = num_test[1]

mean1 = mean[1]

split1 = splitting[1]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Cell_Size_Uniformity
There is no instance between mean and splitting point.


In [18]:
# Cell_Shape_Uniformity

print "This is feature ", header[2]
current_feature = X_test_ori[:,2]

num_feature1 = num_test[2]
mean1 = mean[2]
split1 = splitting[2]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]


This is feature  Cell_Shape_Uniformity
There is no instance between mean and splitting point.


In [19]:
# Marginal_Adhesion
print "This is feature ", header[3]
current_feature = X_test_ori[:,3]

num_feature1 = num_test[3]

mean1 = mean[3]

split1 = splitting[3]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Marginal_Adhesion
There is no different decision made by 2 classifiers.
The most difference of probability between mean and splitting is following: 
The most difference of probability is  0.12341413856


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,3.0,-0.5,-1.16,-1.66,-0.21,-0.77
Cell_Size_Uniformity,3.0,-0.04,-0.16,2.52,-0.03,-0.1
Cell_Shape_Uniformity,5.0,0.6,0.32,0.27,0.46,0.26
Marginal_Adhesion,2.0,-0.28,0.14,0.23,-0.05,0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,10.0,1.79,1.55,3.33,0.9,0.81
Bland_Chromatin,7.0,1.46,1.37,2.8,1.36,1.34
Normal_Nucleoli,1.0,-0.61,-1.14,0.11,-0.15,-0.41
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The acutal label is  1


In [20]:
# Single_Epi_Cell_Size

print "This is feature ", header[4]
current_feature = X_test_ori[:,4]

num_feature1 = num_test[4]

mean1 = mean[4]

split1 = splitting[4]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Single_Epi_Cell_Size
This is the instances that IG and SS made different decisions between mean and splitting. 
3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,3.0,-0.5,-1.16,-1.66,-0.21,-0.77
Cell_Size_Uniformity,6.0,0.94,0.82,5.05,0.6,0.51
Cell_Shape_Uniformity,4.0,0.27,0.0,0.22,0.2,0.0
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,3.0,-0.15,0.27,1.0,-0.08,0.14
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,4.0,0.37,-0.38,0.43,0.09,-0.14
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


3
The actual label of following instance is  0


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,6.0,0.56,-0.17,-3.32,0.23,-0.11
Cell_Size_Uniformity,3.0,-0.04,-0.16,2.52,-0.03,-0.1
Cell_Shape_Uniformity,3.0,-0.07,-0.32,0.16,-0.05,-0.26
Marginal_Adhesion,3.0,0.07,0.41,0.34,0.01,0.04
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,6.0,1.05,0.98,2.4,0.98,0.95
Normal_Nucleoli,1.0,-0.61,-1.14,0.11,-0.15,-0.41
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The most difference of probability between mean and splitting is following: 
The most difference of probability is  0.335179990581


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


The acutal label is  1


In [21]:
# Bare_Nuclei
print "This is feature ", header[5]
current_feature = X_test_ori[:,5]

num_feature1 = num_test[5]

mean1 = mean[5]

split1 = splitting[5]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Bare_Nuclei
This is the instances that IG and SS made different decisions between mean and splitting. 
3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,3.0,-0.5,-1.16,-1.66,-0.21,-0.77
Cell_Size_Uniformity,6.0,0.94,0.82,5.05,0.6,0.51
Cell_Shape_Uniformity,4.0,0.27,0.0,0.22,0.2,0.0
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,3.0,-0.15,0.27,1.0,-0.08,0.14
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,4.0,0.37,-0.38,0.43,0.09,-0.14
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


3
The actual label of following instance is  0


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,6.0,0.56,-0.17,-3.32,0.23,-0.11
Cell_Size_Uniformity,3.0,-0.04,-0.16,2.52,-0.03,-0.1
Cell_Shape_Uniformity,3.0,-0.07,-0.32,0.16,-0.05,-0.26
Marginal_Adhesion,3.0,0.07,0.41,0.34,0.01,0.04
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,6.0,1.05,0.98,2.4,0.98,0.95
Normal_Nucleoli,1.0,-0.61,-1.14,0.11,-0.15,-0.41
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The most difference of probability between mean and splitting is following: 
The most difference of probability is  0.335179990581


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


The acutal label is  1


In [22]:
# Bland_Chromatin
print "This is feature ", header[6]
current_feature = X_test_ori[:,6]

num_feature1 = num_test[6]

mean1 = mean[6]

split1 = splitting[6]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Bland_Chromatin
There is no instance between mean and splitting point.


In [23]:
# Normal_Nucleoli
print "This is feature ", header[7]
current_feature = X_test_ori[:,7]

num_feature1 = num_test[7]

mean1 = mean[7]

split1 = splitting[7]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        
        for i in index1_different_decision:
            
            print len(index1_different_decision)
            
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Normal_Nucleoli
This is the instances that IG and SS made different decisions between mean and splitting. 
3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,3.0,-0.5,-1.16,-1.66,-0.21,-0.77
Cell_Size_Uniformity,6.0,0.94,0.82,5.05,0.6,0.51
Cell_Shape_Uniformity,4.0,0.27,0.0,0.22,0.2,0.0
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,3.0,-0.15,0.27,1.0,-0.08,0.14
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,4.0,0.37,-0.38,0.43,0.09,-0.14
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


3
The actual label of following instance is  1


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,3.0,-0.5,-1.16,-1.66,-0.21,-0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,4.0,0.27,0.0,0.22,0.2,0.0
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,5.0,0.81,1.1,-0.61,0.65,0.88
Bare_Nuclei,1.0,-0.71,-0.09,0.33,-0.36,-0.05
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,1.0,-0.34,-1.03,-0.6,0.04,-0.16


The most difference of probability between mean and splitting is following: 
The most difference of probability is  0.335179990581


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,10.0,1.98,1.16,-5.54,0.82,0.77
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,2.0,-0.41,-0.64,0.11,-0.31,-0.52
Marginal_Adhesion,1.0,-0.63,-0.14,0.11,-0.12,-0.01
Single_Epi_Cell_Size,3.0,-0.1,0.22,-0.37,-0.08,0.18
Bare_Nuclei,2.0,-0.43,0.09,0.67,-0.22,0.05
Bland_Chromatin,4.0,0.23,0.2,1.6,0.22,0.19
Normal_Nucleoli,3.0,0.04,-0.63,0.33,0.01,-0.23
Mitoses,10.0,4.91,2.69,-6.03,-0.55,0.43


The acutal label is  1


In [24]:
# Mitoses
print "This is feature ", header[8]
current_feature = X_test_ori[:,8]

num_feature1 = num_test[8]

mean1 = mean[8]

split1 = splitting[8]

if num_feature1 == 0:
    
    print "There is no instance between mean and splitting point."
    
else:

    upper = max(mean1, split1)
    lower = min(mean1, split1)
    
    index1 = np.where(np.logical_and(current_feature>lower, current_feature<upper))[0]
    
    diff_decision_interval = diff_decision[index1]
    
    index1_different_decision = np.where(diff_decision_interval!=0)[0]
    
    if len(index1_different_decision)==0:
        print "There is no different decision made by 2 classifiers."
    else:
        
        print "This is the instances that IG and SS made different decisions between mean and splitting. "
        print len(index1_different_decision)
        
        for i in index1_different_decision:
                     
            temp_instance = instance(header[:class_index],X_test_ori[index1[i]], 
                                               X_test_ss[index1[i]], X_test_ig[index1[i]], 
                                               clf_ori, clf_ss, clf_ig)
            temp_table = temp_instance.get_information()
            print "The actual label of following instance is ", y_test[index1[i]]
            display(temp_table)
    
# The most differentce of probability between 2 classifiers.
    print "The most difference of probability between mean and splitting is following: "
    print "This is the most differentce of probability between 2 classifiers."

    proba_diff_interval = diff_proba[index1]
    
    most_diff_prob_index = np.argmax(proba_diff_interval)
    
    print "The most difference of probability is ", diff_proba[index1[most_diff_prob_index]]
    
    most_diff_prob_instance = instance(header[:class_index],X_test_ori[index1[most_diff_prob_index]], X_test_ss[index1[most_diff_prob_index]],
                                     X_test_ig[index1[most_diff_prob_index]], clf_ori, clf_ss, clf_ig)
    most_diff_prob_instance_table = most_diff_prob_instance.get_information()
    
    display(most_diff_prob_instance_table)
    
    print "The acutal label is ", y_test[index1[most_diff_prob_index]]

This is feature  Mitoses
There is no different decision made by 2 classifiers.
The most difference of probability between mean and splitting is following: 
This is the most differentce of probability between 2 classifiers.
The most difference of probability is  0.0872682121408


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,7.0,0.92,0.17,-3.88,0.38,0.11
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,5.0,0.6,0.32,0.27,0.46,0.26
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,2.0,-0.55,-0.22,-0.24,-0.44,-0.18
Bare_Nuclei,10.0,1.79,1.55,3.33,0.9,0.81
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,8.0,1.68,0.63,0.87,0.42,0.23
Mitoses,2.0,0.24,-0.62,-1.21,-0.03,-0.1


The acutal label is  1


In [25]:
diff_decision = y_predict_ss - y_predict_ig

index_diff_decision = np.where(diff_decision!=0)[0]

print len(index_diff_decision)

# if len(index_diff_decision) == 0:
    
#     print "There is no different decision between 2 classifiers."
    
# else:

#     print "The index of test instances that SS and IG make different decisions：", index_diff_decision
#     print "The probability that 2 classifiers made: "
#     print y_pred_prob_ss[index_diff_decision]
#     print y_pred_prob_ig[index_diff_decision]
    
    
#     for i in index_diff_decision:
    
#         most_diff_decision = instance(header[:class_index],X_test_ori[i], X_test_ss[i],
#                                      X_test_ig[i], clf_ori, clf_ss, clf_ig)
#         most_diff_decision_table = most_diff_decision.get_information()    
#         display(most_diff_decision_table)
    
    
    
y_pred_prob_ss_false = y_pred_prob_ss[:,0]
y_pred_prob_ig_false = y_pred_prob_ig[:,0]

abs_diff = abs(y_pred_prob_ss_false-y_pred_prob_ig_false)

max_diff_index = np.argmax(abs_diff)

print "Index: ", max_diff_index
print "Label: ", y_test[max_diff_index]
print max_diff_index, abs_diff[max_diff_index]


# print X_test_ori[max_diff_index], y_test[max_diff_index]

most_diff = instance(header[:class_index],X_test_ori[max_diff_index], X_test_ss[max_diff_index],
                                     X_test_ig[max_diff_index], clf_ori, clf_ss, clf_ig)
most_diff_table = most_diff.get_information()
display(most_diff_prob_instance_table)

4
Index:  104
Label:  1
104 0.335179990581


0,1,2,3,4,5,6
Features,Ori_value,ss_value,ig_value,w * v_ori,w * v_ss,w * v_ig
Clump_Thickness,7.0,0.92,0.17,-3.88,0.38,0.11
Cell_Size_Uniformity,4.0,0.28,0.16,3.36,0.18,0.1
Cell_Shape_Uniformity,5.0,0.6,0.32,0.27,0.46,0.26
Marginal_Adhesion,10.0,2.52,2.35,1.14,0.49,0.25
Single_Epi_Cell_Size,2.0,-0.55,-0.22,-0.24,-0.44,-0.18
Bare_Nuclei,10.0,1.79,1.55,3.33,0.9,0.81
Bland_Chromatin,3.0,-0.18,-0.2,1.2,-0.17,-0.19
Normal_Nucleoli,8.0,1.68,0.63,0.87,0.42,0.23
Mitoses,2.0,0.24,-0.62,-1.21,-0.03,-0.1


In [26]:
made_test = [np.mean(X, axis=0).tolist(), scale_.mns.tolist()]

X_mns = np.mean(X, axis=0)
X_std = np.std(X, axis=0)

made_test_ss = transform(made_test, X_mns, X_std)
made_test_ig = scale_.transform(made_test)

print clf_ss.predict(made_test_ss)
print clf_ig.predict(made_test_ig)

print clf_ss.predict_proba(made_test_ss)
print clf_ig.predict_proba(made_test_ig)

[0 0]
[0 0]
[[ 0.65823066  0.34176934]
 [ 0.64404638  0.35595362]]
[[ 0.71375378  0.28624622]
 [ 0.56719262  0.43280738]]
