In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale

In [2]:
# breast-w
# source link: https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29
# relevant paper: Multisurface method of pattern separation for medical diagnosis applied to breast cytology

dataset = "breast-w.csv"
class_index = 9
num_cols = 10
classes = ['benign', 'malignant']
read_cols = [i for i in range(num_cols) if i != class_index]
file_path = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\"+dataset

In [3]:
# ['Clump_Thickness' 'Cell_Size_Uniformity' 'Cell_Shape_Uniformity' 'Marginal_Adhesion' 'Single_Epi_Cell_Size' 'Bare_Nuclei' 'Bland_Chromatin'
#  'Normal_Nucleoli' 'Mitoses' 


with open(file_path, 'r') as f:
    header = f.readline()
    #print header
    #header = np.fromstring("a, b", dtype=np.str_, sep=',')
    header = np.array(header.split(','))
    feature_names = header[read_cols]
    
    print header

['Clump_Thickness' 'Cell_Size_Uniformity' 'Cell_Shape_Uniformity'
 'Marginal_Adhesion' 'Single_Epi_Cell_Size' 'Bare_Nuclei' 'Bland_Chromatin'
 'Normal_Nucleoli' 'Mitoses' 'Class\n']


In [4]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of X:",np.shape(X)

ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.95, random_state=2)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print len(y_test)

The shape of X: (699L, 9L)
665


In [5]:
# Determine binary features
num_features = X.shape[1]
non_binary = []
binary = []
for i in range(num_features):
    if len(np.unique(X[:,i])) != 2:
        non_binary.append(i)
    else:
        binary.append(i)
        

In [6]:
# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features
# X_ss = scale(X)
X_ss = np.copy(X)

if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

if len(binary) > 0: 
    print "binary features exist"
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
if len(binary) > 0:
    
    print "binary features exist"
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test


In [7]:
print "The mean of each features:"
print np.mean(X, axis=0)
print "The best splitting of each features"
print scale_.mns

The mean of each features:
[ 4.41773963  3.13447783  3.2074392   2.80686695  3.21602289  3.54465593
  3.43776824  2.86695279  1.58941345]
The best splitting of each features
[ 4.5  2.5  2.5  1.5  3.5  3.   4.5  3.5  1.5]


In [8]:
clf_ori = TransparentLogisticRegression()
clf_ss = TransparentLogisticRegression()
clf_ig = TransparentLogisticRegression()

In [9]:
# fit train instances for each classifier

clf_ori.fit(X_train_ori, y_train)
clf_ss.fit(X_train_ss, y_train)
clf_ig.fit(X_train_ig, y_train)


TransparentLogisticRegression(C=1.0, class_weight=None, dual=False,
               fit_intercept=True, intercept_scaling=1, max_iter=100,
               multi_class='ovr', n_jobs=1, penalty='l2',
               random_state=None, solver='liblinear', tol=0.0001,
               verbose=0, warm_start=False)

In [10]:
# print the weights for each classifiers

print clf_ori.coef_
print clf_ss.coef_
print clf_ig.coef_
print ""

print clf_ori.intercept_ 
print clf_ss.intercept_ 
print clf_ig.intercept_ 

[[-0.38583135  0.17938236  0.23548352  0.0232745  -0.70374158  1.33136849
  -0.50101944  0.43533379 -0.41052578]]
[[ 0.63586749  0.60064158  0.66842156  0.62853418  0.19613929  0.92188635
   0.47273588  0.72327142  0.16628945]]
[[ 0.69309021  0.5936199   0.68854328  0.52035959  0.25596195  0.85901456
   0.60150623  0.82816488  0.13214529]]

[-1.26210743]
[-0.29890278]
[-0.35049499]


In [11]:
# Predict test instances for each classifier

y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.867669172932
0.96992481203
0.971428571429


In [12]:
# most negative, "benign" -- Probability

Most_negative_1 = np.argmax(y_pred_prob_ori[:,0])
print "Original: Most negative(benign) instances based on probability"
print "Index of test: ",Most_negative_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_1]
print X_test_ss[Most_negative_1]
print X_test_ig[Most_negative_1]
print ""

Most_negative_2 = np.argmax(y_pred_prob_ss[:,0])
print "Standard scaling: Most negative(benign) instances based on probability"
print "Index of test: ",Most_negative_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_2]
print X_test_ss[Most_negative_2]
print X_test_ig[Most_negative_2]
print ""

Most_negative_3 = np.argmax(y_pred_prob_ig[:,0])
print "Information gain scaling: Most negative(benign) instances based on probability"
print "Index of test: ",Most_negative_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_3]
print X_test_ss[Most_negative_3]
print X_test_ig[Most_negative_3]
print ""

Original: Most negative(benign) instances based on probability
Index of test:  495
The features are (Original, standard scaling, information gain scaling)
[ 10.  10.  10.  10.  10.   1.   8.   8.   8.]
[ 1.983939    2.25152563  2.28722218  2.52095546  3.06590584 -0.70699139
  1.87236122  1.68216723  3.74045801]
[ 1.9794179   2.5227748   2.65899235  2.42276144  2.79032397 -0.49324598
  1.35009522  1.53422494  3.04921869]

Standard scaling: Most negative(benign) instances based on probability
Index of test:  32
The features are (Original, standard scaling, information gain scaling)
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.]
[-1.2146669  -0.69999505 -0.74329904 -0.63324716 -1.00149476 -0.70699139
 -1.00047147 -0.61182504 -0.34391178]
[-1.25962958 -0.50455496 -0.53179847 -0.14251538 -1.07320153 -0.49324598
 -1.35009522 -0.85234719 -0.23455528]

Information gain scaling: Most negative(benign) instances based on probability
Index of test:  32
The features are (Original, standard scaling, informat

In [13]:
# most positive, "malignant" -- Probability

Most_positive_1 = np.argmax(y_pred_prob_ori[:,1])


print "Original: Most positive(malignant) instances based on probability"
print "Index of test: ",Most_positive_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_1]
print X_test_ss[Most_positive_1]
print X_test_ig[Most_positive_1]
print ""

Most_positive_2 = np.argmax(y_pred_prob_ss[:,1])
print "Standard scaling: Most positive(malignant) instances based on probability"
print "Index of test: ",Most_positive_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_2]
print X_test_ss[Most_positive_2]
print X_test_ig[Most_positive_2]
print ""

Most_positive_3 = np.argmax(y_pred_prob_ig[:,1])
print "Information gain scaling: Most positive(malignant) instances based on probability"
print "Index of test: ",Most_positive_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_3]
print X_test_ss[Most_positive_3]
print X_test_ig[Most_positive_3]
print ""




Original: Most positive(malignant) instances based on probability
Index of test:  62
The features are (Original, standard scaling, information gain scaling)
[  6.  10.  10.  10.   4.  10.   7.  10.   1.]
[ 0.56233637  2.25152563  2.28722218  2.52095546  0.35430544  1.79351268
  1.46195655  2.33759359 -0.34391178]
[ 0.53984125  2.5227748   2.65899235  2.42276144  0.21464031  1.72636091
  0.96435373  2.21610269 -0.23455528]

Standard scaling: Most positive(malignant) instances based on probability
Index of test:  554
The features are (Original, standard scaling, information gain scaling)
[ 10.  10.  10.  10.   5.  10.  10.  10.   7.]
[ 1.983939    2.25152563  2.28722218  2.52095546  0.80623884  1.79351268
  2.69317056  2.33759359  3.15697661]
[ 1.9794179   2.5227748   2.65899235  2.42276144  0.64392092  1.72636091
  2.12157821  2.21610269  2.58010812]

Information gain scaling: Most positive(malignant) instances based on probability
Index of test:  554
The features are (Original, standar

In [14]:
# most negative, "benign" -- Evidence

negative_evi_index_ori = np.argmax(abs(neg_evi_ori))
print "Original: Most negative(benign) instances based on evidence"
print "Index of test: ",negative_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ori]
print X_test_ss[negative_evi_index_ori]
print X_test_ig[negative_evi_index_ori]
print ""

negative_evi_index_ss = np.argmax(abs(neg_evi_ss))
print "Standard scaling: Most negative(benign) instances based on evidence"
print "Index of test: ",negative_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ss]
print X_test_ss[negative_evi_index_ss]
print X_test_ig[negative_evi_index_ss]
print ""

negative_evi_index_ig = np.argmax(abs(neg_evi_ig))
print "Information gain scaling: Most negative(benign) instances based on evidence"
print "Index of test: ",negative_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ig]
print X_test_ss[negative_evi_index_ig]
print X_test_ig[negative_evi_index_ig]
print ""


Original: Most negative(benign) instances based on evidence
Index of test:  579
The features are (Original, standard scaling, information gain scaling)
[  9.  10.  10.  10.  10.   5.  10.  10.  10.]
[ 1.62853834  2.25152563  2.28722218  2.52095546  3.06590584  0.40434375
  2.69317056  2.33759359  4.9074208 ]
[ 1.61952374  2.5227748   2.65899235  2.42276144  2.79032397  0.49324598
  2.12157821  2.21610269  3.98743983]

Standard scaling: Most negative(benign) instances based on evidence
Index of test:  32
The features are (Original, standard scaling, information gain scaling)
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.]
[-1.2146669  -0.69999505 -0.74329904 -0.63324716 -1.00149476 -0.70699139
 -1.00047147 -0.61182504 -0.34391178]
[-1.25962958 -0.50455496 -0.53179847 -0.14251538 -1.07320153 -0.49324598
 -1.35009522 -0.85234719 -0.23455528]

Information gain scaling: Most negative(benign) instances based on evidence
Index of test:  32
The features are (Original, standard scaling, information gain 

In [15]:
# most positive, "mallignant" -- Evidence 

positive_evi_index_ori = np.argmax(pos_evi_ori)
print "Original: Most positive(malignant) instances based on evidence"
print "Index of test: ",positive_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ori]
print X_test_ss[positive_evi_index_ori]
print X_test_ig[positive_evi_index_ori]
print ""

positive_evi_index_ss = np.argmax(pos_evi_ss)
print "Standard scaling: Most positive(malignant) instances based on evidence"
print "Index of test: ",positive_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ss]
print X_test_ss[positive_evi_index_ss]
print X_test_ig[positive_evi_index_ss]
print ""

positive_evi_index_ig = np.argmax(pos_evi_ig)
print "Information gain scaling: Most positive(malignant) instances based on evidence"
print "Index of test: ",positive_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ig]
print X_test_ss[positive_evi_index_ig]
print X_test_ig[positive_evi_index_ig]
print ""

Original: Most positive(malignant) instances based on evidence
Index of test:  62
The features are (Original, standard scaling, information gain scaling)
[  6.  10.  10.  10.   4.  10.   7.  10.   1.]
[ 0.56233637  2.25152563  2.28722218  2.52095546  0.35430544  1.79351268
  1.46195655  2.33759359 -0.34391178]
[ 0.53984125  2.5227748   2.65899235  2.42276144  0.21464031  1.72636091
  0.96435373  2.21610269 -0.23455528]

Standard scaling: Most positive(malignant) instances based on evidence
Index of test:  554
The features are (Original, standard scaling, information gain scaling)
[ 10.  10.  10.  10.   5.  10.  10.  10.   7.]
[ 1.983939    2.25152563  2.28722218  2.52095546  0.80623884  1.79351268
  2.69317056  2.33759359  3.15697661]
[ 1.9794179   2.5227748   2.65899235  2.42276144  0.64392092  1.72636091
  2.12157821  2.21610269  2.58010812]

Information gain scaling: Most positive(malignant) instances based on evidence
Index of test:  554
The features are (Original, standard scaling

In [16]:
# unc_1 Top 1 uncertain instances

uncertains_ori = np.min(y_pred_prob_ori, axis=1)
uis_ori = np.argsort(uncertains_ori)[::-1]
top10_uis_ori = uis_ori[:10]
print "Original: the most uncertain instance based on probability"
print "Index of test: ",uis_ori[0]
print "Actual label", y_test[uis_ori[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ori[0]]
print X_test_ss[uis_ori[0]]
print X_test_ig[uis_ori[0]]
print ""

uncertains_ss = np.min(y_pred_prob_ss, axis=1)
uis_ss = np.argsort(uncertains_ss)[::-1]
top10_uis_ss = uis_ss[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ss[0]
print "Actual label", y_test[uis_ss[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ss[0]]
print X_test_ss[uis_ss[0]]
print X_test_ig[uis_ss[0]]
print ""

uncertains_ig = np.min(y_pred_prob_ig, axis=1)
uis_ig = np.argsort(uncertains_ig)[::-1]
top10_uis_ig = uis_ig[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ig[0]
print "Actual label", y_test[uis_ig[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ig[0]]
print X_test_ss[uis_ig[0]]
print X_test_ig[uis_ig[0]]
print ""

print "The splitting point: ", scale_.mns

Original: the most uncertain instance based on probability
Index of test:  207
Actual label 1
The features are (Original, standard scaling, information gain scaling)
[ 7.  2.  4.  1.  3.  4.  3.  3.  1.]
[ 0.91773703 -0.37204831  0.2668747  -0.63324716 -0.09762796  0.12650997
 -0.17966213  0.04360132 -0.34391178]
[ 0.89973541 -0.16818499  0.53179847 -0.14251538 -0.21464031  0.24662299
 -0.57861224 -0.17046944 -0.23455528]

standard scaling: the most uncertain instance based on probability
Index of test:  159
Actual label 1
The features are (Original, standard scaling, information gain scaling)
[ 2.  3.  4.  4.  2.  5.  2.  5.  1.]
[-0.85926625 -0.04410156  0.2668747   0.41815371 -0.54956136  0.40434375
 -0.5900668   0.69902769 -0.34391178]
[-0.89973541  0.16818499  0.53179847  0.71257689 -0.64392092  0.49324598
 -0.96435373  0.51140831 -0.23455528]

standard scaling: the most uncertain instance based on probability
Index of test:  154
Actual label 1
The features are (Original, standard

In [17]:
# The indices of top 10 uncertain instances for each classifier

print top10_uis_ori
print top10_uis_ss
print top10_uis_ig


[207 154 131 504 384 158  88 145 523 333]
[159 559 154 227 207 605   0 461 256 178]
[154 159 559 207 116 178 605 637   0 575]


In [18]:
# unc_ce from Top 10 uncertain instances

min_evidence_top10_ori = np.min([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ce_ori = np.argmax(min_evidence_top10_ori)
print "Original: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ce_ori]
print "Actual label", y_test[top10_uis_ori[index_ce_ori]]
print X_test_ori[top10_uis_ori[index_ce_ori]]
print X_test_ss[top10_uis_ori[index_ce_ori]]
print X_test_ig[top10_uis_ori[index_ce_ori]]

min_evidence_top10_ss = np.min([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ce_ss = np.argmax(min_evidence_top10_ss)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ce_ss]
print "Actual label", y_test[top10_uis_ss[index_ce_ss]]
print X_test_ori[top10_uis_ss[index_ce_ss]]
print X_test_ss[top10_uis_ss[index_ce_ss]]
print X_test_ig[top10_uis_ss[index_ce_ss]]

min_evidence_top10_ig = np.min([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ce_ig = np.argmax(min_evidence_top10_ig)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ce_ig]
print "Actual label", y_test[top10_uis_ig[index_ce_ig]]
print X_test_ori[top10_uis_ig[index_ce_ig]]
print X_test_ss[top10_uis_ig[index_ce_ig]]
print X_test_ig[top10_uis_ig[index_ce_ig]]


Original: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  384
Actual label 1
[  8.   7.   4.   4.   5.   3.   5.  10.   1.]
[ 1.27313768  1.26768541  0.2668747   0.41815371  0.80623884 -0.15132382
  0.64114721  2.33759359 -0.34391178]
[ 1.25962958  1.51366488  0.53179847  0.71257689  0.64392092  0.
  0.19287075  2.21610269 -0.23455528]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  605
Actual label 0
[ 3.  1.  1.  3.  8.  1.  5.  8.  1.]
[-0.50386559 -0.69999505 -0.74329904  0.06768675  2.16203904 -0.70699139
  0.64114721  1.68216723 -0.34391178]
[-0.53984125 -0.50455496 -0.53179847  0.42754614  1.93176275 -0.49324598
  0.19287075  1.53422494 -0.23455528]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  0
Actual label 1
[ 10.   2.   2.   1.   2.   6.   1.   1.   2.]
[ 1.983939   -0.37204831 -0.40657446 -0.63324716 -0.

In [19]:
# unc_ie from Top 10 uncertain instances 

max_evidence_top10_ori = np.max([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ie_ori = np.argmin(max_evidence_top10_ori)
print "Original: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ie_ori]
print "Actual label", y_test[top10_uis_ori[index_ie_ori]]
print X_test_ori[top10_uis_ori[index_ie_ori]]
print X_test_ss[top10_uis_ori[index_ie_ori]]
print X_test_ig[top10_uis_ori[index_ie_ori]]

max_evidence_top10_ss = np.max([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ie_ss = np.argmin(max_evidence_top10_ss)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ie_ss]
print "Actual label", y_test[top10_uis_ss[index_ie_ss]]
print X_test_ori[top10_uis_ss[index_ie_ss]]
print X_test_ss[top10_uis_ss[index_ie_ss]]
print X_test_ig[top10_uis_ss[index_ie_ss]]

max_evidence_top10_ig = np.max([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ie_ig = np.argmin(max_evidence_top10_ig)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ie_ig]
print "Actual label", y_test[top10_uis_ig[index_ie_ig]]
print X_test_ori[top10_uis_ig[index_ie_ig]]
print X_test_ss[top10_uis_ig[index_ie_ig]]
print X_test_ig[top10_uis_ig[index_ie_ig]]

Original: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  145
Actual label 0
[ 1.  1.  1.  1.  1.  1.  1.  3.  1.]
[-1.2146669  -0.69999505 -0.74329904 -0.63324716 -1.00149476 -0.70699139
 -1.00047147  0.04360132 -0.34391178]
[-1.25962958 -0.50455496 -0.53179847 -0.14251538 -1.07320153 -0.49324598
 -1.35009522 -0.17046944 -0.23455528]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  154
Actual label 1
[ 5.  3.  3.  3.  2.  3.  4.  4.  1.]
[ 0.20693572 -0.04410156 -0.06984988  0.06768675 -0.54956136 -0.15132382
  0.23074254  0.37131451 -0.34391178]
[ 0.17994708  0.16818499  0.17726616  0.42754614 -0.64392092  0.
 -0.19287075  0.17046944 -0.23455528]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  154
Actual label 1
[ 5.  3.  3.  3.  2.  3.  4.  4.  1.]
[ 0.20693572 -0.04410156 -0.06984988  0.06768675 -0.54956136 -0.15132382
  0.

In [20]:
# Least negative among Top 10 possitive 

top_positive_index_ori = np.argsort(pos_evi_ori)[::-1]
tp_ori = top_positive_index_ori[:10]
neg_info_ori = neg_evi_ori[tp_ori]
index_least_neg_ori = np.argmin(neg_info_ori)
print "Index of test:",tp_ori[index_least_neg_ori]
print "Actual label", y_test[tp_ori[index_least_neg_ori]]
print X_test_ori[tp_ori[index_least_neg_ori]]
print X_test_ss[tp_ori[index_least_neg_ori]]
print X_test_ig[tp_ori[index_least_neg_ori]]
print ""

top_positive_index_ss = np.argsort(pos_evi_ss)[::-1]
tp_ss = top_positive_index_ss[:10]
neg_info_ss = neg_evi_ss[tp_ss]
index_least_neg_ss = np.argmin(neg_info_ss)
print "Index of test:",tp_ss[index_least_neg_ss]
print "Actual label", y_test[tp_ss[index_least_neg_ss]]
print X_test_ori[tp_ss[index_least_neg_ss]]
print X_test_ss[tp_ss[index_least_neg_ss]]
print X_test_ig[tp_ss[index_least_neg_ss]]
print ""

top_positive_index_ig = np.argsort(pos_evi_ig)[::-1]
tp_ig = top_positive_index_ig[:10]
neg_info_ig = neg_evi_ig[tp_ig]
index_least_neg_ig = np.argmin(neg_info_ig)
print "Index of test:",tp_ig[index_least_neg_ig]
print "Actual label", y_test[tp_ig[index_least_neg_ig]]
print X_test_ori[tp_ig[index_least_neg_ig]]
print X_test_ss[tp_ig[index_least_neg_ig]]
print X_test_ig[tp_ig[index_least_neg_ig]]

Index of test: 356
Actual label 1
[ 10.  10.  10.  10.  10.  10.   4.  10.  10.]
[ 1.983939    2.25152563  2.28722218  2.52095546  3.06590584  1.79351268
  0.23074254  2.33759359  4.9074208 ]
[ 1.9794179   2.5227748   2.65899235  2.42276144  2.79032397  1.72636091
 -0.19287075  2.21610269  3.98743983]

Index of test: 450
Actual label 1
[  9.  10.  10.  10.  10.  10.  10.  10.   1.]
[ 1.62853834  2.25152563  2.28722218  2.52095546  3.06590584  1.79351268
  2.69317056  2.33759359 -0.34391178]
[ 1.61952374  2.5227748   2.65899235  2.42276144  2.79032397  1.72636091
  2.12157821  2.21610269 -0.23455528]

Index of test: 356
Actual label 1
[ 10.  10.  10.  10.  10.  10.   4.  10.  10.]
[ 1.983939    2.25152563  2.28722218  2.52095546  3.06590584  1.79351268
  0.23074254  2.33759359  4.9074208 ]
[ 1.9794179   2.5227748   2.65899235  2.42276144  2.79032397  1.72636091
 -0.19287075  2.21610269  3.98743983]


In [21]:
# The indices of top 10 positive based on positive evidence

print tp_ori
print tp_ss
print tp_ig


[450 642  62 356 102 554 248 474 166 269]
[554 248 450 356 269 102 579 166 474 642]
[554 450 248 356 579 102 269 166 474 422]


In [22]:
# Least positive among Top 10 negative instances

top_negative_index_ori = np.argsort(abs(neg_evi_ori))[::-1]
tn_ori = top_negative_index_ori[:10]
pos_info_ori = pos_evi_ori[tp_ori]
index_least_pos_ori = np.argmin(pos_info_ori)
print "Index of test:",tn_ori[index_least_pos_ori]
print "Actual label", y_test[tn_ori[index_least_pos_ori]]
print X_test_ori[tn_ori[index_least_pos_ori]]
print X_test_ss[tn_ori[index_least_pos_ori]]
print X_test_ig[tn_ori[index_least_pos_ori]]
print ""

top_negative_index_ss = np.argsort(abs(neg_evi_ss))[::-1]
tn_ss = top_negative_index_ss[:10]
pos_info_ss = pos_evi_ss[tp_ss]
index_least_pos_ss = np.argmin(pos_info_ss)
print "Index of test:",tn_ss[index_least_pos_ss]
print "Actual label", y_test[tn_ss[index_least_pos_ss]]
print X_test_ori[tn_ss[index_least_pos_ss]]
print X_test_ss[tn_ss[index_least_pos_ss]]
print X_test_ig[tn_ss[index_least_pos_ss]]
print ""

top_negative_index_ig = np.argsort(abs(neg_evi_ig))[::-1]
tn_ig = top_negative_index_ig[:10]
pos_info_ig = pos_evi_ig[tp_ig]
index_least_pos_ig = np.argmin(pos_info_ig)
print "Index of test:",tn_ig[index_least_pos_ig]
print "Actual label", y_test[tn_ig[index_least_pos_ig]]
print X_test_ori[tn_ig[index_least_pos_ig]]
print X_test_ss[tn_ig[index_least_pos_ig]]
print X_test_ig[tn_ig[index_least_pos_ig]]
print ""


Index of test: 579
Actual label 1
[  9.  10.  10.  10.  10.   5.  10.  10.  10.]
[ 1.62853834  2.25152563  2.28722218  2.52095546  3.06590584  0.40434375
  2.69317056  2.33759359  4.9074208 ]
[ 1.61952374  2.5227748   2.65899235  2.42276144  2.79032397  0.49324598
  2.12157821  2.21610269  3.98743983]

Index of test: 413
Actual label 0
[ 1.  1.  1.  1.  2.  1.  1.  1.  1.]
[-1.2146669  -0.69999505 -0.74329904 -0.63324716 -0.54956136 -0.70699139
 -1.00047147 -0.61182504 -0.34391178]
[-1.25962958 -0.50455496 -0.53179847 -0.14251538 -0.64392092 -0.49324598
 -1.35009522 -0.85234719 -0.23455528]

Index of test: 120
Actual label 0
[ 1.  1.  1.  1.  2.  1.  1.  1.  1.]
[-1.2146669  -0.69999505 -0.74329904 -0.63324716 -0.54956136 -0.70699139
 -1.00047147 -0.61182504 -0.34391178]
[-1.25962958 -0.50455496 -0.53179847 -0.14251538 -0.64392092 -0.49324598
 -1.35009522 -0.85234719 -0.23455528]



In [23]:
# The indices of top 10 negative based on negative evidence

print tn_ori
print tn_ss
print tn_ig


[579 495 652 356 308 532 550 248 534 450]
[288 127 180  32 285 643 277 640 540 413]
[ 32 288 180 127 372 349  95 640 432 120]
