In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale

In [2]:
# Pima Indians Diabetes data set
# http://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
#
# Feature columns, in file order:
#   preg - number of pregnancies
#   plas - plasma glucose concentration at 2 hours in an oral glucose tolerance test
#   pres - diastolic blood pressure
#   skin - triceps skin fold thickness
#   insu - 2-hour serum insulin
#   mass - body mass index
#   pedi - diabetes pedigree function
#   age  - age

dataset = "diabetes.csv"
num_cols = 9       # total number of columns in the file
class_index = 8    # column that holds the class label
classes = ['tested_negative', 'tested_positive']
# Every column index except the label column.
read_cols = list(range(class_index)) + list(range(class_index + 1, num_cols))
# NOTE(review): machine-specific absolute path; adjust it for your environment.
file_path = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\" + dataset

In [3]:
with open(file_path, 'r') as f:
    header = f.readline()
    #print header
    #header = np.fromstring("a, b", dtype=np.str_, sep=',')
    header = np.array(header.split(','))
    feature_names = header[read_cols]
    
    print header

['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age' 'class\n']


In [4]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of X:",np.shape(X)
ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.95, random_state=40)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print len(y_test)

The shape of X: (768L, 8L)
730


In [5]:
# Split the feature columns into binary ones (exactly two distinct values
# in X) and everything else.
num_features = X.shape[1]
distinct_counts = [len(np.unique(X[:, col])) for col in range(num_features)]
non_binary = [col for col, count in enumerate(distinct_counts) if count != 2]
binary = [col for col, count in enumerate(distinct_counts) if count == 2]

In [6]:
# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features

X_ss = np.copy(X)
# X_ss = scale(X)

if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

if len(binary) > 0: 
    print "binary features exist"
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
if len(binary) > 0:
    
    print "binary features exist"
    print binary
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

print header[non_binary]

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test
['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age']


In [7]:
print "The mean of each features:"
print np.mean(X, axis=0)
print "The best splitting of each features"
print scale_.mns

The mean of each features:
[   3.84505208  120.89453125   69.10546875   20.53645833   79.79947917
   31.99257812    0.4718763    33.24088542]
The best splitting of each features
[   6.5    145.     100.      36.5    426.5     25.45     0.689   32.5  ]


In [8]:
# One classifier per feature view: original, standard-scaled, IG-scaled.
clf_ori, clf_ss, clf_ig = (
    TransparentLogisticRegression(),
    TransparentLogisticRegression(),
    TransparentLogisticRegression(),
)

In [9]:
# Train each classifier on its own view of the training rows.

for model, train_features in ((clf_ori, X_train_ori), (clf_ss, X_train_ss)):
    model.fit(train_features, y_train)

# Left as a bare final expression so the notebook still displays the value
# returned by this last fit call.
clf_ig.fit(X_train_ig, y_train)

TransparentLogisticRegression(C=1.0, class_weight=None, dual=False,
               fit_intercept=True, intercept_scaling=1, max_iter=100,
               multi_class='ovr', n_jobs=1, penalty='l2',
               random_state=None, solver='liblinear', tol=0.0001,
               verbose=0, warm_start=False)

In [10]:
# print the weights for each classifiers

print clf_ori.coef_
print clf_ss.coef_
print clf_ig.coef_
print ""

print clf_ori.intercept_ 
print clf_ss.intercept_ 
print clf_ig.intercept_

[[ 0.04930588  0.02430812 -0.01881217  0.01303152 -0.00316046 -0.0452861
   0.82635034  0.00118166]]
[[ 0.20000531  0.98910626  0.00593138  0.02499525 -0.31058147  0.28730722
   0.68253657  0.33581909]]
[[ 0.29559366  0.98362216  0.10191207 -0.09511427 -0.34285464  0.40341875
   0.71489413  0.31287842]]

[-1.07857429]
[-0.57192937]
[-0.00846264]


In [11]:
# Predict test instances for each classifier

y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.680821917808
0.756164383562
0.754794520548


In [12]:
# most negative, "tested_negative" -- Probability

Most_negative_1 = np.argmax(y_pred_prob_ori[:,0])
print "Original: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_1]
print X_test_ss[Most_negative_1]
print X_test_ig[Most_negative_1]
print ""

Most_negative_2 = np.argmax(y_pred_prob_ss[:,0])
print "Standard scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_2]
print X_test_ss[Most_negative_2]
print X_test_ig[Most_negative_2]
print ""

Most_negative_3 = np.argmax(y_pred_prob_ig[:,0])
print "Information gain scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_3]
print X_test_ss[Most_negative_3]
print X_test_ig[Most_negative_3]
print ""

Original: Most negative(tested_negative) instances based on probability
Index of test:  616
The features are (Original, standard scaling, information gain scaling)
[  5.      0.     80.     32.      0.     41.      0.346  37.   ]
[ 0.3429808  -3.78365371  0.56322275  0.71908574 -0.69289057  1.14321519
 -0.38016137  0.31985461]
[-0.38746831 -4.0085737  -0.6325804  -0.18122696 -1.14222354  1.60215316
 -0.8870892   0.41045451]

Standard scaling: Most negative(tested_negative) instances based on probability
Index of test:  238
The features are (Original, standard scaling, information gain scaling)
[  1.     0.    48.    20.     0.    24.7    0.14  22.  ]
[-0.84488505 -3.78365371 -1.09110524 -0.03365099 -0.69289057 -0.92556851
 -1.00230582 -0.95646168]
[-1.42071715 -4.0085737  -1.64470903 -0.66449885 -1.14222354 -0.07727427
 -1.41985998 -0.95772719]

Information gain scaling: Most negative(tested_negative) instances based on probability
Index of test:  238
The features are (Original, standa

In [13]:
# most positive, "tested_positive" -- Probability

Most_positive_1 = np.argmax(y_pred_prob_ori[:,1])


print "Original: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_1]
print X_test_ss[Most_positive_1]
print X_test_ig[Most_positive_1]
print ""

Most_positive_2 = np.argmax(y_pred_prob_ss[:,1])
print "Standard scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_2]
print X_test_ss[Most_positive_2]
print X_test_ig[Most_positive_2]
print ""

Most_positive_3 = np.argmax(y_pred_prob_ig[:,1])
print "Information gain scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_3]
print X_test_ss[Most_positive_3]
print X_test_ig[Most_positive_3]
print ""

Original: Most positive(tested_positive) instances based on probability
Index of test:  497
The features are (Original, standard scaling, information gain scaling)
[   2.     197.      70.      99.       0.      34.7      0.575   62.   ]
[-0.54791859  2.38188392  0.04624525  4.92186584 -0.69289057  0.34362394
  0.31144581  2.44704844]
[-1.16240494  1.43755747 -0.94887059  2.51704109 -1.14222354  0.95304931
 -0.29483431  2.69075736]

Standard scaling: Most positive(tested_positive) instances based on probability
Index of test:  425
The features are (Original, standard scaling, information gain scaling)
[   0.    180.     78.     63.     14.     59.4     2.42   25.  ]
[-1.14185152  1.84983245  0.45982725  2.66365564 -0.57133003  3.4785293
  5.88356477 -0.70119842]
[-1.67902936  0.96758676 -0.69583844  1.06722542 -1.10472968  3.49794855
  4.47682627 -0.68409085]

Information gain scaling: Most positive(tested_positive) instances based on probability
Index of test:  425
The features are (O

In [14]:
# most negative, "tested_negative" -- Evidence

negative_evi_index_ori = np.argmax(abs(neg_evi_ori))
print "Original: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ori]
print X_test_ss[negative_evi_index_ori]
print X_test_ig[negative_evi_index_ori]
print ""

negative_evi_index_ss = np.argmax(abs(neg_evi_ss))
print "Standard scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ss]
print X_test_ss[negative_evi_index_ss]
print X_test_ig[negative_evi_index_ss]
print ""

negative_evi_index_ig = np.argmax(abs(neg_evi_ig))
print "Information gain scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ig]
print X_test_ss[negative_evi_index_ig]
print X_test_ig[negative_evi_index_ig]
print ""

Original: Most negative(tested_negative) instances based on evidence
Index of test:  660
The features are (Original, standard scaling, information gain scaling)
[  0.00000000e+00   1.65000000e+02   9.00000000e+01   3.30000000e+01
   6.80000000e+02   5.23000000e+01   4.27000000e-01   2.30000000e+01]
[-1.14185152  1.38037527  1.08020025  0.7818138   5.21147866  2.57740266
 -0.13553176 -0.87137393]
[-1.67902936  0.55290672 -0.3162902  -0.1409543   0.67890661  2.76641881
 -0.67760167 -0.86651508]

Standard scaling: Most negative(tested_negative) instances based on evidence
Index of test:  238
The features are (Original, standard scaling, information gain scaling)
[  1.     0.    48.    20.     0.    24.7    0.14  22.  ]
[-0.84488505 -3.78365371 -1.09110524 -0.03365099 -0.69289057 -0.92556851
 -1.00230582 -0.95646168]
[-1.42071715 -4.0085737  -1.64470903 -0.66449885 -1.14222354 -0.07727427
 -1.41985998 -0.95772719]

Information gain scaling: Most negative(tested_negative) instances based on

In [15]:
# most positive, "tested_positive" -- Evidence 

positive_evi_index_ori = np.argmax(pos_evi_ori)
print "Original: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ori]
print X_test_ss[positive_evi_index_ori]
print X_test_ig[positive_evi_index_ori]
print ""

positive_evi_index_ss = np.argmax(pos_evi_ss)
print "Standard scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ss]
print X_test_ss[positive_evi_index_ss]
print X_test_ig[positive_evi_index_ss]
print ""

positive_evi_index_ig = np.argmax(pos_evi_ig)
print "Information gain scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ig]
print X_test_ss[positive_evi_index_ig]
print X_test_ig[positive_evi_index_ig]
print ""

Original: Most positive(tested_positive) instances based on evidence
Index of test:  431
The features are (Original, standard scaling, information gain scaling)
[   4.     197.      70.      39.     744.      36.7      2.329   31.   ]
[ 0.04601433  2.38188392  0.04624525  1.15818217  5.76718399  0.59746243
  5.60873397 -0.19067191]
[-0.64578052  1.43755747 -0.94887059  0.10068164  0.85030709  1.15911403
  4.24147607 -0.13681817]

Standard scaling: Most positive(tested_positive) instances based on evidence
Index of test:  425
The features are (Original, standard scaling, information gain scaling)
[   0.    180.     78.     63.     14.     59.4     2.42   25.  ]
[-1.14185152  1.84983245  0.45982725  2.66365564 -0.57133003  3.4785293
  5.88356477 -0.70119842]
[-1.67902936  0.96758676 -0.69583844  1.06722542 -1.10472968  3.49794855
  4.47682627 -0.68409085]

Information gain scaling: Most positive(tested_positive) instances based on evidence
Index of test:  425
The features are (Original, 

In [16]:
# unc_1 Top 1 uncertain instances

uncertains_ori = np.min(y_pred_prob_ori, axis=1)
uis_ori = np.argsort(uncertains_ori)[::-1]
top10_uis_ori = uis_ori[:10]
print "Original: the most uncertain instance based on probability"
print "Index of test: ",uis_ori[0]
print "Actual label", y_test[uis_ori[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ori[0]]
print X_test_ss[uis_ori[0]]
print X_test_ig[uis_ori[0]]
print ""

uncertains_ss = np.min(y_pred_prob_ss, axis=1)
uis_ss = np.argsort(uncertains_ss)[::-1]
top10_uis_ss = uis_ss[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ss[0]
print "Actual label", y_test[uis_ss[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ss[0]]
print X_test_ss[uis_ss[0]]
print X_test_ig[uis_ss[0]]
print ""

uncertains_ig = np.min(y_pred_prob_ig, axis=1)
uis_ig = np.argsort(uncertains_ig)[::-1]
top10_uis_ig = uis_ig[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ig[0]
print "Actual label", y_test[uis_ig[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ig[0]]
print X_test_ss[uis_ig[0]]
print X_test_ig[uis_ig[0]]
print ""

print "The splitting point: ", scale_.mns

Original: the most uncertain instance based on probability
Index of test:  710
Actual label 0
The features are (Original, standard scaling, information gain scaling)
[   0.     147.      85.      54.       0.      42.8      0.375   24.   ]
[-1.14185152  0.81702665  0.8217115   2.09910309 -0.69289057  1.37166983
 -0.29257793 -0.78628618]
[-1.67902936  0.05529067 -0.4744353   0.70477151 -1.14222354  1.78761141
 -0.81208749 -0.77530297]

standard scaling: the most uncertain instance based on probability
Index of test:  508
Actual label 0
The features are (Original, standard scaling, information gain scaling)
[   0.     137.      84.      27.       0.      27.3      0.231   59.   ]
[-1.14185152  0.5040552   0.77001375  0.40544544 -0.69289057 -0.59557847
 -0.72747502  2.19178518]
[-1.67902936 -0.22116269 -0.50606432 -0.38259025 -1.14222354  0.19060986
 -1.18450978  2.41712101]

standard scaling: the most uncertain instance based on probability
Index of test:  326
Actual label 1
The features

In [17]:
# The indices of top 10 uncertain instances for each classifier

print top10_uis_ori
print top10_uis_ss
print top10_uis_ig

[710 506 592 173 445 361 108 170 564 589]
[508 680  46 218 316 626 415  29 670 673]
[326 597 299 400 662 186 344 111 445  11]


In [18]:
# unc_ce from Top 10 uncertain instances

min_evidence_top10_ori = np.min([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ce_ori = np.argmax(min_evidence_top10_ori)
print "Original: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ce_ori]
print "Actual label", y_test[top10_uis_ori[index_ce_ori]]
print X_test_ori[top10_uis_ori[index_ce_ori]]
print X_test_ss[top10_uis_ori[index_ce_ori]]
print X_test_ig[top10_uis_ori[index_ce_ori]]

min_evidence_top10_ss = np.min([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ce_ss = np.argmax(min_evidence_top10_ss)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ce_ss]
print "Actual label", y_test[top10_uis_ss[index_ce_ss]]
print X_test_ori[top10_uis_ss[index_ce_ss]]
print X_test_ss[top10_uis_ss[index_ce_ss]]
print X_test_ig[top10_uis_ss[index_ce_ss]]

min_evidence_top10_ig = np.min([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ce_ig = np.argmax(min_evidence_top10_ig)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ce_ig]
print "Actual label", y_test[top10_uis_ig[index_ce_ig]]
print X_test_ori[top10_uis_ig[index_ce_ig]]
print X_test_ss[top10_uis_ig[index_ce_ig]]
print X_test_ig[top10_uis_ig[index_ce_ig]]

Original: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  592
Actual label 0
[   6.     154.      78.      41.     140.      46.1      0.571   27.   ]
[ 0.63994726  1.03610667  0.45982725  1.28363829  0.52271486  1.79050334
  0.29936533 -0.53102292]
[-0.1291561   0.24880802 -0.69583844  0.18122696 -0.76728498  2.12761819
 -0.30517938 -0.50166663]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  218
Actual label 1
[  5.     85.     74.     22.      0.     29.      1.224  32.   ]
[ 0.3429808  -1.12339636  0.25303625  0.09180513 -0.69289057 -0.37981576
  2.27150283 -0.10558415]
[-0.38746831 -1.65872015 -0.82235451 -0.58395353 -1.14222354  0.36576487
  1.38365226 -0.04560606]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  11
Actual label 0
[  13.     106.      70.       0.       0.      34.2      0.251   52.   ]
[ 2.71871

In [19]:
# unc_ie from Top 10 uncertain instances 

max_evidence_top10_ori = np.max([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ie_ori = np.argmin(max_evidence_top10_ori)
print "Original: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ie_ori]
print "Actual label", y_test[top10_uis_ori[index_ie_ori]]
print X_test_ori[top10_uis_ori[index_ie_ori]]
print X_test_ss[top10_uis_ori[index_ie_ori]]
print X_test_ig[top10_uis_ori[index_ie_ori]]

max_evidence_top10_ss = np.max([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ie_ss = np.argmin(max_evidence_top10_ss)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ie_ss]
print "Actual label", y_test[top10_uis_ss[index_ie_ss]]
print X_test_ori[top10_uis_ss[index_ie_ss]]
print X_test_ss[top10_uis_ss[index_ie_ss]]
print X_test_ig[top10_uis_ss[index_ie_ss]]

max_evidence_top10_ig = np.max([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ie_ig = np.argmin(max_evidence_top10_ig)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ie_ig]
print "Actual label", y_test[top10_uis_ig[index_ie_ig]]
print X_test_ori[top10_uis_ig[index_ie_ig]]
print X_test_ss[top10_uis_ig[index_ie_ig]]
print X_test_ig[top10_uis_ig[index_ie_ig]]

Original: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  108
Actual label 0
[   3.    111.     58.     31.     44.     29.5     0.43   22.  ]
[-0.25095213 -0.30967058 -0.57412775  0.65635768 -0.31084315 -0.31635613
 -0.1264714  -0.95646168]
[-0.90409273 -0.93994142 -1.32841883 -0.22149962 -1.02438571  0.41728105
 -0.66984287 -0.95772719]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  415
Actual label 0
[   4.     120.      68.       0.       0.      29.6      0.709   34.   ]
[ 0.04601433 -0.02799627 -0.05715025 -1.28821221 -0.69289057 -0.30366421
  0.71614171  0.06459135]
[-0.64578052 -0.6911334  -1.01212863 -1.469952   -1.14222354  0.42758429
  0.05172532  0.13681817]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  662
Actual label 0
[   1.     153.      82.      42.     485.      40.6      0.687   23.   ]
[-0.84488505  1.0

In [20]:
# Least negative among Top 10 possitive 

top_positive_index_ori = np.argsort(pos_evi_ori)[::-1]
tp_ori = top_positive_index_ori[:10]
neg_info_ori = neg_evi_ori[tp_ori]
index_least_neg_ori = np.argmin(neg_info_ori)
print "Index of test:",tp_ori[index_least_neg_ori]
print "Actual label", y_test[tp_ori[index_least_neg_ori]]
print X_test_ori[tp_ori[index_least_neg_ori]]
print X_test_ss[tp_ori[index_least_neg_ori]]
print X_test_ig[tp_ori[index_least_neg_ori]]
print ""

top_positive_index_ss = np.argsort(pos_evi_ss)[::-1]
tp_ss = top_positive_index_ss[:10]
neg_info_ss = neg_evi_ss[tp_ss]
index_least_neg_ss = np.argmin(neg_info_ss)
print "Index of test:",tp_ss[index_least_neg_ss]
print "Actual label", y_test[tp_ss[index_least_neg_ss]]
print X_test_ori[tp_ss[index_least_neg_ss]]
print X_test_ss[tp_ss[index_least_neg_ss]]
print X_test_ig[tp_ss[index_least_neg_ss]]
print ""

top_positive_index_ig = np.argsort(pos_evi_ig)[::-1]
tp_ig = top_positive_index_ig[:10]
neg_info_ig = neg_evi_ig[tp_ig]
index_least_neg_ig = np.argmin(neg_info_ig)
print "Index of test:",tp_ig[index_least_neg_ig]
print "Actual label", y_test[tp_ig[index_least_neg_ig]]
print X_test_ori[tp_ig[index_least_neg_ig]]
print X_test_ss[tp_ig[index_least_neg_ig]]
print X_test_ig[tp_ig[index_least_neg_ig]]

Index of test: 431
Actual label 0
[   4.     197.      70.      39.     744.      36.7      2.329   31.   ]
[ 0.04601433  2.38188392  0.04624525  1.15818217  5.76718399  0.59746243
  5.60873397 -0.19067191]
[-0.64578052  1.43755747 -0.94887059  0.10068164  0.85030709  1.15911403
  4.24147607 -0.13681817]

Index of test: 431
Actual label 0
[   4.     197.      70.      39.     744.      36.7      2.329   31.   ]
[ 0.04601433  2.38188392  0.04624525  1.15818217  5.76718399  0.59746243
  5.60873397 -0.19067191]
[-0.64578052  1.43755747 -0.94887059  0.10068164  0.85030709  1.15911403
  4.24147607 -0.13681817]

Index of test: 376
Actual label 1
[   0.     137.      40.      35.     168.      43.1      2.288   33.   ]
[-1.14185152  0.5040552  -1.50468724  0.90726993  0.76583594  1.4097456
  5.4849091  -0.0204964 ]
[-1.67902936 -0.22116269 -1.89774119 -0.06040899 -0.69229727  1.81852112
  4.13543917  0.04560606]


In [21]:
# The indices of top 10 positive based on positive evidence

print tp_ori
print tp_ss
print tp_ig

[431 425   0 497 537 311 310 693 515  53]
[425 431 311   0 712 537 376 310 693 471]
[425 431 311 712   0 376 537 471 412 695]


In [22]:
# Least positive among Top 10 negative instances

top_negative_index_ori = np.argsort(abs(neg_evi_ori))[::-1]
tn_ori = top_negative_index_ori[:10]
pos_info_ori = pos_evi_ori[tp_ori]
index_least_pos_ori = np.argmin(pos_info_ori)
print "Index of test:",tn_ori[index_least_pos_ori]
print "Actual label", y_test[tn_ori[index_least_pos_ori]]
print X_test_ori[tn_ori[index_least_pos_ori]]
print X_test_ss[tn_ori[index_least_pos_ori]]
print X_test_ig[tn_ori[index_least_pos_ori]]
print ""

top_negative_index_ss = np.argsort(abs(neg_evi_ss))[::-1]
tn_ss = top_negative_index_ss[:10]
pos_info_ss = pos_evi_ss[tp_ss]
index_least_pos_ss = np.argmin(pos_info_ss)
print "Index of test:",tn_ss[index_least_pos_ss]
print "Actual label", y_test[tn_ss[index_least_pos_ss]]
print X_test_ori[tn_ss[index_least_pos_ss]]
print X_test_ss[tn_ss[index_least_pos_ss]]
print X_test_ig[tn_ss[index_least_pos_ss]]
print ""

top_negative_index_ig = np.argsort(abs(neg_evi_ig))[::-1]
tn_ig = top_negative_index_ig[:10]
pos_info_ig = pos_evi_ig[tp_ig]
index_least_pos_ig = np.argmin(pos_info_ig)
print "Index of test:",tn_ig[index_least_pos_ig]
print "Actual label", y_test[tn_ig[index_least_pos_ig]]
print X_test_ori[tn_ig[index_least_pos_ig]]
print X_test_ss[tn_ig[index_least_pos_ig]]
print X_test_ig[tn_ig[index_least_pos_ig]]
print ""

Index of test: 145
Actual label 1
[   9.     171.     110.      24.     240.      45.4      0.721   54.   ]
[ 1.53084665  1.56815814  2.11415525  0.21726125  1.39100445  1.70165987
  0.75238313  1.76634642]
[ 0.64578052  0.71877873  0.3162902  -0.50340822 -0.49947172  2.05549554
  0.08276051  1.96106045]

Index of test: 404
Actual label 0
[  0.     94.      0.      0.      0.      0.      0.256  25.   ]
[-1.14185152 -0.84172205 -3.57259724 -1.28821221 -0.69289057 -4.06047387
 -0.65197205 -0.70119842]
[-1.67902936 -1.40991213 -3.16290198 -1.469952   -1.14222354 -2.62217351
 -1.11985313 -0.68409085]

Index of test: 642
Actual label 0
[  2.     56.     56.     28.     45.     24.2     0.332  22.   ]
[-0.54791859 -2.03101358 -0.67752325  0.4681735  -0.30216026 -0.98902814
 -0.42244303 -0.95646168]
[-1.16240494 -2.46043489 -1.39167687 -0.34231759 -1.02170758 -0.12879045
 -0.92329693 -0.95772719]



In [23]:
# The indices of top 10 negative based on negative evidence

print tn_ori
print tn_ss
print tn_ig

[660 258 431 118 111 160 544 421 662 145]
[238 470 204 616 543  10 461 728 642 404]
[238 470 204 543 616 461 728 404  10 642]
