In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale

In [2]:
# Pima Indians Diabetes data set:
#   http://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
# Feature columns, in file order:
#   preg: number of times pregnant
#   plas: plasma glucose concentration at 2 hours in an oral glucose tolerance test
#   pres: diastolic blood pressure
#   skin: triceps skin fold thickness
#   insu: 2-hour serum insulin
#   mass: body mass index
#   pedi: diabetes pedigree function
#   age:  age

dataset = "diabetes.csv"
num_cols = 9
class_index = 8
classes = ['tested_negative', 'tested_positive']

# Indices of the feature columns: every column except the class column.
read_cols = [col for col in range(num_cols) if col != class_index]

# NOTE(review): machine-specific absolute Windows path -- the notebook only
# runs where this directory exists; consider a configurable data directory.
file_path = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\" + dataset

In [3]:
with open(file_path, 'r') as f:
    header = f.readline()
    #print header
    #header = np.fromstring("a, b", dtype=np.str_, sep=',')
    header = np.array(header.split(','))
    feature_names = header[read_cols]
    
    print header

['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age' 'class\n']


In [4]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of X:",np.shape(X)
ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.33, random_state=40)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print len(y_test)

The shape of X: (768L, 8L)
254


In [5]:
# Split the feature indices into binary and non-binary ones.
# A feature is treated as binary when it takes exactly two distinct values.
num_features = X.shape[1]
is_binary = [len(np.unique(X[:, col])) == 2 for col in range(num_features)]
binary = [col for col, flag in enumerate(is_binary) if flag]
non_binary = [col for col, flag in enumerate(is_binary) if not flag]

In [6]:
# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features

X_ss = np.copy(X)
# X_ss = scale(X)

if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

if len(binary) > 0: 
    print "binary features exist"
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
if len(binary) > 0:
    
    print "binary features exist"
    print binary
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

print header[non_binary]

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test
['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age']


In [7]:
print "The mean of each features:"
print np.mean(X, axis=0)
print "The best splitting of each features"
print scale_.mns

The mean of each features:
[   3.84505208  120.89453125   69.10546875   20.53645833   79.79947917
   31.99257812    0.4718763    33.24088542]
The best splitting of each features
[   6.5     127.5      69.       28.5     122.5      26.9       0.2375
   28.5   ]


In [8]:
# One independent, default-configured classifier per feature representation
# (original, standard scaling, information gain scaling).
clf_ori, clf_ss, clf_ig = (TransparentLogisticRegression() for _unused in range(3))

In [9]:
# Fit each classifier on the training rows of its own feature representation;
# all three share the same training labels.

clf_ori.fit(X_train_ori, y_train)
clf_ss.fit(X_train_ss, y_train)
# Last expression of the cell: the notebook displays the value returned here.
clf_ig.fit(X_train_ig, y_train)

TransparentLogisticRegression(C=1.0, class_weight=None, dual=False,
               fit_intercept=True, intercept_scaling=1, max_iter=100,
               multi_class='ovr', n_jobs=1, penalty='l2',
               random_state=None, solver='liblinear', tol=0.0001,
               verbose=0, warm_start=False)

In [10]:
# print the weights for each classifiers

print clf_ori.coef_
print clf_ss.coef_
print clf_ig.coef_
print ""

print clf_ori.intercept_ 
print clf_ss.intercept_ 
print clf_ig.intercept_

[[ 0.1439551   0.02524085 -0.01827704  0.00804746 -0.00147434  0.0478546
   0.70378772  0.00439795]]
[[ 0.52972941  1.05425945 -0.28093931  0.13508668 -0.24497126  0.62607176
   0.32810724  0.15760426]]
[[ 0.68734354  1.04734965 -0.29487411  0.16932919 -0.25526673  0.73524881
   0.38840003  0.15454245]]

[-5.21357724]
[-0.95714373]
[-1.00087082]


In [11]:
# Predict test instances for each classifier

y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.732283464567
0.732283464567
0.740157480315


In [12]:
# most negative, "tested_negative" -- Probability

Most_negative_1 = np.argmax(y_pred_prob_ori[:,0])
print "Original: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_1]
print X_test_ss[Most_negative_1]
print X_test_ig[Most_negative_1]
print ""

Most_negative_2 = np.argmax(y_pred_prob_ss[:,0])
print "Standard scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_2]
print X_test_ss[Most_negative_2]
print X_test_ig[Most_negative_2]
print ""

Most_negative_3 = np.argmax(y_pred_prob_ig[:,0])
print "Information gain scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_negative_3]
print X_test_ss[Most_negative_3]
print X_test_ig[Most_negative_3]
print ""

Original: Most negative(tested_negative) instances based on probability
Index of test:  238
The features are (Original, standard scaling, information gain scaling)
[  1.     0.    48.    20.     0.    24.7    0.14  22.  ]
[-0.84488505 -3.78365371 -1.09110524 -0.03365099 -0.69289057 -0.92556851
 -1.00230582 -0.95646168]
[-1.28010463 -4.04081109 -1.02504413 -0.45963455 -1.0108045  -0.23020211
 -0.24019749 -0.52546546]

Standard scaling: Most negative(tested_negative) instances based on probability
Index of test:  238
The features are (Original, standard scaling, information gain scaling)
[  1.     0.    48.    20.     0.    24.7    0.14  22.  ]
[-0.84488505 -3.78365371 -1.09110524 -0.03365099 -0.69289057 -0.92556851
 -1.00230582 -0.95646168]
[-1.28010463 -4.04081109 -1.02504413 -0.45963455 -1.0108045  -0.23020211
 -0.24019749 -0.52546546]

Information gain scaling: Most negative(tested_negative) instances based on probability
Index of test:  238
The features are (Original, standard scali

In [13]:
# most positive, "tested_positive" -- Probability

Most_positive_1 = np.argmax(y_pred_prob_ori[:,1])


print "Original: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_1
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_1]
print X_test_ss[Most_positive_1]
print X_test_ig[Most_positive_1]
print ""

Most_positive_2 = np.argmax(y_pred_prob_ss[:,1])
print "Standard scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_2
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_2]
print X_test_ss[Most_positive_2]
print X_test_ig[Most_positive_2]
print ""

Most_positive_3 = np.argmax(y_pred_prob_ig[:,1])
print "Information gain scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_3
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[Most_positive_3]
print X_test_ss[Most_positive_3]
print X_test_ig[Most_positive_3]
print ""

Original: Most positive(tested_positive) instances based on probability
Index of test:  199
The features are (Original, standard scaling, information gain scaling)
[  17.     163.      72.      41.     114.      40.9      0.817   47.   ]
[ 3.90657835  1.31778097  0.14964075  1.28363829  0.29695956  1.13052326
  1.04231453  1.17073215]
[ 2.44383611  1.12508858  0.14643488  0.67593316 -0.07013746  1.46492249
  1.42763533  1.49555553]

Standard scaling: Most positive(tested_positive) instances based on probability
Index of test:  199
The features are (Original, standard scaling, information gain scaling)
[  17.     163.      72.      41.     114.      40.9      0.817   47.   ]
[ 3.90657835  1.31778097  0.14964075  1.28363829  0.29695956  1.13052326
  1.04231453  1.17073215]
[ 2.44383611  1.12508858  0.14643488  0.67593316 -0.07013746  1.46492249
  1.42763533  1.49555553]

Information gain scaling: Most positive(tested_positive) instances based on probability
Index of test:  199
The featur

In [14]:
# most negative, "tested_negative" -- Evidence

negative_evi_index_ori = np.argmax(abs(neg_evi_ori))
print "Original: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ori]
print X_test_ss[negative_evi_index_ori]
print X_test_ig[negative_evi_index_ori]
print ""

negative_evi_index_ss = np.argmax(abs(neg_evi_ss))
print "Standard scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ss]
print X_test_ss[negative_evi_index_ss]
print X_test_ig[negative_evi_index_ss]
print ""

negative_evi_index_ig = np.argmax(abs(neg_evi_ig))
print "Information gain scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[negative_evi_index_ig]
print X_test_ss[negative_evi_index_ig]
print X_test_ig[negative_evi_index_ig]
print ""

Original: Most negative(tested_negative) instances based on evidence
Index of test:  145
The features are (Original, standard scaling, information gain scaling)
[   9.     171.     110.      24.     240.      45.4      0.721   54.   ]
[ 1.53084665  1.56815814  2.11415525  0.21726125  1.39100445  1.70165987
  0.75238313  1.76634642]
[ 0.58186574  1.37862966  2.00127663 -0.24333594  0.96954718  1.93579043
  1.19113318  2.06144141]

Standard scaling: Most negative(tested_negative) instances based on evidence
Index of test:  238
The features are (Original, standard scaling, information gain scaling)
[  1.     0.    48.    20.     0.    24.7    0.14  22.  ]
[-0.84488505 -3.78365371 -1.09110524 -0.03365099 -0.69289057 -0.92556851
 -1.00230582 -0.95646168]
[-1.28010463 -4.04081109 -1.02504413 -0.45963455 -1.0108045  -0.23020211
 -0.24019749 -0.52546546]

Information gain scaling: Most negative(tested_negative) instances based on evidence
Index of test:  238
The features are (Original, standar

In [15]:
# most positive, "tested_positive" -- Evidence 

positive_evi_index_ori = np.argmax(pos_evi_ori)
print "Original: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ori
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ori]
print X_test_ss[positive_evi_index_ori]
print X_test_ig[positive_evi_index_ori]
print ""

positive_evi_index_ss = np.argmax(pos_evi_ss)
print "Standard scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ss
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ss]
print X_test_ss[positive_evi_index_ss]
print X_test_ig[positive_evi_index_ss]
print ""

positive_evi_index_ig = np.argmax(pos_evi_ig)
print "Information gain scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ig
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[positive_evi_index_ig]
print X_test_ss[positive_evi_index_ig]
print X_test_ig[positive_evi_index_ig]
print ""

Original: Most positive(tested_positive) instances based on evidence
Index of test:  199
The features are (Original, standard scaling, information gain scaling)
[  17.     163.      72.      41.     114.      40.9      0.817   47.   ]
[ 3.90657835  1.31778097  0.14964075  1.28363829  0.29695956  1.13052326
  1.04231453  1.17073215]
[ 2.44383611  1.12508858  0.14643488  0.67593316 -0.07013746  1.46492249
  1.42763533  1.49555553]

Standard scaling: Most positive(tested_positive) instances based on evidence
Index of test:  199
The features are (Original, standard scaling, information gain scaling)
[  17.     163.      72.      41.     114.      40.9      0.817   47.   ]
[ 3.90657835  1.31778097  0.14964075  1.28363829  0.29695956  1.13052326
  1.04231453  1.17073215]
[ 2.44383611  1.12508858  0.14643488  0.67593316 -0.07013746  1.46492249
  1.42763533  1.49555553]

Information gain scaling: Most positive(tested_positive) instances based on evidence
Index of test:  199
The features are (O

In [16]:
# unc_1 Top 1 uncertain instances

uncertains_ori = np.min(y_pred_prob_ori, axis=1)
uis_ori = np.argsort(uncertains_ori)[::-1]
top10_uis_ori = uis_ori[:10]
print "Original: the most uncertain instance based on probability"
print "Index of test: ",uis_ori[0]
print "Actual label", y_test[uis_ori[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ori[0]]
print X_test_ss[uis_ori[0]]
print X_test_ig[uis_ori[0]]
print ""

uncertains_ss = np.min(y_pred_prob_ss, axis=1)
uis_ss = np.argsort(uncertains_ss)[::-1]
top10_uis_ss = uis_ss[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ss[0]
print "Actual label", y_test[uis_ss[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ss[0]]
print X_test_ss[uis_ss[0]]
print X_test_ig[uis_ss[0]]
print ""

uncertains_ig = np.min(y_pred_prob_ig, axis=1)
uis_ig = np.argsort(uncertains_ig)[::-1]
top10_uis_ig = uis_ig[:10]
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ig[0]
print "Actual label", y_test[uis_ig[0]]
print "The features are (Original, standard scaling, information gain scaling)"
print X_test_ori[uis_ig[0]]
print X_test_ss[uis_ig[0]]
print X_test_ig[uis_ig[0]]
print ""

Original: the most uncertain instance based on probability
Index of test:  45
Actual label 0
The features are (Original, standard scaling, information gain scaling)
[  12.     121.      78.      17.       0.      26.5      0.259   62.   ]
[ 2.42174604  0.00330087  0.45982725 -0.22183517 -0.69289057 -0.69711387
 -0.6429117   2.44704844]
[ 1.28010463 -0.20600213  0.43930463 -0.62185851 -1.0108045  -0.04185493
  0.05296663  2.70816813]

standard scaling: the most uncertain instance based on probability
Index of test:  111
Actual label 1
The features are (Original, standard scaling, information gain scaling)
[  0.00000000e+00   1.81000000e+02   8.80000000e+01   4.40000000e+01
   5.10000000e+02   4.33000000e+01   2.22000000e-01   2.60000000e+01]
[-1.14185152  1.88112959  0.97680475  1.47182248  3.73538635  1.43512945
 -0.75465609 -0.61611067]
[-1.51285093  1.69555602  0.92742088  0.83815712  3.19744282  1.71605206
 -0.03818524 -0.2021021 ]

standard scaling: the most uncertain instance base

In [17]:
# The indices of top 10 uncertain instances for each classifier

print top10_uis_ori
print top10_uis_ss
print top10_uis_ig

[ 45 182  40  75 197 126 235 188  78 179]
[111 182 197 186  58 151  45 179  40 172]
[111  58 197 186 182  45  51  40 151  78]


In [18]:
# unc_ce from Top 10 uncertain instances

min_evidence_top10_ori = np.min([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ce_ori = np.argmax(min_evidence_top10_ori)
print "Original: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ce_ori]
print "Actual label", y_test[top10_uis_ori[index_ce_ori]]
print X_test_ori[top10_uis_ori[index_ce_ori]]
print X_test_ss[top10_uis_ori[index_ce_ori]]
print X_test_ig[top10_uis_ori[index_ce_ori]]

min_evidence_top10_ss = np.min([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ce_ss = np.argmax(min_evidence_top10_ss)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ce_ss]
print "Actual label", y_test[top10_uis_ss[index_ce_ss]]
print X_test_ori[top10_uis_ss[index_ce_ss]]
print X_test_ss[top10_uis_ss[index_ce_ss]]
print X_test_ig[top10_uis_ss[index_ce_ss]]

min_evidence_top10_ig = np.min([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ce_ig = np.argmax(min_evidence_top10_ig)
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ce_ig]
print "Actual label", y_test[top10_uis_ig[index_ce_ig]]
print X_test_ori[top10_uis_ig[index_ce_ig]]
print X_test_ss[top10_uis_ig[index_ce_ig]]
print X_test_ig[top10_uis_ig[index_ce_ig]]

Original: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  40
Actual label 1
[   3.     158.      76.      36.     245.      31.6      0.851   28.   ]
[-0.25095213  1.16129525  0.35643175  0.96999799  1.43441893 -0.04982572
  1.14499856 -0.44593516]
[-0.81461204  0.9666254   0.34168138  0.4055599   1.0108045   0.49179541
  1.5113965  -0.04042042]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  111
Actual label 1
[  0.00000000e+00   1.81000000e+02   8.80000000e+01   4.40000000e+01
   5.10000000e+02   4.33000000e+01   2.22000000e-01   2.60000000e+01]
[-1.14185152  1.88112959  0.97680475  1.47182248  3.73538635  1.43512945
 -0.75465609 -0.61611067]
[-1.51285093  1.69555602  0.92742088  0.83815712  3.19744282  1.71605206
 -0.03818524 -0.2021021 ]
Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  111
Actual label 1
[  0.00000

In [19]:
# unc_ie from Top 10 uncertain instances 

max_evidence_top10_ori = np.max([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ie_ori = np.argmin(max_evidence_top10_ori)
print "Original: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ie_ori]
print "Actual label", y_test[top10_uis_ori[index_ie_ori]]
print X_test_ori[top10_uis_ori[index_ie_ori]]
print X_test_ss[top10_uis_ori[index_ie_ori]]
print X_test_ig[top10_uis_ori[index_ie_ori]]

max_evidence_top10_ss = np.max([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ie_ss = np.argmin(max_evidence_top10_ss)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ie_ss]
print "Actual label", y_test[top10_uis_ss[index_ie_ss]]
print X_test_ori[top10_uis_ss[index_ie_ss]]
print X_test_ss[top10_uis_ss[index_ie_ss]]
print X_test_ig[top10_uis_ss[index_ie_ss]]

max_evidence_top10_ig = np.max([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ie_ig = np.argmin(max_evidence_top10_ig)
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ie_ig]
print "Actual label", y_test[top10_uis_ig[index_ie_ig]]
print X_test_ori[top10_uis_ig[index_ie_ig]]
print X_test_ss[top10_uis_ig[index_ie_ig]]
print X_test_ig[top10_uis_ig[index_ie_ig]]

Original: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  197
Actual label 1
[   4.     136.      70.       0.       0.      31.2      1.182   22.   ]
[ 0.04601433  0.47275805  0.04624525 -1.28821221 -0.69289057 -0.10059342
  2.14465784 -0.95646168]
[-0.58186574  0.26938741  0.04881163 -1.54112761 -1.0108045   0.44994048
  2.32683618 -0.52546546]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  186
Actual label 0
[   4.     154.      72.      29.     126.      31.3      0.338   37.   ]
[ 0.04601433  1.03610667  0.14964075  0.53090156  0.40115431 -0.08790149
 -0.40432232  0.31985461]
[-0.58186574  0.83985485  0.14643488  0.02703733  0.02888013  0.46040421
  0.24758818  0.68714714]
Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  182
Actual label 0
[  10.     122.      78.      31.       0.      27.6      0.512   45.   ]
[ 1.82781

In [20]:
# Least negative among Top 10 possitive 

top_positive_index_ori = np.argsort(pos_evi_ori)[::-1]
tp_ori = top_positive_index_ori[:10]
neg_info_ori = neg_evi_ori[tp_ori]
index_least_neg_ori = np.argmin(neg_info_ori)
print "Index of test:",tp_ori[index_least_neg_ori]
print "Actual label", y_test[tp_ori[index_least_neg_ori]]
print X_test_ori[tp_ori[index_least_neg_ori]]
print X_test_ss[tp_ori[index_least_neg_ori]]
print X_test_ig[tp_ori[index_least_neg_ori]]
print ""

top_positive_index_ss = np.argsort(pos_evi_ss)[::-1]
tp_ss = top_positive_index_ss[:10]
neg_info_ss = neg_evi_ss[tp_ss]
index_least_neg_ss = np.argmin(neg_info_ss)
print "Index of test:",tp_ss[index_least_neg_ss]
print "Actual label", y_test[tp_ss[index_least_neg_ss]]
print X_test_ori[tp_ss[index_least_neg_ss]]
print X_test_ss[tp_ss[index_least_neg_ss]]
print X_test_ig[tp_ss[index_least_neg_ss]]
print ""

top_positive_index_ig = np.argsort(pos_evi_ig)[::-1]
tp_ig = top_positive_index_ig[:10]
neg_info_ig = neg_evi_ig[tp_ig]
index_least_neg_ig = np.argmin(neg_info_ig)
print "Index of test:",tp_ig[index_least_neg_ig]
print "Actual label", y_test[tp_ig[index_least_neg_ig]]
print X_test_ori[tp_ig[index_least_neg_ig]]
print X_test_ss[tp_ig[index_least_neg_ig]]
print X_test_ig[tp_ig[index_least_neg_ig]]

Index of test: 145
Actual label 1
[   9.     171.     110.      24.     240.      45.4      0.721   54.   ]
[ 1.53084665  1.56815814  2.11415525  0.21726125  1.39100445  1.70165987
  0.75238313  1.76634642]
[ 0.58186574  1.37862966  2.00127663 -0.24333594  0.96954718  1.93579043
  1.19113318  2.06144141]

Index of test: 0
Actual label 1
[   3.     173.      82.      48.     465.      38.4      2.137   25.   ]
[-0.25095213  1.63075243  0.66661825  1.72273472  3.34465603  0.81322515
  5.02887118 -0.70119842]
[-0.81461204  1.44201494  0.63455113  1.05445573  2.82612688  1.20332919
  4.67953978 -0.28294294]

Index of test: 0
Actual label 1
[   3.     173.      82.      48.     465.      38.4      2.137   25.   ]
[-0.25095213  1.63075243  0.66661825  1.72273472  3.34465603  0.81322515
  5.02887118 -0.70119842]
[-0.81461204  1.44201494  0.63455113  1.05445573  2.82612688  1.20332919
  4.67953978 -0.28294294]


In [21]:
# The indices of top 10 positive based on positive evidence

print tp_ori
print tp_ss
print tp_ig

[199   6 145   0 247  53  39 206 220 207]
[199  39   0   6 145 247  53 220  23 233]
[199   0  39 145   6  23  53 233 247 139]


In [22]:
# Least positive among Top 10 negative instances

top_negative_index_ori = np.argsort(abs(neg_evi_ori))[::-1]
tn_ori = top_negative_index_ori[:10]
pos_info_ori = pos_evi_ori[tp_ori]
index_least_pos_ori = np.argmin(pos_info_ori)
print "Index of test:",tn_ori[index_least_pos_ori]
print "Actual label", y_test[tn_ori[index_least_pos_ori]]
print X_test_ori[tn_ori[index_least_pos_ori]]
print X_test_ss[tn_ori[index_least_pos_ori]]
print X_test_ig[tn_ori[index_least_pos_ori]]
print ""

top_negative_index_ss = np.argsort(abs(neg_evi_ss))[::-1]
tn_ss = top_negative_index_ss[:10]
pos_info_ss = pos_evi_ss[tp_ss]
index_least_pos_ss = np.argmin(pos_info_ss)
print "Index of test:",tn_ss[index_least_pos_ss]
print "Actual label", y_test[tn_ss[index_least_pos_ss]]
print X_test_ori[tn_ss[index_least_pos_ss]]
print X_test_ss[tn_ss[index_least_pos_ss]]
print X_test_ig[tn_ss[index_least_pos_ss]]
print ""

top_negative_index_ig = np.argsort(abs(neg_evi_ig))[::-1]
tn_ig = top_negative_index_ig[:10]
pos_info_ig = pos_evi_ig[tp_ig]
index_least_pos_ig = np.argmin(pos_info_ig)
print "Index of test:",tn_ig[index_least_pos_ig]
print "Actual label", y_test[tn_ig[index_least_pos_ig]]
print X_test_ori[tn_ig[index_least_pos_ig]]
print X_test_ss[tn_ig[index_least_pos_ig]]
print X_test_ig[tn_ig[index_least_pos_ig]]
print ""

Index of test: 208
Actual label 1
[   7.     168.      88.      42.     321.      38.2      0.787   40.   ]
[ 0.93691372  1.4742667   0.97680475  1.34636635  2.09431902  0.7878413
  0.95171097  0.57511787]
[ 0.11637315  1.28355176  0.92742088  0.73000782  1.63791587  1.18240172
  1.35372841  0.92966966]

Index of test: 244
Actual label 0
[  1.     71.     62.      0.      0.     21.8     0.416  26.   ]
[-0.84488505 -1.5615564  -0.36733675 -1.28821221 -0.69289057 -1.29363432
 -0.16875306 -0.61611067]
[-1.28010463 -1.79063393 -0.34168138 -1.54112761 -1.0108045  -0.53365033
  0.43974617 -0.2021021 ]

Index of test: 171
Actual label 0
[   1.      96.     122.       0.       0.      22.4      0.207   27.   ]
[-0.84488505 -0.77912776  2.73452825 -1.28821221 -0.69289057 -1.21748278
 -0.79995787 -0.53102292]
[-1.28010463 -0.99831803  2.58701614 -1.54112761 -1.0108045  -0.47086794
 -0.0751387  -0.12126126]



In [23]:
# The indices of top 10 negative based on negative evidence

print tn_ori
print tn_ss
print tn_ig

[145 111 160  87  74 171   0 118 220 208]
[238 204 152  10 213 201  19 171 165 244]
[238 204  10 152 213 201  19 244 165 171]
