In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression, TransparentLinearRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale
from IPython.display import display, HTML
from ipy_table import *

In [2]:
# calculate # of instances between mean and splitting points for each feature
def get_num_instances(X, mean1, mean2):
    # m is # of instances, n is # of features, also the length of mean1 and mean2
    m, n = np.shape(X)
    result = []
    for i in range(n):
        current_feature = X[:,i]
        if mean1[i]>mean2[i]:
            a = mean2[i]
            b = mean1[i]
        else:
            a = mean1[i]
            b = mean2[i]
        num = ((current_feature > a) & (current_feature < b)).sum()
        result.append(num)
   
    return result

In [3]:
# combine header and table 
def combine_table(header, table):

    result = np.concatenate((header, table), axis=0)
    return result

In [4]:
# All information for test instances
class instance():
    def __init__(self, feature_name, ori_features, ss_features, ig_features, clf_ori, clf_ss, clf_ig):
        
        self.feature_name = feature_name
        self.ori_features = ori_features
        self.ss_features = ss_features
        self.ig_features = ig_features
        self.clf_ori = clf_ori
        self.clf_ss = clf_ss
        self.clf_ig = clf_ig
        
        self.neg_evi_ori, self.pos_evi_ori = self.evidence_prediction(self.clf_ori, self.ori_features)
        self.neg_evi_ss, self.pos_evi_ss = self.evidence_prediction(self.clf_ss, self.ss_features)
        self.neg_evi_ig, self.pos_evi_ig = self.evidence_prediction(self.clf_ig, self.ig_features)
        
        self.proba_ori = self.proba_prediction(self.clf_ori, self.ori_features)
        self.proba_ss = self.proba_prediction(self.clf_ss, self.ss_features)
        self.proba_ig = self.proba_prediction(self.clf_ig, self.ig_features)
                       
    def get_information(self, category):
        
        other_info = ['Bias', 'Neg_evi', 'Pos_evi','Proba']
        
        header = [['Features', 'Ori_value', 'ss_value', 'ig_value','w * v']]
        
        ori_evidence = np.multiply(self.ori_features, self.clf_ori.coef_).flatten()
        ss_evidence = np.multiply(self.ss_features, self.clf_ss.coef_).flatten()
        ig_evidence = np.multiply(self.ig_features, self.clf_ig.coef_).flatten()
        
        ori_features_round = np.around(self.ori_features, decimals=2)
        ss_features_round = np.around(self.ss_features, decimals=2)
        ig_features_round = np.around(self.ig_features, decimals=2)
        
        self.neg_evi_ori = np.around(self.neg_evi_ori, decimals=2); self.pos_evi_ori = np.around(self.pos_evi_ori, decimals=2)
        self.neg_evi_ss = np.around(self.neg_evi_ss, decimals=2); self.pos_evi_ss = np.around(self.pos_evi_ss, decimals=2)
        self.neg_evi_ig = np.around(self.neg_evi_ig, decimals=2); self.pos_evi_ig = np.around(self.pos_evi_ig, decimals=2)
        
        self.proba_ori = np.around(self.proba_ori[0], decimals=3)
        self.proba_ss = np.around(self.proba_ss[0], decimals=3)
        self.proba_ig = np.around(self.proba_ig[0], decimals=3)
        
        intercept_ori = round(self.clf_ori.intercept_[0], 2)
        intercept_ss = round(self.clf_ss.intercept_[0], 2)
        intercept_ig = round(self.clf_ig.intercept_[0], 2)
        
        if category==1:
            
            # print "This is ori classifier: "
            current_evidence = np.around(ori_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)               
            return make_table(result)
        
        elif category==2:
            
            # print "This is ss classifier: "
            current_evidence = np.around(ss_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)               
            return make_table(result)        
                
        elif category==3:
            
            # print "This is ig classifier: "
            current_evidence = np.around(ig_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)              
            return make_table(result)        
             
        else:
            
            raise "Error"
            
    def evidence_prediction(self, clf, features):
        
        features = features.reshape((1,-1))
        neg_evi, pos_evi = clf.predict_evidences(features)
        return neg_evi, pos_evi
    
    def proba_prediction(self, clf, features):
        features = features.reshape((1,-1))
        proba = clf.predict_proba(features)
        return proba

In [5]:
#diabetes  http://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
# ['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age']
# preg: the # of pregnant 
# plas: Plasma glucose concentration a 2 hours in an oral glucose tolerance test 
# pres: Diastolic blood pressure
# skin: Triceps skin fold thickness
# insu: 2-Hour serum insulin
# mass: Body mass index
# pedi: Diabetes pedigree function 
# Age 

dataset = "diabetes.csv"
class_index = 8
num_cols = 9
classes= ['tested_negative', 'tested_positive']

read_cols = [i for i in range(num_cols) if i != class_index]
file_path = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\"+dataset

In [6]:
with open(file_path, 'r') as f:
    header = f.readline()
    #print header
    #header = np.fromstring("a, b", dtype=np.str_, sep=',')
    header = np.array(header.split(','))
    feature_names = header[read_cols]

In [7]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of this data set:",np.shape(X)

ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.33, random_state=40)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print "# of test instances: ",len(y_test)

The shape of this data set: (768L, 8L)
# of test instances:  254


In [8]:
# Determine binary features
num_features = X.shape[1]
non_binary = []
binary = []
for i in range(num_features):
    if len(np.unique(X[:,i])) != 2:
        non_binary.append(i)
    else:
        binary.append(i)

In [9]:
# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features
X_ss = np.copy(X)

if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

if len(binary) > 0: 
    print "binary features exist"
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
if len(binary) > 0:
    
    print "binary features exist"
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test


In [10]:
num_train = get_num_instances(X_train_ori, np.mean(X, axis=0), scale_.mns)
num_test = get_num_instances(X_test_ori, np.mean(X, axis=0), scale_.mns)
num_all = get_num_instances(X, np.mean(X, axis=0), scale_.mns)

mean = np.around(np.mean(X, axis=0), decimals=2)
splitting = np.around(scale_.mns, decimals=2)

table1_header = [['Features', 'mean', 'splitting', '# of train', '# of test', '# of total']]

result1 = np.array([header[:class_index].tolist(), mean.tolist(), splitting.tolist(), num_train, num_test, num_all])
result1 = combine_table(table1_header,result1.T)

make_table(result1)

0,1,2,3,4,5
Features,mean,splitting,# of train,# of test,# of total
preg,3.85,6.5,118,57,175
plas,120.89,127.5,43,23,66
pres,69.11,69.0,0,0,0
skin,20.54,28.5,81,54,135
insu,79.8,122.5,57,32,89
mass,31.99,26.9,129,61,190
pedi,0.47,0.24,196,95,291
age,33.24,28.5,75,32,107


In [11]:
# fit train instances for each classifier

clf_ori = TransparentLogisticRegression()
clf_ss = TransparentLogisticRegression()
clf_ig = TransparentLogisticRegression()

clf_ori.fit(X_train_ori, y_train)
clf_ss.fit(X_train_ss, y_train)
clf_ig.fit(X_train_ig, y_train)

print clf_ori.coef_

[[ 0.1439551   0.02524085 -0.01827704  0.00804746 -0.00147434  0.0478546
   0.70378772  0.00439795]]


In [12]:
clf_linear_ori = TransparentLinearRegression()
clf_linear_ss = TransparentLinearRegression()
clf_linear_ig = TransparentLinearRegression()

y_linear_train = np.copy(y_train)
y_linear_train[y_linear_train == 0] = -1

clf_linear_ori.fit(X_train_ori, y_linear_train)
clf_linear_ss.fit(X_train_ss, y_linear_train)
clf_linear_ig.fit(X_train_ig, y_linear_train)

TransparentLinearRegression(copy_X=True, fit_intercept=True, n_jobs=1,
              normalize=False)

In [13]:
# print the weights for each classifiers

ori_weight = list(clf_ori.intercept_)+ clf_ori.coef_[0].tolist()
ss_weight = list(clf_ss.intercept_)+ clf_ss.coef_[0].tolist()
ig_weight = list(clf_ig.intercept_)+ clf_ig.coef_[0].tolist()

ori_weight = np.around(ori_weight, decimals=3)
ss_weight = np.around(ss_weight, decimals=3)
ig_weight = np.around(ig_weight, decimals=3)

ori_linear_weight = [clf_linear_ori.intercept_]
ori_linear_weight = ori_linear_weight + clf_linear_ori.coef_.tolist()

ss_linear_weight = [clf_linear_ss.intercept_]
ss_linear_weight = ss_linear_weight + clf_linear_ss.coef_.tolist()

ig_linear_weight = [clf_linear_ig.intercept_]
ig_linear_weight = ig_linear_weight + clf_linear_ig.coef_.tolist()

ori_linear_weight = np.around(ori_linear_weight, decimals=3)
ss_linear_weight = np.around(ss_linear_weight, decimals=3)
ig_linear_weight = np.around(ig_linear_weight, decimals=3)


table2_header = [['Features', 'ori_weight', 'ss_weight', 'ig_weight', 'ori_linear_w', 'ss_linear_w', 'ig_linear_w']]
features = ['Bias']
features.extend(header[:class_index].tolist())
table2 = np.array([features, ori_weight, ss_weight, ig_weight, ori_linear_weight, ss_linear_weight, ig_linear_weight])

result2 = combine_table(table2_header,table2.T)

make_table(result2)

0,1,2,3,4,5,6
Features,ori_weight,ss_weight,ig_weight,ori_linear_w,ss_linear_w,ig_linear_w
Bias,-5.214,-0.957,-1.001,-2.647,-0.344,-0.342
preg,0.144,0.53,0.687,0.056,0.188,0.24
plas,0.025,1.054,1.047,0.012,0.368,0.364
pres,-0.018,-0.281,-0.295,-0.005,-0.101,-0.107
skin,0.008,0.135,0.169,0.003,0.048,0.056
insu,-0.001,-0.245,-0.255,-0.001,-0.086,-0.09
mass,0.048,0.626,0.735,0.024,0.19,0.23
pedi,0.704,0.328,0.388,0.31,0.103,0.126
age,0.004,0.158,0.155,0.004,0.049,0.051


In [14]:
# Predict test instances for each classifier

y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.732283464567
0.732283464567
0.740157480315


In [15]:
# most negative, "tested_negative" -- Probability

# Original features
Most_negative_1 = np.argmax(y_pred_prob_ori[:,0])
Most_neg_instance_ori = instance(header[:class_index],X_test_ori[Most_negative_1], X_test_ss[Most_negative_1],X_test_ig[Most_negative_1],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table1 = Most_neg_instance_ori.get_information(1)

# standard scaling 
Most_negative_2 = np.argmax(y_pred_prob_ss[:,0])
Most_neg_instance_ss = instance(header[:class_index],X_test_ori[Most_negative_2], X_test_ss[Most_negative_2],X_test_ig[Most_negative_2],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table2 = Most_neg_instance_ss.get_information(2)

# Information gain scaling
Most_negative_3 = np.argmax(y_pred_prob_ig[:,0])
Most_neg_instance_ig = instance(header[:class_index],X_test_ori[Most_negative_3], X_test_ss[Most_negative_3],X_test_ig[Most_negative_3],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table3 = Most_neg_instance_ig.get_information(3)

In [16]:
print "Original: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_1
Most_neg_instance_table1

Original: Most negative(tested_negative) instances based on probability
Index of test:  238


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,0.14
plas,0.0,-3.78,-4.04,0.0
pres,48.0,-1.09,-1.03,-0.88
skin,20.0,-0.03,-0.46,0.16
insu,0.0,-0.69,-1.01,-0.0
mass,24.7,-0.93,-0.23,1.18
pedi,0.14,-1.0,-0.24,0.1
age,22.0,-0.96,-0.53,0.1
Bias,-5.21,-0.96,-1.0,


In [17]:
print "Standard scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_2
Most_neg_instance_table2

Standard scaling: Most negative(tested_negative) instances based on probability
Index of test:  238


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.45
plas,0.0,-3.78,-4.04,-3.99
pres,48.0,-1.09,-1.03,0.31
skin,20.0,-0.03,-0.46,0.0
insu,0.0,-0.69,-1.01,0.17
mass,24.7,-0.93,-0.23,-0.58
pedi,0.14,-1.0,-0.24,-0.33
age,22.0,-0.96,-0.53,-0.15
Bias,-5.21,-0.96,-1.0,


In [18]:
print "Information gain scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_3
Most_neg_instance_table3

Information gain scaling: Most negative(tested_negative) instances based on probability
Index of test:  238


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.88
plas,0.0,-3.78,-4.04,-4.23
pres,48.0,-1.09,-1.03,0.3
skin,20.0,-0.03,-0.46,-0.08
insu,0.0,-0.69,-1.01,0.26
mass,24.7,-0.93,-0.23,-0.17
pedi,0.14,-1.0,-0.24,-0.09
age,22.0,-0.96,-0.53,-0.08
Bias,-5.21,-0.96,-1.0,


In [19]:
# most positive, "tested_positive" -- Probability

# Original featuers
Most_positive_1 = np.argmax(y_pred_prob_ori[:,1])
Most_pos_instance_ori = instance(header[:class_index],X_test_ori[Most_positive_1], X_test_ss[Most_positive_1],X_test_ig[Most_positive_1],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table1 = Most_pos_instance_ori.get_information(1)

# standard scaling
Most_positive_2 = np.argmax(y_pred_prob_ss[:,1])
Most_pos_instance_ss = instance(header[:class_index],X_test_ori[Most_positive_2], X_test_ss[Most_positive_2],X_test_ig[Most_positive_2],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table2 = Most_pos_instance_ss.get_information(2)

# Information gain scaling
Most_positive_3 = np.argmax(y_pred_prob_ig[:,1])
Most_pos_instance_ig = instance(header[:class_index],X_test_ori[Most_positive_3], X_test_ss[Most_positive_3],X_test_ig[Most_positive_3],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table3 = Most_pos_instance_ig.get_information(3)

In [20]:
print "Original: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_1
Most_pos_instance_table1

Original: Most positive(tested_positive) instances based on probability
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,2.45
plas,163.0,1.32,1.13,4.11
pres,72.0,0.15,0.15,-1.32
skin,41.0,1.28,0.68,0.33
insu,114.0,0.3,-0.07,-0.17
mass,40.9,1.13,1.46,1.96
pedi,0.82,1.04,1.43,0.57
age,47.0,1.17,1.5,0.21
Bias,-5.21,-0.96,-1.0,


In [21]:
print "Standard scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_2
Most_pos_instance_table2

Standard scaling: Most positive(tested_positive) instances based on probability
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,2.07
plas,163.0,1.32,1.13,1.39
pres,72.0,0.15,0.15,-0.04
skin,41.0,1.28,0.68,0.17
insu,114.0,0.3,-0.07,-0.07
mass,40.9,1.13,1.46,0.71
pedi,0.82,1.04,1.43,0.34
age,47.0,1.17,1.5,0.18
Bias,-5.21,-0.96,-1.0,


In [22]:
print "Information gain scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_3
Most_pos_instance_table3

Information gain scaling: Most positive(tested_positive) instances based on probability
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,1.68
plas,163.0,1.32,1.13,1.18
pres,72.0,0.15,0.15,-0.04
skin,41.0,1.28,0.68,0.11
insu,114.0,0.3,-0.07,0.02
mass,40.9,1.13,1.46,1.08
pedi,0.82,1.04,1.43,0.55
age,47.0,1.17,1.5,0.23
Bias,-5.21,-0.96,-1.0,


In [23]:
# most negative, "tested_negative" -- Evidence

# Original features
negative_evi_index_ori = np.argmax(abs(neg_evi_ori))
Most_neg_evi_instance_ori = instance(header[:class_index],X_test_ori[negative_evi_index_ori], X_test_ss[negative_evi_index_ori],
                                     X_test_ig[negative_evi_index_ori], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table1 = Most_neg_evi_instance_ori.get_information(1)

# Standard scaling
negative_evi_index_ss = np.argmax(abs(neg_evi_ss))
Most_neg_evi_instance_ss = instance(header[:class_index],X_test_ori[negative_evi_index_ss], X_test_ss[negative_evi_index_ss],
                                     X_test_ig[negative_evi_index_ss], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table2 = Most_neg_evi_instance_ss.get_information(2)

# Information gain scaling
negative_evi_index_ig = np.argmax(abs(neg_evi_ig))
Most_neg_evi_instance_ig = instance(header[:class_index],X_test_ori[negative_evi_index_ig], X_test_ss[negative_evi_index_ig],
                                     X_test_ig[negative_evi_index_ig], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table3 = Most_neg_evi_instance_ig.get_information(3)

In [24]:
print "Original: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ori
Most_neg_evi_instance_table1

Original: Most negative(tested_negative) instances based on evidence
Index of test:  145


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,9.0,1.53,0.58,1.3
plas,171.0,1.57,1.38,4.32
pres,110.0,2.11,2.0,-2.01
skin,24.0,0.22,-0.24,0.19
insu,240.0,1.39,0.97,-0.35
mass,45.4,1.7,1.94,2.17
pedi,0.72,0.75,1.19,0.51
age,54.0,1.77,2.06,0.24
Bias,-5.21,-0.96,-1.0,


In [25]:
print "Standard scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ss
Most_neg_evi_instance_table2

Standard scaling: Most negative(tested_negative) instances based on evidence
Index of test:  238


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.45
plas,0.0,-3.78,-4.04,-3.99
pres,48.0,-1.09,-1.03,0.31
skin,20.0,-0.03,-0.46,0.0
insu,0.0,-0.69,-1.01,0.17
mass,24.7,-0.93,-0.23,-0.58
pedi,0.14,-1.0,-0.24,-0.33
age,22.0,-0.96,-0.53,-0.15
Bias,-5.21,-0.96,-1.0,


In [26]:
print "Information gain scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ig
Most_neg_evi_instance_table3

Information gain scaling: Most negative(tested_negative) instances based on evidence
Index of test:  238


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.88
plas,0.0,-3.78,-4.04,-4.23
pres,48.0,-1.09,-1.03,0.3
skin,20.0,-0.03,-0.46,-0.08
insu,0.0,-0.69,-1.01,0.26
mass,24.7,-0.93,-0.23,-0.17
pedi,0.14,-1.0,-0.24,-0.09
age,22.0,-0.96,-0.53,-0.08
Bias,-5.21,-0.96,-1.0,


In [27]:
# most positive, "tested_positive" -- Evidence 

# Original features
positive_evi_index_ori = np.argmax(pos_evi_ori)
Most_pos_evi_instance_ori = instance(header[:class_index],X_test_ori[positive_evi_index_ori], X_test_ss[positive_evi_index_ori],
                                     X_test_ig[positive_evi_index_ori], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table1 = Most_pos_evi_instance_ori.get_information(1)

# Standard scaling
positive_evi_index_ss = np.argmax(pos_evi_ss)
Most_pos_evi_instance_ss = instance(header[:class_index],X_test_ori[positive_evi_index_ss], X_test_ss[positive_evi_index_ss],
                                     X_test_ig[positive_evi_index_ss], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table2 = Most_pos_evi_instance_ss.get_information(2)

# Information gain scaling
positive_evi_index_ig = np.argmax(pos_evi_ig)
Most_pos_evi_instance_ig = instance(header[:class_index],X_test_ori[positive_evi_index_ig], X_test_ss[positive_evi_index_ig],
                                     X_test_ig[positive_evi_index_ig], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table3 = Most_pos_evi_instance_ig.get_information(3)

In [28]:
print "Original: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ori
Most_pos_evi_instance_table1

Original: Most positive(tested_positive) instances based on evidence
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,2.45
plas,163.0,1.32,1.13,4.11
pres,72.0,0.15,0.15,-1.32
skin,41.0,1.28,0.68,0.33
insu,114.0,0.3,-0.07,-0.17
mass,40.9,1.13,1.46,1.96
pedi,0.82,1.04,1.43,0.57
age,47.0,1.17,1.5,0.21
Bias,-5.21,-0.96,-1.0,


In [29]:
print "Standard scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ss
Most_pos_evi_instance_table2

Standard scaling: Most positive(tested_positive) instances based on evidence
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,2.07
plas,163.0,1.32,1.13,1.39
pres,72.0,0.15,0.15,-0.04
skin,41.0,1.28,0.68,0.17
insu,114.0,0.3,-0.07,-0.07
mass,40.9,1.13,1.46,0.71
pedi,0.82,1.04,1.43,0.34
age,47.0,1.17,1.5,0.18
Bias,-5.21,-0.96,-1.0,


In [30]:
print "Information gain scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ig
Most_pos_evi_instance_table3

Information gain scaling: Most positive(tested_positive) instances based on evidence
Index of test:  199


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.44,1.68
plas,163.0,1.32,1.13,1.18
pres,72.0,0.15,0.15,-0.04
skin,41.0,1.28,0.68,0.11
insu,114.0,0.3,-0.07,0.02
mass,40.9,1.13,1.46,1.08
pedi,0.82,1.04,1.43,0.55
age,47.0,1.17,1.5,0.23
Bias,-5.21,-0.96,-1.0,


In [31]:
# unc_1 Top 1 uncertain instances

# Original features
uncertains_ori = np.min(y_pred_prob_ori, axis=1)
uis_ori = np.argsort(uncertains_ori)[::-1]
top10_uis_ori = uis_ori[:10]
uncertains_1_ori = instance(header[:class_index],X_test_ori[uis_ori[0]], X_test_ss[uis_ori[0]],
                                     X_test_ig[uis_ori[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ori_table = uncertains_1_ori.get_information(1)

# Standard scaling
uncertains_ss = np.min(y_pred_prob_ss, axis=1)
uis_ss = np.argsort(uncertains_ss)[::-1]
top10_uis_ss = uis_ss[:10]
uncertains_1_ss = instance(header[:class_index],X_test_ori[uis_ss[0]], X_test_ss[uis_ss[0]], X_test_ig[uis_ss[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ss_table = uncertains_1_ss.get_information(2)

# Information gain scaling
uncertains_ig = np.min(y_pred_prob_ig, axis=1)
uis_ig = np.argsort(uncertains_ig)[::-1]
top10_uis_ig = uis_ig[:10]
uncertains_1_ig = instance(header[:class_index],X_test_ori[uis_ig[0]], X_test_ss[uis_ig[0]], X_test_ig[uis_ig[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ig_table = uncertains_1_ig.get_information(3)

In [32]:
print "Original: the most uncertain instance based on probability"
print "Index of test: ",uis_ori[0]
print "Actual label", y_test[uis_ori[0]]
uncertains_1_ori_table

Original: the most uncertain instance based on probability
Index of test:  45
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,12.0,2.42,1.28,1.73
plas,121.0,0.0,-0.21,3.05
pres,78.0,0.46,0.44,-1.43
skin,17.0,-0.22,-0.62,0.14
insu,0.0,-0.69,-1.01,-0.0
mass,26.5,-0.7,-0.04,1.27
pedi,0.26,-0.64,0.05,0.18
age,62.0,2.45,2.71,0.27
Bias,-5.21,-0.96,-1.0,


In [33]:
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ss[0]
print "Actual label", y_test[uis_ss[0]]
uncertains_1_ss_table

standard scaling: the most uncertain instance based on probability
Index of test:  111
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.51,-0.6
plas,181.0,1.88,1.7,1.98
pres,88.0,0.98,0.93,-0.27
skin,44.0,1.47,0.84,0.2
insu,510.0,3.74,3.2,-0.92
mass,43.3,1.44,1.72,0.9
pedi,0.22,-0.75,-0.04,-0.25
age,26.0,-0.62,-0.2,-0.1
Bias,-5.21,-0.96,-1.0,


In [34]:
print "Information gain scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ig[0]
print "Actual label", y_test[uis_ig[0]]
uncertains_1_ig_table

Information gain scaling: the most uncertain instance based on probability
Index of test:  111
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.51,-1.04
plas,181.0,1.88,1.7,1.78
pres,88.0,0.98,0.93,-0.27
skin,44.0,1.47,0.84,0.14
insu,510.0,3.74,3.2,-0.82
mass,43.3,1.44,1.72,1.26
pedi,0.22,-0.75,-0.04,-0.01
age,26.0,-0.62,-0.2,-0.03
Bias,-5.21,-0.96,-1.0,


In [35]:
# The indices of top 10 uncertain instances for each classifier

print top10_uis_ori
print top10_uis_ss
print top10_uis_ig

[ 45 182  40  75 197 126 235 188  78 179]
[111 182 197 186  58 151  45 179  40 172]
[111  58 197 186 182  45  51  40 151  78]


In [36]:
# unc_ce from Top 10 uncertain instances

# Original features 
min_evidence_top10_ori = np.min([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ce_ori = np.argmax(min_evidence_top10_ori)
uncertains_ce_ori = instance(header[:class_index],X_test_ori[top10_uis_ori[index_ce_ori]], X_test_ss[top10_uis_ori[index_ce_ori]],
                                     X_test_ig[top10_uis_ori[index_ce_ori]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ori_table = uncertains_ce_ori.get_information(1)


# Standard scaling
min_evidence_top10_ss = np.min([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ce_ss = np.argmax(min_evidence_top10_ss)
uncertains_ce_ss = instance(header[:class_index],X_test_ori[top10_uis_ss[index_ce_ss]], X_test_ss[top10_uis_ss[index_ce_ss]],
                                     X_test_ig[top10_uis_ss[index_ce_ss]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ss_table = uncertains_ce_ss.get_information(2)


# Information gain scaling
min_evidence_top10_ig = np.min([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ce_ig = np.argmax(min_evidence_top10_ig)
uncertains_ce_ig = instance(header[:class_index],X_test_ori[top10_uis_ig[index_ce_ig]], X_test_ss[top10_uis_ig[index_ce_ig]],
                                     X_test_ig[top10_uis_ig[index_ce_ig]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ig_table = uncertains_ce_ig.get_information(3)

In [37]:
print "Original: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ce_ori]
print "Actual label", y_test[top10_uis_ori[index_ce_ori]]
uncertains_ce_ori_table

Original: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  40
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,3.0,-0.25,-0.81,0.43
plas,158.0,1.16,0.97,3.99
pres,76.0,0.36,0.34,-1.39
skin,36.0,0.97,0.41,0.29
insu,245.0,1.43,1.01,-0.36
mass,31.6,-0.05,0.49,1.51
pedi,0.85,1.14,1.51,0.6
age,28.0,-0.45,-0.04,0.12
Bias,-5.21,-0.96,-1.0,


In [38]:
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ce_ss]
print "Actual label", y_test[top10_uis_ss[index_ce_ss]]
uncertains_ce_ss_table

Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  111
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.51,-0.6
plas,181.0,1.88,1.7,1.98
pres,88.0,0.98,0.93,-0.27
skin,44.0,1.47,0.84,0.2
insu,510.0,3.74,3.2,-0.92
mass,43.3,1.44,1.72,0.9
pedi,0.22,-0.75,-0.04,-0.25
age,26.0,-0.62,-0.2,-0.1
Bias,-5.21,-0.96,-1.0,


In [39]:
print "Information gain scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ce_ig]
print "Actual label", y_test[top10_uis_ig[index_ce_ig]]
uncertains_ce_ig_table

Information gain scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  111
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.51,-1.04
plas,181.0,1.88,1.7,1.78
pres,88.0,0.98,0.93,-0.27
skin,44.0,1.47,0.84,0.14
insu,510.0,3.74,3.2,-0.82
mass,43.3,1.44,1.72,1.26
pedi,0.22,-0.75,-0.04,-0.01
age,26.0,-0.62,-0.2,-0.03
Bias,-5.21,-0.96,-1.0,


In [40]:
# unc_ie from Top 10 uncertain instances 

# Original features
max_evidence_top10_ori = np.max([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ie_ori = np.argmin(max_evidence_top10_ori)
uncertains_ie_ori = instance(header[:class_index],X_test_ori[top10_uis_ori[index_ie_ori]], X_test_ss[top10_uis_ori[index_ie_ori]],
                                     X_test_ig[top10_uis_ori[index_ie_ori]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ori_table = uncertains_ie_ori.get_information(1)

# Standard scaling
max_evidence_top10_ss = np.max([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ie_ss = np.argmin(max_evidence_top10_ss)
uncertains_ie_ss = instance(header[:class_index],X_test_ori[top10_uis_ss[index_ie_ss]], X_test_ss[top10_uis_ss[index_ie_ss]],
                                     X_test_ig[top10_uis_ss[index_ie_ss]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ss_table = uncertains_ie_ss.get_information(2)

# IG scaling
max_evidence_top10_ig = np.max([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ie_ig = np.argmin(max_evidence_top10_ig)
uncertains_ie_ig = instance(header[:class_index],X_test_ori[top10_uis_ig[index_ie_ig]], X_test_ss[top10_uis_ig[index_ie_ig]],
                                     X_test_ig[top10_uis_ig[index_ie_ig]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ig_table = uncertains_ie_ig.get_information(3)

In [41]:
print "Original: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ie_ori]
print "Actual label", y_test[top10_uis_ori[index_ie_ori]]
uncertains_ie_ori_table

Original: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  197
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,4.0,0.05,-0.58,0.58
plas,136.0,0.47,0.27,3.43
pres,70.0,0.05,0.05,-1.28
skin,0.0,-1.29,-1.54,0.0
insu,0.0,-0.69,-1.01,-0.0
mass,31.2,-0.1,0.45,1.49
pedi,1.18,2.14,2.33,0.83
age,22.0,-0.96,-0.53,0.1
Bias,-5.21,-0.96,-1.0,


In [42]:
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ie_ss]
print "Actual label", y_test[top10_uis_ss[index_ie_ss]]
uncertains_ie_ss_table

Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  186
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,4.0,0.05,-0.58,0.02
plas,154.0,1.04,0.84,1.09
pres,72.0,0.15,0.15,-0.04
skin,29.0,0.53,0.03,0.07
insu,126.0,0.4,0.03,-0.1
mass,31.3,-0.09,0.46,-0.06
pedi,0.34,-0.4,0.25,-0.13
age,37.0,0.32,0.69,0.05
Bias,-5.21,-0.96,-1.0,


In [43]:
print "Information gain scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ie_ig]
print "Actual label", y_test[top10_uis_ig[index_ie_ig]]
uncertains_ie_ig_table

Information gain scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  182
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,10.0,1.83,0.81,0.56
plas,122.0,0.03,-0.17,-0.18
pres,78.0,0.46,0.44,-0.13
skin,31.0,0.66,0.14,0.02
insu,0.0,-0.69,-1.01,0.26
mass,27.6,-0.56,0.07,0.05
pedi,0.51,0.12,0.68,0.26
age,45.0,1.0,1.33,0.21
Bias,-5.21,-0.96,-1.0,


In [44]:
# Least negative among Top 10 possitive 

# Original features
top_positive_index_ori = np.argsort(pos_evi_ori)[::-1]
tp_ori = top_positive_index_ori[:10]
neg_info_ori = neg_evi_ori[tp_ori]
index_least_neg_ori = np.argmin(neg_info_ori)
least_neg_instance_ori = instance(header[:class_index],X_test_ori[tp_ori[index_least_neg_ori]], X_test_ss[tp_ori[index_least_neg_ori]],
                                  X_test_ig[tp_ori[index_least_neg_ori]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ori_table1 = least_neg_instance_ori.get_information(1)

# Standard scaling
top_positive_index_ss = np.argsort(pos_evi_ss)[::-1]
tp_ss = top_positive_index_ss[:10]
neg_info_ss = neg_evi_ss[tp_ss]
index_least_neg_ss = np.argmin(neg_info_ss)
least_neg_instance_ss = instance(header[:class_index],X_test_ori[tp_ss[index_least_neg_ss]], X_test_ss[tp_ss[index_least_neg_ss]],
                                  X_test_ig[tp_ss[index_least_neg_ss]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ss_table1 = least_neg_instance_ss.get_information(2)

# Information gain scaling
top_positive_index_ig = np.argsort(pos_evi_ig)[::-1]
tp_ig = top_positive_index_ig[:10]
neg_info_ig = neg_evi_ig[tp_ig]
index_least_neg_ig = np.argmin(neg_info_ig)
least_neg_instance_ig = instance(header[:class_index],X_test_ori[tp_ig[index_least_neg_ig]], X_test_ss[tp_ig[index_least_neg_ig]],
                                  X_test_ig[tp_ig[index_least_neg_ig]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ig_table1 = least_neg_instance_ig.get_information(3)

In [45]:
print "Original: Least negative in top 10 positive instances"
print "Index of test:",tp_ori[index_least_neg_ori]
print "Actual label", y_test[tp_ori[index_least_neg_ori]]
least_neg_instance_ori_table1

Original: Least negative in top 10 positive instances
Index of test: 145
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,9.0,1.53,0.58,1.3
plas,171.0,1.57,1.38,4.32
pres,110.0,2.11,2.0,-2.01
skin,24.0,0.22,-0.24,0.19
insu,240.0,1.39,0.97,-0.35
mass,45.4,1.7,1.94,2.17
pedi,0.72,0.75,1.19,0.51
age,54.0,1.77,2.06,0.24
Bias,-5.21,-0.96,-1.0,


In [46]:
print "Standard scaling: Least negative in top 10 positive instances"
print "Index of test:",tp_ss[index_least_neg_ss]
print "Actual label", y_test[tp_ss[index_least_neg_ss]]
least_neg_instance_ss_table1

Standard scaling: Least negative in top 10 positive instances
Index of test: 0
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,3.0,-0.25,-0.81,-0.13
plas,173.0,1.63,1.44,1.72
pres,82.0,0.67,0.63,-0.19
skin,48.0,1.72,1.05,0.23
insu,465.0,3.34,2.83,-0.82
mass,38.4,0.81,1.2,0.51
pedi,2.14,5.03,4.68,1.65
age,25.0,-0.7,-0.28,-0.11
Bias,-5.21,-0.96,-1.0,


In [47]:
print "Information gain scaling: Least negative in top 10 positive instances"
print "Index of test:",tp_ig[index_least_neg_ig]
print "Actual label", y_test[tp_ig[index_least_neg_ig]]
least_neg_instance_ig_table1

Information gain scaling: Least negative in top 10 positive instances
Index of test: 0
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,3.0,-0.25,-0.81,-0.56
plas,173.0,1.63,1.44,1.51
pres,82.0,0.67,0.63,-0.19
skin,48.0,1.72,1.05,0.18
insu,465.0,3.34,2.83,-0.72
mass,38.4,0.81,1.2,0.88
pedi,2.14,5.03,4.68,1.82
age,25.0,-0.7,-0.28,-0.04
Bias,-5.21,-0.96,-1.0,


In [48]:
# The indices of top 10 positive based on positive evidence

print tp_ori
print tp_ss
print tp_ig

[199   6 145   0 247  53  39 206 220 207]
[199  39   0   6 145 247  53 220  23 233]
[199   0  39 145   6  23  53 233 247 139]


In [49]:
# Least positive among Top 10 negative instances

# Original features
top_negative_index_ori = np.argsort(abs(neg_evi_ori))[::-1]
tn_ori = top_negative_index_ori[:10]
pos_info_ori = pos_evi_ori[tp_ori]
index_least_pos_ori = np.argmin(pos_info_ori)
least_pos_instance_ori = instance(header[:class_index],X_test_ori[tn_ori[index_least_pos_ori]], X_test_ss[tn_ori[index_least_pos_ori]],
                                  X_test_ig[tn_ori[index_least_pos_ori]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ori_table1 = least_pos_instance_ori.get_information(1)

# Standard scaling
top_negative_index_ss = np.argsort(abs(neg_evi_ss))[::-1]
tn_ss = top_negative_index_ss[:10]
pos_info_ss = pos_evi_ss[tp_ss]
index_least_pos_ss = np.argmin(pos_info_ss)
least_pos_instance_ss = instance(header[:class_index],X_test_ori[tn_ss[index_least_pos_ss]], X_test_ss[tn_ss[index_least_pos_ss]],
                                  X_test_ig[tn_ss[index_least_pos_ss]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ss_table1 = least_pos_instance_ss.get_information(2)


# Information gain scaling
top_negative_index_ig = np.argsort(abs(neg_evi_ig))[::-1]
tn_ig = top_negative_index_ig[:10]
pos_info_ig = pos_evi_ig[tp_ig]
index_least_pos_ig = np.argmin(pos_info_ig)
least_pos_instance_ig = instance(header[:class_index],X_test_ori[tn_ig[index_least_pos_ig]], X_test_ss[tn_ig[index_least_pos_ig]],
                                  X_test_ig[tn_ig[index_least_pos_ig]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ig_table1 = least_pos_instance_ig.get_information(3)

In [50]:
print "Original: Least positive in top 10 negative instances"
print "Index of test:",tn_ori[index_least_pos_ori]
print "Actual label", y_test[tn_ori[index_least_pos_ori]]
least_pos_instance_ori_table1

Original: Least positive in top 10 negative instances
Index of test: 208
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,7.0,0.94,0.12,1.01
plas,168.0,1.47,1.28,4.24
pres,88.0,0.98,0.93,-1.61
skin,42.0,1.35,0.73,0.34
insu,321.0,2.09,1.64,-0.47
mass,38.2,0.79,1.18,1.83
pedi,0.79,0.95,1.35,0.55
age,40.0,0.58,0.93,0.18
Bias,-5.21,-0.96,-1.0,


In [51]:
print "Standard scaling: Least positive in top 10 negative instances"
print "Index of test:",tn_ss[index_least_pos_ss]
print "Actual label", y_test[tn_ss[index_least_pos_ss]]
least_pos_instance_ss_table1

Standard scaling: Least positive in top 10 negative instances
Index of test: 244
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.45
plas,71.0,-1.56,-1.79,-1.65
pres,62.0,-0.37,-0.34,0.1
skin,0.0,-1.29,-1.54,-0.17
insu,0.0,-0.69,-1.01,0.17
mass,21.8,-1.29,-0.53,-0.81
pedi,0.42,-0.17,0.44,-0.06
age,26.0,-0.62,-0.2,-0.1
Bias,-5.21,-0.96,-1.0,


In [52]:
print "Information gain scaling: Least positive in top 10 negative instances"
print "Index of test:",tn_ig[index_least_pos_ig]
print "Actual label", y_test[tn_ig[index_least_pos_ig]]
least_pos_instance_ig_table1

Information gain scaling: Least positive in top 10 negative instances
Index of test: 171
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.88
plas,96.0,-0.78,-1.0,-1.05
pres,122.0,2.73,2.59,-0.76
skin,0.0,-1.29,-1.54,-0.26
insu,0.0,-0.69,-1.01,0.26
mass,22.4,-1.22,-0.47,-0.35
pedi,0.21,-0.8,-0.08,-0.03
age,27.0,-0.53,-0.12,-0.02
Bias,-5.21,-0.96,-1.0,


In [53]:
# The indices of top 10 negative based on negative evidence

print tn_ori
print tn_ss
print tn_ig

[145 111 160  87  74 171   0 118 220 208]
[238 204 152  10 213 201  19 171 165 244]
[238 204  10 152 213 201  19 244 165 171]
