In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split, ShuffleSplit
from classifiers import TransparentLogisticRegression, TransparentLinearRegression
from matplotlib import pylab as pl
from scipy.sparse import diags
from IPython import display
from scale import decision_tree_scale
from IPython.display import display, HTML
from ipy_table import *

In [2]:
# calculate # of instances between mean and splitting points for each feature
def get_num_instances(X, mean1, mean2):
    # m is # of instances, n is # of features, also the length of mean1 and mean2
    m, n = np.shape(X)
    result = []
    for i in range(n):
        current_feature = X[:,i]
        if mean1[i]>mean2[i]:
            a = mean2[i]
            b = mean1[i]
        else:
            a = mean1[i]
            b = mean2[i]
        num = ((current_feature > a) & (current_feature < b)).sum()
        result.append(num)
   
    return result

In [3]:
# combine header and table 
def combine_table(header, table):

    result = np.concatenate((header, table), axis=0)
    return result

In [4]:
# All information for test instances
class instance():
    def __init__(self, feature_name, ori_features, ss_features, ig_features, clf_ori, clf_ss, clf_ig):
        
        self.feature_name = feature_name
        self.ori_features = ori_features
        self.ss_features = ss_features
        self.ig_features = ig_features
        self.clf_ori = clf_ori
        self.clf_ss = clf_ss
        self.clf_ig = clf_ig
        
        self.neg_evi_ori, self.pos_evi_ori = self.evidence_prediction(self.clf_ori, self.ori_features)
        self.neg_evi_ss, self.pos_evi_ss = self.evidence_prediction(self.clf_ss, self.ss_features)
        self.neg_evi_ig, self.pos_evi_ig = self.evidence_prediction(self.clf_ig, self.ig_features)
        
        self.proba_ori = self.proba_prediction(self.clf_ori, self.ori_features)
        self.proba_ss = self.proba_prediction(self.clf_ss, self.ss_features)
        self.proba_ig = self.proba_prediction(self.clf_ig, self.ig_features)
                       
    def get_information(self, category):
        
        other_info = ['Bias', 'Neg_evi', 'Pos_evi','Proba']
        
        header = [['Features', 'Ori_value', 'ss_value', 'ig_value','w * v']]
        
        ori_evidence = np.multiply(self.ori_features, self.clf_ori.coef_).flatten()
        ss_evidence = np.multiply(self.ss_features, self.clf_ss.coef_).flatten()
        ig_evidence = np.multiply(self.ig_features, self.clf_ig.coef_).flatten()
        
        ori_features_round = np.around(self.ori_features, decimals=2)
        ss_features_round = np.around(self.ss_features, decimals=2)
        ig_features_round = np.around(self.ig_features, decimals=2)
        
        self.neg_evi_ori = np.around(self.neg_evi_ori, decimals=2); self.pos_evi_ori = np.around(self.pos_evi_ori, decimals=2)
        self.neg_evi_ss = np.around(self.neg_evi_ss, decimals=2); self.pos_evi_ss = np.around(self.pos_evi_ss, decimals=2)
        self.neg_evi_ig = np.around(self.neg_evi_ig, decimals=2); self.pos_evi_ig = np.around(self.pos_evi_ig, decimals=2)
        
        self.proba_ori = np.around(self.proba_ori[0], decimals=3)
        self.proba_ss = np.around(self.proba_ss[0], decimals=3)
        self.proba_ig = np.around(self.proba_ig[0], decimals=3)
        
        intercept_ori = round(self.clf_ori.intercept_[0], 2)
        intercept_ss = round(self.clf_ss.intercept_[0], 2)
        intercept_ig = round(self.clf_ig.intercept_[0], 2)
        
        if category==1:
            
            # print "This is ori classifier: "
            current_evidence = np.around(ori_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)               
            return make_table(result)
        
        elif category==2:
            
            # print "This is ss classifier: "
            current_evidence = np.around(ss_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)               
            return make_table(result)        
                
        elif category==3:
            
            # print "This is ig classifier: "
            current_evidence = np.around(ig_evidence, decimals=2)
            value_array = np.array([self.feature_name, ori_features_round, ss_features_round, ig_features_round, current_evidence])
            value_array = combine_table(header, value_array.T)
            
            bias_list = np.array(['Bias', intercept_ori, intercept_ss, intercept_ig, 'None'])
            neg_list = np.array(['Neg_evi', self.neg_evi_ori[0], self.neg_evi_ss[0], self.neg_evi_ig[0], 'None'])
            pos_list = np.array(['Pos_evi', self.pos_evi_ori[0], self.pos_evi_ss[0], self.pos_evi_ig[0], 'None'])
            prob_list = np.array(['Proba', str(self.proba_ori), str(self.proba_ss), str(self.proba_ig), 'None'])
            
            other_array = np.array([bias_list, neg_list, pos_list, prob_list])
            
            result = np.concatenate((value_array, other_array), axis=0)              
            return make_table(result)        
             
        else:
            
            raise "Error"
            
    def evidence_prediction(self, clf, features):
        
        features = features.reshape((1,-1))
        neg_evi, pos_evi = clf.predict_evidences(features)
        return neg_evi, pos_evi
    
    def proba_prediction(self, clf, features):
        features = features.reshape((1,-1))
        proba = clf.predict_proba(features)
        return proba

In [5]:
#diabetes  http://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
# ['preg' 'plas' 'pres' 'skin' 'insu' 'mass' 'pedi' 'age']
# preg: the # of pregnant 
# plas: Plasma glucose concentration a 2 hours in an oral glucose tolerance test 
# pres: Diastolic blood pressure
# skin: Triceps skin fold thickness
# insu: 2-Hour serum insulin
# mass: Body mass index
# pedi: Diabetes pedigree function 
# Age 

dataset = "diabetes.csv"
class_index = 8
num_cols = 9
classes= ['tested_negative', 'tested_positive']

read_cols = [i for i in range(num_cols) if i != class_index]
file_path = "D:\\IIT_Master\\2016 Spring\\CS597\\uci\\uci\\uci-tar\\nominal\\"+dataset

In [6]:


with open(file_path, 'r') as f:
    header = f.readline()
    #print header
    #header = np.fromstring("a, b", dtype=np.str_, sep=',')
    header = np.array(header.split(','))
    feature_names = header[read_cols]

In [7]:
# Loading the data and splitting the train, test

X = np.loadtxt(file_path, dtype=float, delimiter=",", skiprows=1, \
                   usecols=read_cols)
y = np.loadtxt(file_path, dtype=int, delimiter=",", skiprows=1, \
                   usecols=(class_index,), converters={class_index: lambda x: classes.index(x)})

num_inst, num_feat = np.shape(X)
print "The shape of this data set:",np.shape(X)

ss = ShuffleSplit(num_inst, n_iter=1, test_size=0.33, random_state=24)

for i, j in ss:
    train_index = i 
    test_index = j
    
y_train = y[train_index]
y_test = y[test_index]

print "# of test instances: ",len(y_test)

The shape of this data set: (768L, 8L)
# of test instances:  254


In [8]:
# Determine binary features
num_features = X.shape[1]
non_binary = []
binary = []
for i in range(num_features):
    if len(np.unique(X[:,i])) != 2:
        non_binary.append(i)
    else:
        binary.append(i)

In [9]:
# Original features
X_original = np.copy(X)

X_train_ori = X_original[train_index]
X_test_ori = X_original[test_index]

# Standard scale non binary features
X_ss = np.copy(X)

if len(non_binary) > 0:
    X_ss[:,non_binary]=scale(X[:,non_binary])
    
if len(binary) > 0: 
    print "binary features exist"
    X_b = X_ss[:,binary]
    X_b[X_b == 0] = -1
    X_ss[:,binary] = X_b
    
X_train_ss = X_ss[train_index]
X_test_ss = X_ss[test_index]

# Information gain scaling non binary features

X_ig = np.copy(X_original)
scale_ = decision_tree_scale()

X_train_ig = X_ig[train_index]
X_test_ig = X_ig[test_index]

if len(non_binary) > 0: 
    print "IG Scale for non_binary features -- Train"
    X_train_ig[:,non_binary]=scale_.fit_transform(X_train_ig[:,non_binary], y_train)

if len(binary) > 0: 
    print "binary features exist"
    X_b = X_train_ig[:,binary]
    X_b[X_b == 0] = -1
    X_train_ig[:,binary] = X_b
    
if len(non_binary) > 0:
    
    print "IG Scale for non_binary features -- Test"
    X_test_ig[:,non_binary]=scale_.transform(X_test_ig[:,non_binary])
    
if len(binary) > 0:
    
    print "binary features exist"
    X_b = X_test_ig[:,binary]
    X_b[X_b == 0] = -1
    X_test_ig[:,binary] = X_b

IG Scale for non_binary features -- Train
IG Scale for non_binary features -- Test


In [10]:
num_train = get_num_instances(X_train_ori, np.mean(X, axis=0), scale_.mns)
num_test = get_num_instances(X_test_ori, np.mean(X, axis=0), scale_.mns)
num_all = get_num_instances(X, np.mean(X, axis=0), scale_.mns)

mean = np.around(np.mean(X, axis=0), decimals=2)
splitting = np.around(scale_.mns, decimals=2)

table1_header = [['Features', 'mean', 'splitting', '# of train', '# of test', '# of total']]

result1 = np.array([header[:class_index].tolist(), mean.tolist(), splitting.tolist(), num_train, num_test, num_all])
result1 = combine_table(table1_header,result1.T)

make_table(result1)

0,1,2,3,4,5
Features,mean,splitting,# of train,# of test,# of total
preg,3.85,6.5,113,62,175
plas,120.89,133.5,67,46,113
pres,69.11,81.0,189,96,285
skin,20.54,32.5,155,74,229
insu,79.8,143.0,81,45,126
mass,31.99,27.85,100,51,151
pedi,0.47,0.72,102,57,159
age,33.24,28.5,71,36,107


In [11]:
# fit train instances for each classifier

clf_ori = TransparentLogisticRegression()
clf_ss = TransparentLogisticRegression()
clf_ig = TransparentLogisticRegression()

clf_ori.fit(X_train_ori, y_train)
clf_ss.fit(X_train_ss, y_train)
clf_ig.fit(X_train_ig, y_train)

TransparentLogisticRegression(C=1.0, class_weight=None, dual=False,
               fit_intercept=True, intercept_scaling=1, max_iter=100,
               multi_class='ovr', n_jobs=1, penalty='l2',
               random_state=None, solver='liblinear', tol=0.0001,
               verbose=0, warm_start=False)

In [12]:
clf_linear_ori = TransparentLinearRegression()
clf_linear_ss = TransparentLinearRegression()
clf_linear_ig = TransparentLinearRegression()

y_linear_train = np.copy(y_train)
y_linear_train[y_linear_train == 0] = -1

clf_linear_ori.fit(X_train_ori, y_linear_train)
clf_linear_ss.fit(X_train_ss, y_linear_train)
clf_linear_ig.fit(X_train_ig, y_linear_train)

TransparentLinearRegression(copy_X=True, fit_intercept=True, n_jobs=1,
              normalize=False)

In [13]:
# print the weights for each classifiers

ori_weight = list(clf_ori.intercept_)+ clf_ori.coef_[0].tolist()
ss_weight = list(clf_ss.intercept_)+ clf_ss.coef_[0].tolist()
ig_weight = list(clf_ig.intercept_)+ clf_ig.coef_[0].tolist()

ori_weight = np.around(ori_weight, decimals=2)
ss_weight = np.around(ss_weight, decimals=2)
ig_weight = np.around(ig_weight, decimals=2)

ori_linear_weight = [clf_linear_ori.intercept_]
ori_linear_weight = ori_linear_weight + clf_linear_ori.coef_.tolist()

ss_linear_weight = [clf_linear_ss.intercept_]
ss_linear_weight = ss_linear_weight + clf_linear_ss.coef_.tolist()

ig_linear_weight = [clf_linear_ig.intercept_]
ig_linear_weight = ig_linear_weight + clf_linear_ig.coef_.tolist()

ori_linear_weight = np.around(ori_linear_weight, decimals=2)
ss_linear_weight = np.around(ss_linear_weight, decimals=2)
ig_linear_weight = np.around(ig_linear_weight, decimals=2)


table2_header = [['Features', 'ori_weight', 'ss_weight', 'ig_weight', 'ori_linear_w', 'ss_linear_w', 'ig_linear_w']]
features = ['Bias']
features.extend(header[:class_index].tolist())
table2 = np.array([features, ori_weight, ss_weight, ig_weight, ori_linear_weight, ss_linear_weight, ig_linear_weight])

result2 = combine_table(table2_header,table2.T)

make_table(result2)

0,1,2,3,4,5,6
Features,ori_weight,ss_weight,ig_weight,ori_linear_w,ss_linear_w,ig_linear_w
Bias,-5.56,-1.05,-0.59,-2.82,-0.34,-0.19
preg,0.13,0.48,0.61,0.04,0.15,0.19
plas,0.03,1.31,1.44,0.01,0.42,0.46
pres,-0.02,-0.27,-0.3,0.0,-0.08,-0.09
skin,0.0,0.0,0.01,0.0,0.0,0.0
insu,0.0,-0.14,-0.16,0.0,-0.04,-0.05
mass,0.04,0.68,0.75,0.02,0.19,0.22
pedi,0.55,0.28,0.36,0.26,0.09,0.11
age,0.0,0.15,0.16,0.0,0.05,0.05


In [14]:
# Predict test instances for each classifier

y_predict_ori = clf_ori.predict(X_test_ori)
y_pred_prob_ori = clf_ori.predict_proba(X_test_ori)
neg_evi_ori, pos_evi_ori = clf_ori.predict_evidences(X_test_ori)

y_predict_ss = clf_ss.predict(X_test_ss)
y_pred_prob_ss = clf_ss.predict_proba(X_test_ss)
neg_evi_ss, pos_evi_ss = clf_ss.predict_evidences(X_test_ss)

y_predict_ig = clf_ig.predict(X_test_ig)
y_pred_prob_ig = clf_ig.predict_proba(X_test_ig)
neg_evi_ig, pos_evi_ig = clf_ig.predict_evidences(X_test_ig)

print accuracy_score(y_test,y_predict_ori)
print accuracy_score(y_test,y_predict_ss)
print accuracy_score(y_test,y_predict_ig)

0.700787401575
0.708661417323
0.708661417323


In [15]:
# most negative, "tested_negative" -- Probability

# Original features
Most_negative_1 = np.argmax(y_pred_prob_ori[:,0])
Most_neg_instance_ori = instance(header[:class_index],X_test_ori[Most_negative_1], X_test_ss[Most_negative_1],X_test_ig[Most_negative_1],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table1 = Most_neg_instance_ori.get_information(1)

# standard scaling 
Most_negative_2 = np.argmax(y_pred_prob_ss[:,0])
Most_neg_instance_ss = instance(header[:class_index],X_test_ori[Most_negative_2], X_test_ss[Most_negative_2],X_test_ig[Most_negative_2],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table2 = Most_neg_instance_ss.get_information(2)

# Information gain scaling
Most_negative_3 = np.argmax(y_pred_prob_ig[:,0])
Most_neg_instance_ig = instance(header[:class_index],X_test_ori[Most_negative_3], X_test_ss[Most_negative_3],X_test_ig[Most_negative_3],
                              clf_ori, clf_ss, clf_ig)
Most_neg_instance_table3 = Most_neg_instance_ig.get_information(3)

In [16]:
print "Original: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_1
Most_neg_instance_table1

Original: Most negative(benign) instances based on probability
Index of test:  74


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,5.0,0.34,-0.35,0.65
plas,0.0,-3.78,-3.82,0.0
pres,80.0,0.56,-0.05,-1.64
skin,32.0,0.72,-0.03,0.01
insu,0.0,-0.69,-1.07,-0.0
mass,41.0,1.14,1.5,1.84
pedi,0.35,-0.38,-0.88,0.19
age,37.0,0.32,0.67,0.1
Bias,-5.56,-1.05,-0.59,


In [17]:
print "Standard scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_2
Most_neg_instance_table2

Standard scaling: Most negative(benign) instances based on probability
Index of test:  43


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,2.0,-0.55,-1.05,-0.26
plas,74.0,-1.47,-1.7,-1.93
pres,0.0,-3.57,-3.71,0.98
skin,0.0,-1.29,-1.67,-0.01
insu,0.0,-0.69,-1.07,0.1
mass,0.0,-4.06,-3.17,-2.78
pedi,0.1,-1.12,-1.45,-0.31
age,22.0,-0.96,-0.51,-0.15
Bias,-5.56,-1.05,-0.59,


In [18]:
print "Information gain scaling: Most negative(tested_negative) instances based on probability"
print "Index of test: ",Most_negative_3
Most_neg_instance_table3

Information gain scaling: Most negative(benign) instances based on probability
Index of test:  43


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,2.0,-0.55,-1.05,-0.64
plas,74.0,-1.47,-1.7,-2.45
pres,0.0,-3.57,-3.71,1.12
skin,0.0,-1.29,-1.67,-0.02
insu,0.0,-0.69,-1.07,0.17
mass,0.0,-4.06,-3.17,-2.37
pedi,0.1,-1.12,-1.45,-0.53
age,22.0,-0.96,-0.51,-0.08
Bias,-5.56,-1.05,-0.59,


In [19]:
# most positive, "tested_positive" -- Probability

# Original featuers
Most_positive_1 = np.argmax(y_pred_prob_ori[:,1])
Most_pos_instance_ori = instance(header[:class_index],X_test_ori[Most_positive_1], X_test_ss[Most_positive_1],X_test_ig[Most_positive_1],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table1 = Most_pos_instance_ori.get_information(1)

# standard scaling
Most_positive_2 = np.argmax(y_pred_prob_ss[:,1])
Most_pos_instance_ss = instance(header[:class_index],X_test_ori[Most_positive_2], X_test_ss[Most_positive_2],X_test_ig[Most_positive_2],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table2 = Most_pos_instance_ss.get_information(2)

# Information gain scaling
Most_positive_3 = np.argmax(y_pred_prob_ig[:,1])
Most_pos_instance_ig = instance(header[:class_index],X_test_ori[Most_positive_3], X_test_ss[Most_positive_3],X_test_ig[Most_positive_3],
                              clf_ori, clf_ss, clf_ig)
Most_pos_instance_table3 = Most_pos_instance_ig.get_information(3)

In [20]:
print "Original: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_1
Most_pos_instance_table1

Original: Most positive(malignant) instances based on probability
Index of test:  239


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.45,2.21
plas,163.0,1.32,0.84,5.21
pres,72.0,0.15,-0.41,-1.47
skin,41.0,1.28,0.44,0.02
insu,114.0,0.3,-0.22,-0.06
mass,40.9,1.13,1.49,1.84
pedi,0.82,1.04,0.23,0.45
age,47.0,1.17,1.47,0.13
Bias,-5.56,-1.05,-0.59,


In [21]:
print "Standard scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_2
Most_pos_instance_table2

Standard scaling: Most positive(malignant) instances based on probability
Index of test:  239


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.45,1.86
plas,163.0,1.32,0.84,1.73
pres,72.0,0.15,-0.41,-0.04
skin,41.0,1.28,0.44,0.01
insu,114.0,0.3,-0.22,-0.04
mass,40.9,1.13,1.49,0.77
pedi,0.82,1.04,0.23,0.29
age,47.0,1.17,1.47,0.18
Bias,-5.56,-1.05,-0.59,


In [22]:
print "Information gain scaling: Most positive(tested_positive) instances based on probability"
print "Index of test: ",Most_positive_3
Most_pos_instance_table3

Information gain scaling: Most positive(malignant) instances based on probability
Index of test:  239


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.45,1.5
plas,163.0,1.32,0.84,1.22
pres,72.0,0.15,-0.41,0.12
skin,41.0,1.28,0.44,0.01
insu,114.0,0.3,-0.22,0.03
mass,40.9,1.13,1.49,1.11
pedi,0.82,1.04,0.23,0.08
age,47.0,1.17,1.47,0.23
Bias,-5.56,-1.05,-0.59,


In [23]:
# most negative, "tested_negative" -- Evidence

# Original features
negative_evi_index_ori = np.argmax(abs(neg_evi_ori))
Most_neg_evi_instance_ori = instance(header[:class_index],X_test_ori[negative_evi_index_ori], X_test_ss[negative_evi_index_ori],
                                     X_test_ig[negative_evi_index_ori], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table1 = Most_neg_evi_instance_ori.get_information(1)

# Standard scaling
negative_evi_index_ss = np.argmax(abs(neg_evi_ss))
Most_neg_evi_instance_ss = instance(header[:class_index],X_test_ori[negative_evi_index_ss], X_test_ss[negative_evi_index_ss],
                                     X_test_ig[negative_evi_index_ss], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table2 = Most_neg_evi_instance_ss.get_information(2)

# Information gain scaling
negative_evi_index_ig = np.argmax(abs(neg_evi_ig))
Most_neg_evi_instance_ig = instance(header[:class_index],X_test_ori[negative_evi_index_ig], X_test_ss[negative_evi_index_ig],
                                     X_test_ig[negative_evi_index_ig], clf_ori, clf_ss, clf_ig)
Most_neg_evi_instance_table3 = Most_neg_evi_instance_ig.get_information(3)

In [24]:
print "Original: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ori
Most_neg_evi_instance_table1

Original: Most negative(benign) instances based on evidence
Index of test:  21


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,13.0,2.72,1.52,1.69
plas,158.0,1.16,0.7,5.05
pres,114.0,2.32,1.51,-2.33
skin,0.0,-1.29,-1.67,0.0
insu,0.0,-0.69,-1.07,-0.0
mass,42.3,1.31,1.65,1.9
pedi,0.26,-0.65,-1.09,0.14
age,44.0,0.92,1.23,0.12
Bias,-5.56,-1.05,-0.59,


In [25]:
print "Standard scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ss
Most_neg_evi_instance_table2

Standard scaling: Most negative(benign) instances based on evidence
Index of test:  43


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,2.0,-0.55,-1.05,-0.26
plas,74.0,-1.47,-1.7,-1.93
pres,0.0,-3.57,-3.71,0.98
skin,0.0,-1.29,-1.67,-0.01
insu,0.0,-0.69,-1.07,0.1
mass,0.0,-4.06,-3.17,-2.78
pedi,0.1,-1.12,-1.45,-0.31
age,22.0,-0.96,-0.51,-0.15
Bias,-5.56,-1.05,-0.59,


In [26]:
print "Information gain scaling: Most negative(tested_negative) instances based on evidence"
print "Index of test: ",negative_evi_index_ig
Most_neg_evi_instance_table3

Information gain scaling: Most negative(benign) instances based on evidence
Index of test:  43


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,2.0,-0.55,-1.05,-0.64
plas,74.0,-1.47,-1.7,-2.45
pres,0.0,-3.57,-3.71,1.12
skin,0.0,-1.29,-1.67,-0.02
insu,0.0,-0.69,-1.07,0.17
mass,0.0,-4.06,-3.17,-2.37
pedi,0.1,-1.12,-1.45,-0.53
age,22.0,-0.96,-0.51,-0.08
Bias,-5.56,-1.05,-0.59,


In [27]:
# most positive, "tested_positive" -- Evidence 

# Original features
positive_evi_index_ori = np.argmax(pos_evi_ori)
Most_pos_evi_instance_ori = instance(header[:class_index],X_test_ori[positive_evi_index_ori], X_test_ss[positive_evi_index_ori],
                                     X_test_ig[positive_evi_index_ori], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table1 = Most_pos_evi_instance_ori.get_information(1)

# Standard scaling
positive_evi_index_ss = np.argmax(pos_evi_ss)
Most_pos_evi_instance_ss = instance(header[:class_index],X_test_ori[positive_evi_index_ss], X_test_ss[positive_evi_index_ss],
                                     X_test_ig[positive_evi_index_ss], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table2 = Most_pos_evi_instance_ss.get_information(2)

# Information gain scaling
positive_evi_index_ig = np.argmax(pos_evi_ig)
Most_pos_evi_instance_ig = instance(header[:class_index],X_test_ori[positive_evi_index_ig], X_test_ss[positive_evi_index_ig],
                                     X_test_ig[positive_evi_index_ig], clf_ori, clf_ss, clf_ig)
Most_pos_evi_instance_table3 = Most_pos_evi_instance_ig.get_information(3)

In [28]:
print "Original: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ori
Most_pos_evi_instance_table1

Original: Most positive(malignant) instances based on evidence
Index of test:  239


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,17.0,3.91,2.45,2.21
plas,163.0,1.32,0.84,5.21
pres,72.0,0.15,-0.41,-1.47
skin,41.0,1.28,0.44,0.02
insu,114.0,0.3,-0.22,-0.06
mass,40.9,1.13,1.49,1.84
pedi,0.82,1.04,0.23,0.45
age,47.0,1.17,1.47,0.13
Bias,-5.56,-1.05,-0.59,


In [29]:
print "Standard scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ss
Most_pos_evi_instance_table2

Standard scaling: Most positive(malignant) instances based on evidence
Index of test:  210


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,8.0,1.23,0.35,0.59
plas,188.0,2.1,1.56,2.76
pres,78.0,0.46,-0.14,-0.13
skin,0.0,-1.29,-1.67,-0.01
insu,0.0,-0.69,-1.07,0.1
mass,47.9,2.02,2.29,1.38
pedi,0.14,-1.01,-1.37,-0.28
age,43.0,0.83,1.15,0.13
Bias,-5.56,-1.05,-0.59,


In [30]:
print "Information gain scaling: Most positive(tested_positive) instances based on evidence"
print "Index of test: ",positive_evi_index_ig
Most_pos_evi_instance_table3

Information gain scaling: Most positive(malignant) instances based on evidence
Index of test:  210


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,8.0,1.23,0.35,0.21
plas,188.0,2.1,1.56,2.25
pres,78.0,0.46,-0.14,0.04
skin,0.0,-1.29,-1.67,-0.02
insu,0.0,-0.69,-1.07,0.17
mass,47.9,2.02,2.29,1.71
pedi,0.14,-1.01,-1.37,-0.5
age,43.0,0.83,1.15,0.18
Bias,-5.56,-1.05,-0.59,


In [31]:
# unc_1 Top 1 uncertain instances

# Original features
uncertains_ori = np.min(y_pred_prob_ori, axis=1)
uis_ori = np.argsort(uncertains_ori)[::-1]
top10_uis_ori = uis_ori[:10]
uncertains_1_ori = instance(header[:class_index],X_test_ori[uis_ori[0]], X_test_ss[uis_ori[0]],
                                     X_test_ig[uis_ori[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ori_table = uncertains_1_ori.get_information(1)

# Standard scaling
uncertains_ss = np.min(y_pred_prob_ss, axis=1)
uis_ss = np.argsort(uncertains_ss)[::-1]
top10_uis_ss = uis_ss[:10]
uncertains_1_ss = instance(header[:class_index],X_test_ori[uis_ss[0]], X_test_ss[uis_ss[0]], X_test_ig[uis_ss[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ss_table = uncertains_1_ss.get_information(2)

# Information gain scaling
uncertains_ig = np.min(y_pred_prob_ig, axis=1)
uis_ig = np.argsort(uncertains_ig)[::-1]
top10_uis_ig = uis_ig[:10]
uncertains_1_ig = instance(header[:class_index],X_test_ori[uis_ig[0]], X_test_ss[uis_ig[0]], X_test_ig[uis_ig[0]], clf_ori, clf_ss, clf_ig)
uncertains_1_ig_table = uncertains_1_ig.get_information(3)

In [32]:
print "Original: the most uncertain instance based on probability"
print "Index of test: ",uis_ori[0]
print "Actual label", y_test[uis_ori[0]]
uncertains_1_ori_table

Original: the most uncertain instance based on probability
Index of test:  116
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,9.0,1.53,0.58,1.17
plas,134.0,0.41,0.01,4.28
pres,74.0,0.25,-0.32,-1.51
skin,33.0,0.78,0.03,0.01
insu,60.0,-0.17,-0.62,-0.03
mass,25.9,-0.77,-0.22,1.16
pedi,0.46,-0.04,-0.61,0.25
age,81.0,4.06,4.16,0.22
Bias,-5.56,-1.05,-0.59,


In [33]:
print "standard scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ss[0]
print "Actual label", y_test[uis_ss[0]]
uncertains_1_ss_table

standard scaling: the most uncertain instance based on probability
Index of test:  172
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.4
plas,153.0,1.0,0.56,1.32
pres,82.0,0.67,0.05,-0.18
skin,42.0,1.35,0.49,0.01
insu,485.0,3.52,2.56,-0.49
mass,40.6,1.09,1.45,0.75
pedi,0.69,0.65,-0.07,0.18
age,23.0,-0.87,-0.44,-0.13
Bias,-5.56,-1.05,-0.59,


In [34]:
print "Information gain scaling: the most uncertain instance based on probability"
print "Index of test: ",uis_ig[0]
print "Actual label", y_test[uis_ig[0]]
uncertains_1_ig_table

Information gain scaling: the most uncertain instance based on probability
Index of test:  172
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.79
plas,153.0,1.0,0.56,0.8
pres,82.0,0.67,0.05,-0.01
skin,42.0,1.35,0.49,0.01
insu,485.0,3.52,2.56,-0.4
mass,40.6,1.09,1.45,1.09
pedi,0.69,0.65,-0.07,-0.03
age,23.0,-0.87,-0.44,-0.07
Bias,-5.56,-1.05,-0.59,


In [35]:
# The indices of top 10 uncertain instances for each classifier

print top10_uis_ori
print top10_uis_ss
print top10_uis_ig

[116  45 146 219  71 228 195   7 220 133]
[172 175  61  45 146  69   4   8  40  78]
[172 175  69  45 146  61   4 110  40   8]


In [36]:
# unc_ce from Top 10 uncertain instances

# Original features 
min_evidence_top10_ori = np.min([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ce_ori = np.argmax(min_evidence_top10_ori)
uncertains_ce_ori = instance(header[:class_index],X_test_ori[top10_uis_ori[index_ce_ori]], X_test_ss[top10_uis_ori[index_ce_ori]],
                                     X_test_ig[top10_uis_ori[index_ce_ori]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ori_table = uncertains_ce_ori.get_information(1)


# Standard scaling
min_evidence_top10_ss = np.min([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ce_ss = np.argmax(min_evidence_top10_ss)
uncertains_ce_ss = instance(header[:class_index],X_test_ori[top10_uis_ss[index_ce_ss]], X_test_ss[top10_uis_ss[index_ce_ss]],
                                     X_test_ig[top10_uis_ss[index_ce_ss]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ss_table = uncertains_ce_ss.get_information(2)


# Information gain scaling
min_evidence_top10_ig = np.min([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ce_ig = np.argmax(min_evidence_top10_ig)
uncertains_ce_ig = instance(header[:class_index],X_test_ori[top10_uis_ig[index_ce_ig]], X_test_ss[top10_uis_ig[index_ce_ig]],
                                     X_test_ig[top10_uis_ig[index_ce_ig]], clf_ori, clf_ss, clf_ig)
uncertains_ce_ig_table = uncertains_ce_ig.get_information(3)

In [37]:
print "Original: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ce_ori]
print "Actual label", y_test[top10_uis_ori[index_ce_ori]]
uncertains_ce_ori_table

Original: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  146
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,11.0,2.12,1.05,1.43
plas,136.0,0.47,0.07,4.35
pres,84.0,0.77,0.14,-1.72
skin,35.0,0.91,0.13,0.01
insu,130.0,0.44,-0.1,-0.07
mass,28.3,-0.47,0.05,1.27
pedi,0.26,-0.64,-1.08,0.14
age,42.0,0.75,1.07,0.11
Bias,-5.56,-1.05,-0.59,


In [38]:
print "Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ce_ss]
print "Actual label", y_test[top10_uis_ss[index_ce_ss]]
uncertains_ce_ss_table

Standard scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  172
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,1.0,-0.84,-1.28,-0.4
plas,153.0,1.0,0.56,1.32
pres,82.0,0.67,0.05,-0.18
skin,42.0,1.35,0.49,0.01
insu,485.0,3.52,2.56,-0.49
mass,40.6,1.09,1.45,0.75
pedi,0.69,0.65,-0.07,0.18
age,23.0,-0.87,-0.44,-0.13
Bias,-5.56,-1.05,-0.59,


In [39]:
print "Information gain scaling: the most conflicting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ce_ig]
print "Actual label", y_test[top10_uis_ig[index_ce_ig]]
uncertains_ce_ig_table

Information gain scaling: the most conflicting instance among 10 uncertain instances based on evidence
Index of test:  69
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,5.0,0.34,-0.35,-0.21
plas,115.0,-0.18,-0.53,-0.76
pres,98.0,1.49,0.78,-0.23
skin,0.0,-1.29,-1.67,-0.02
insu,0.0,-0.69,-1.07,0.17
mass,52.9,2.65,2.86,2.13
pedi,0.21,-0.79,-1.2,-0.44
age,28.0,-0.45,-0.04,-0.01
Bias,-5.56,-1.05,-0.59,


In [40]:
# unc_ie from Top 10 uncertain instances 

# Original features
max_evidence_top10_ori = np.max([abs(neg_evi_ori[top10_uis_ori]),abs(pos_evi_ori[top10_uis_ori])], axis=0)
index_ie_ori = np.argmin(max_evidence_top10_ori)
uncertains_ie_ori = instance(header[:class_index],X_test_ori[top10_uis_ori[index_ie_ori]], X_test_ss[top10_uis_ori[index_ie_ori]],
                                     X_test_ig[top10_uis_ori[index_ie_ori]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ori_table = uncertains_ie_ori.get_information(1)

# Standard scaling
max_evidence_top10_ss = np.max([abs(neg_evi_ss[top10_uis_ss]),abs(pos_evi_ss[top10_uis_ss])], axis=0)
index_ie_ss = np.argmin(max_evidence_top10_ss)
uncertains_ie_ss = instance(header[:class_index],X_test_ori[top10_uis_ss[index_ie_ss]], X_test_ss[top10_uis_ss[index_ie_ss]],
                                     X_test_ig[top10_uis_ss[index_ie_ss]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ss_table = uncertains_ie_ss.get_information(2)

# IG scaling
max_evidence_top10_ig = np.max([abs(neg_evi_ig[top10_uis_ig]),abs(pos_evi_ig[top10_uis_ig])], axis=0)
index_ie_ig = np.argmin(max_evidence_top10_ig)
uncertains_ie_ig = instance(header[:class_index],X_test_ori[top10_uis_ig[index_ie_ig]], X_test_ss[top10_uis_ig[index_ie_ig]],
                                     X_test_ig[top10_uis_ig[index_ie_ig]], clf_ori, clf_ss, clf_ig)
uncertains_ie_ig_table = uncertains_ie_ig.get_information(3)

In [41]:
print "Original: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ori[index_ie_ori]
print "Actual label", y_test[top10_uis_ori[index_ie_ori]]
uncertains_ie_ori_table

Original: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  7
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.52,0.0
plas,119.0,-0.06,-0.41,3.8
pres,0.0,-3.57,-3.71,-0.0
skin,0.0,-1.29,-1.67,0.0
insu,0.0,-0.69,-1.07,-0.0
mass,32.4,0.05,0.52,1.45
pedi,0.14,-1.0,-1.36,0.08
age,24.0,-0.79,-0.36,0.06
Bias,-5.56,-1.05,-0.59,


In [42]:
print "Standard scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ss[index_ie_ss]
print "Actual label", y_test[top10_uis_ss[index_ie_ss]]
uncertains_ie_ss_table

Standard scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  45
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,8.0,1.23,0.35,0.59
plas,133.0,0.38,-0.01,0.5
pres,72.0,0.15,-0.41,-0.04
skin,0.0,-1.29,-1.67,-0.01
insu,0.0,-0.69,-1.07,0.1
mass,32.9,0.12,0.58,0.08
pedi,0.27,-0.61,-1.06,-0.17
age,39.0,0.49,0.83,0.07
Bias,-5.56,-1.05,-0.59,


In [43]:
print "Information gain scaling: the most leasting instance among 10 uncertain instances based on evidence"
print "Index of test: ", top10_uis_ig[index_ie_ig]
print "Actual label", y_test[top10_uis_ig[index_ie_ig]]
uncertains_ie_ig_table

Information gain scaling: the most leasting instance among 10 uncertain instances based on evidence
Index of test:  146
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,11.0,2.12,1.05,0.64
plas,136.0,0.47,0.07,0.1
pres,84.0,0.77,0.14,-0.04
skin,35.0,0.91,0.13,0.0
insu,130.0,0.44,-0.1,0.02
mass,28.3,-0.47,0.05,0.04
pedi,0.26,-0.64,-1.08,-0.39
age,42.0,0.75,1.07,0.17
Bias,-5.56,-1.05,-0.59,


In [44]:
# Least negative among Top 10 possitive 

# Original features
top_positive_index_ori = np.argsort(pos_evi_ori)[::-1]
tp_ori = top_positive_index_ori[:10]
neg_info_ori = neg_evi_ori[tp_ori]
index_least_neg_ori = np.argmin(neg_info_ori)
least_neg_instance_ori = instance(header[:class_index],X_test_ori[tp_ori[index_least_neg_ori]], X_test_ss[tp_ori[index_least_neg_ori]],
                                  X_test_ig[tp_ori[index_least_neg_ori]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ori_table1 = least_neg_instance_ori.get_information(1)

# Standard scaling
top_positive_index_ss = np.argsort(pos_evi_ss)[::-1]
tp_ss = top_positive_index_ss[:10]
neg_info_ss = neg_evi_ss[tp_ss]
index_least_neg_ss = np.argmin(neg_info_ss)
least_neg_instance_ss = instance(header[:class_index],X_test_ori[tp_ss[index_least_neg_ss]], X_test_ss[tp_ss[index_least_neg_ss]],
                                  X_test_ig[tp_ss[index_least_neg_ss]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ss_table1 = least_neg_instance_ss.get_information(2)

# Information gain scaling
top_positive_index_ig = np.argsort(pos_evi_ig)[::-1]
tp_ig = top_positive_index_ig[:10]
neg_info_ig = neg_evi_ig[tp_ig]
index_least_neg_ig = np.argmin(neg_info_ig)
least_neg_instance_ig = instance(header[:class_index],X_test_ori[tp_ig[index_least_neg_ig]], X_test_ss[tp_ig[index_least_neg_ig]],
                                  X_test_ig[tp_ig[index_least_neg_ig]], clf_ori, clf_ss, clf_ig)
least_neg_instance_ig_table1 = least_neg_instance_ig.get_information(3)

In [45]:
print "Original: Least negative in top 10 positive instances"
print "Index of test:",tp_ori[index_least_neg_ori]
print "Actual label", y_test[tp_ori[index_least_neg_ori]]
least_neg_instance_ori_table1

Original: Least negative in top 10 positive instances
Index of test: 21
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,13.0,2.72,1.52,1.69
plas,158.0,1.16,0.7,5.05
pres,114.0,2.32,1.51,-2.33
skin,0.0,-1.29,-1.67,0.0
insu,0.0,-0.69,-1.07,-0.0
mass,42.3,1.31,1.65,1.9
pedi,0.26,-0.65,-1.09,0.14
age,44.0,0.92,1.23,0.12
Bias,-5.56,-1.05,-0.59,


In [46]:
print "Standard scaling: Least negative in top 10 positive instances"
print "Index of test:",tp_ss[index_least_neg_ss]
print "Actual label", y_test[tp_ss[index_least_neg_ss]]
least_neg_instance_ss_table1

Standard scaling: Least negative in top 10 positive instances
Index of test: 29
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.52,-0.54
plas,173.0,1.63,1.13,2.14
pres,78.0,0.46,-0.14,-0.13
skin,32.0,0.72,-0.03,0.0
insu,265.0,1.61,0.91,-0.22
mass,46.5,1.84,2.13,1.26
pedi,1.16,2.08,1.04,0.58
age,58.0,2.11,2.34,0.32
Bias,-5.56,-1.05,-0.59,


In [47]:
print "Information gain scaling: Least negative in top 10 positive instances"
print "Index of test:",tp_ig[index_least_neg_ig]
print "Actual label", y_test[tp_ig[index_least_neg_ig]]
least_neg_instance_ig_table1

Information gain scaling: Least negative in top 10 positive instances
Index of test: 29
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.52,-0.93
plas,173.0,1.63,1.13,1.63
pres,78.0,0.46,-0.14,0.04
skin,32.0,0.72,-0.03,0.0
insu,265.0,1.61,0.91,-0.14
mass,46.5,1.84,2.13,1.59
pedi,1.16,2.08,1.04,0.38
age,58.0,2.11,2.34,0.37
Bias,-5.56,-1.05,-0.59,


In [48]:
# The indices of top 10 positive based on positive evidence

print tp_ori
print tp_ss
print tp_ig

[239  99 210  90 206 235 240  21  59 148]
[210 239  90  99 235 206  29 240  21  79]
[210 239  90  99  29 206  79 240 235  21]


In [49]:
# Least positive among Top 10 negative instances

# Original features
top_negative_index_ori = np.argsort(abs(neg_evi_ori))[::-1]
tn_ori = top_negative_index_ori[:10]
pos_info_ori = pos_evi_ori[tp_ori]
index_least_pos_ori = np.argmin(pos_info_ori)
least_pos_instance_ori = instance(header[:class_index],X_test_ori[tn_ori[index_least_pos_ori]], X_test_ss[tn_ori[index_least_pos_ori]],
                                  X_test_ig[tn_ori[index_least_pos_ori]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ori_table1 = least_pos_instance_ori.get_information(1)

# Standard scaling
top_negative_index_ss = np.argsort(abs(neg_evi_ss))[::-1]
tn_ss = top_negative_index_ss[:10]
pos_info_ss = pos_evi_ss[tp_ss]
index_least_pos_ss = np.argmin(pos_info_ss)
least_pos_instance_ss = instance(header[:class_index],X_test_ori[tn_ss[index_least_pos_ss]], X_test_ss[tn_ss[index_least_pos_ss]],
                                  X_test_ig[tn_ss[index_least_pos_ss]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ss_table1 = least_pos_instance_ss.get_information(2)


# Information gain scaling
top_negative_index_ig = np.argsort(abs(neg_evi_ig))[::-1]
tn_ig = top_negative_index_ig[:10]
pos_info_ig = pos_evi_ig[tp_ig]
index_least_pos_ig = np.argmin(pos_info_ig)
least_pos_instance_ig = instance(header[:class_index],X_test_ori[tn_ig[index_least_pos_ig]], X_test_ss[tn_ig[index_least_pos_ig]],
                                  X_test_ig[tn_ig[index_least_pos_ig]], clf_ori, clf_ss, clf_ig)
least_pos_instance_ig_table1 = least_pos_instance_ig.get_information(3)

In [50]:
print "Original: Least positive in top 10 negative instances"
print "Index of test:",tn_ori[index_least_pos_ori]
print "Actual label", y_test[tn_ori[index_least_pos_ori]]
least_pos_instance_ori_table1

Original: Least positive in top 10 negative instances
Index of test: 187
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,0.0,-1.14,-1.52,0.0
plas,125.0,0.13,-0.24,4.0
pres,96.0,1.39,0.69,-1.96
skin,0.0,-1.29,-1.67,0.0
insu,0.0,-0.69,-1.07,-0.0
mass,22.5,-1.2,-0.61,1.01
pedi,0.26,-0.63,-1.08,0.14
age,21.0,-1.04,-0.59,0.06
Bias,-5.56,-1.05,-0.59,


In [51]:
print "Standard scaling: Least positive in top 10 negative instances"
print "Index of test:",tn_ss[index_least_pos_ss]
print "Actual label", y_test[tn_ss[index_least_pos_ss]]
least_pos_instance_ss_table1

Standard scaling: Least positive in top 10 negative instances
Index of test: 117
Actual label 1


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,8.0,1.23,0.35,0.59
plas,125.0,0.13,-0.24,0.17
pres,96.0,1.39,0.69,-0.38
skin,0.0,-1.29,-1.67,-0.01
insu,0.0,-0.69,-1.07,0.1
mass,0.0,-4.06,-3.17,-2.78
pedi,0.23,-0.72,-1.15,-0.2
age,54.0,1.77,2.02,0.27
Bias,-5.56,-1.05,-0.59,


In [52]:
print "Information gain scaling: Least positive in top 10 negative instances"
print "Index of test:",tn_ig[index_least_pos_ig]
print "Actual label", y_test[tn_ig[index_least_pos_ig]]
least_pos_instance_ig_table1

Information gain scaling: Least positive in top 10 negative instances
Index of test: 161
Actual label 0


0,1,2,3,4
Features,Ori_value,ss_value,ig_value,w * v
preg,3.0,-0.25,-0.82,-0.5
plas,61.0,-1.87,-2.07,-2.99
pres,82.0,0.67,0.05,-0.01
skin,28.0,0.47,-0.23,0.0
insu,0.0,-0.69,-1.07,0.17
mass,34.4,0.31,0.75,0.56
pedi,0.24,-0.69,-1.12,-0.41
age,46.0,1.09,1.39,0.22
Bias,-5.56,-1.05,-0.59,


In [53]:
# The indices of top 10 negative based on negative evidence

print tn_ori
print tn_ss
print tn_ig

[ 21 177 231  25 169 178  70  62  69 187]
[ 43  74 125  87  49 136 191 190 230 117]
[ 43  74 125  87  49 136 190 230 191 161]
