In [1]:
import ast
import scipy.io
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn import neighbors, datasets
import statistics
import pandas as pd
import pickle

In [2]:
# Dataset files and their cross-validation fold files.
# The two lists are paired by position: entry k of `cv_filename` holds the
# folds used for entry k of `data_dat`.
data_dat = [
    'australian.dat',
    'bupa.dat',
    'glass.dat',
]
cv_filename = [
    'cv_australian.mat',
    'cv_bupa.mat',
    'cv_glass.mat',
]

# KNN, Naive Bayes, and Logistic Regression algorithms

In [3]:
# Base classifiers. The order matters: code that slices the stacked
# probabilities expects logistic regression first, then Gaussian naive Bayes,
# then k-nearest neighbours.
models = [
    LogisticRegression(random_state=0),
    GaussianNB(),
    neighbors.KNeighborsClassifier(n_neighbors=10, p=2),
]

In [4]:
def training_models(models,features_train,targets_train,features_test):
    """Fit every model and stack their test-set class probabilities.

    Each model in ``models`` is fitted in place on the training data, then
    asked for ``predict_proba`` on ``features_test``.  Only the first two
    probability columns of each model are kept.

    Returns an array with one row per test sample and two columns per model:
    columns ``2*k`` and ``2*k + 1`` hold the first and second probability
    column of ``models[k]``.
    """
    n_test = features_test.shape[0]
    stacked = np.zeros((2 * len(models), n_test))
    for position, classifier in enumerate(models):
        classifier.fit(features_train, targets_train)
        proba_by_class = classifier.predict_proba(features_test).T
        stacked[2 * position, :] = proba_by_class[0]
        stacked[2 * position + 1, :] = proba_by_class[1]
    # Flip to (samples, 2 * n_models) so callers can slice per model by column.
    return stacked.T

# Calculate Mean and Variance

In [5]:
def combining_sum_product(matrix1, matrix2, matrix3):
    """Fuse three probability matrices with the sum rule and the product rule.

    Returns a pair ``(sum_rule, product_rule)``: the element-wise mean of the
    three inputs, and their element-wise product.
    """
    # Sum rule: average of the three matrices, element by element.
    averaged = (matrix1 + matrix2 + matrix3) / 3
    # Product rule: element-wise product of the three matrices.
    multiplied = matrix1 * matrix2 * matrix3
    return averaged, multiplied

In [6]:
# Combining algorithms: MIN rule
# Helper that compares the corresponding elements of the three matrices and returns the minimum value
def min_element(value1, value2, value3):
    """Return the smallest of the three given values."""
    return min(value1, value2, value3)

def row_in_matrix(i,matrix_1, matrix_2, matrix_3):
    """Return the per-class minimum of row ``i`` across the three matrices.

    Only the first two columns (index 0 and 1) are considered; the result is
    the tuple ``(min of column 0, min of column 1)``.

    NOTE: a later cell of this notebook defines another function with this
    same name that takes the maximum instead.  Whichever definition was
    executed last is the one bound to ``row_in_matrix``.
    """
    return tuple(
        min_element(value1=matrix_1[i][col],
                    value2=matrix_2[i][col],
                    value3=matrix_3[i][col])
        for col in (0, 1)
    )
    
def min_rule(matrix_1, matrix_2, matrix_3):
    """Fuse three probability matrices with the MIN rule.

    Each output element is the minimum of the corresponding elements of the
    three inputs.

    The minimum is computed here directly rather than through
    ``row_in_matrix``: that name is defined twice in this notebook (a min
    variant and a max variant), so delegating to it made this function return
    the element-wise *maximum* whenever the max variant was defined last.

    Parameters
    ----------
    matrix_1, matrix_2, matrix_3 : array-like
        Matrices of identical shape (any number of columns).

    Returns
    -------
    numpy.ndarray
        Float array holding the element-wise minimum.
    """
    first = np.asarray(matrix_1, dtype=float)
    second = np.asarray(matrix_2, dtype=float)
    third = np.asarray(matrix_3, dtype=float)
    return np.minimum(np.minimum(first, second), third)


In [7]:
# Combining algorithms: MAX rule
# Helper functions used to fill the combined matrix
def max_element(value1, value2, value3):
    """Return the largest of the three given values."""
    return max(value1, value2, value3)
# Helper that compares the corresponding elements of the three matrices and returns the maximum value
def row_in_matrix(i,matrix_1, matrix_2, matrix_3):
    """Return the per-class maximum of row ``i`` across the three matrices.

    Only the first two columns (index 0 and 1) are considered; the result is
    the tuple ``(max of column 0, max of column 1)``.

    NOTE: an earlier cell of this notebook defines a function with this same
    name that takes the minimum instead; this definition replaces it once
    this cell has been executed.
    """
    return tuple(
        max_element(value1=matrix_1[i][col],
                    value2=matrix_2[i][col],
                    value3=matrix_3[i][col])
        for col in (0, 1)
    )
    
def max_rule(matrix_1, matrix_2, matrix_3):
    """Fuse three probability matrices with the MAX rule.

    Each output element is the maximum of the corresponding elements of the
    three inputs.

    The maximum is computed here directly rather than through
    ``row_in_matrix``: that name is defined twice in this notebook (a min
    variant and a max variant), so the result used to depend on which of the
    two cells had been executed last.

    Parameters
    ----------
    matrix_1, matrix_2, matrix_3 : array-like
        Matrices of identical shape (any number of columns).

    Returns
    -------
    numpy.ndarray
        Float array holding the element-wise maximum.
    """
    first = np.asarray(matrix_1, dtype=float)
    second = np.asarray(matrix_2, dtype=float)
    third = np.asarray(matrix_3, dtype=float)
    return np.maximum(np.maximum(first, second), third)


In [8]:
def target(combining_matrix):
    """Turn combined two-class scores into predicted labels.

    For every row, label ``1`` is chosen when the first score is strictly
    greater than the second; otherwise (including ties) label ``2`` is chosen.
    The labels are returned as a NumPy array.
    """
    labels = [1 if scores[0] > scores[1] else 2 for scores in combining_matrix]
    return np.asarray(labels)
    
def error_combining_rule(target_combining_rule, target_test):
    """Return the misclassification rate of the predictions.

    Compares ``target_combining_rule[k]`` with ``target_test[k]`` for every
    position ``k`` of ``target_test`` and returns the mean of the 0/1
    mismatch indicators (``statistics.mean``).
    """
    mismatches = [
        int(bool(target_combining_rule[pos] != target_test[pos]))
        for pos in range(len(target_test))
    ]
    return statistics.mean(mismatches)

In [9]:
# Module-level accumulators of per-fold error rates, one list per combining
# rule; split_train_test_by_id appends to them.
arrErrSum, arrErrProd, arrErrMax, arrErrMin = [], [], [], []

In [10]:
def process_data_dat(filename_dat):
    """Load a ``.dat`` dataset from ``../data/`` into a NumPy array.

    Each non-blank line is split on whitespace and its first token is parsed
    with ``ast.literal_eval``; the parsed value is expected to be iterable
    (e.g. a comma-separated record such as ``1,22.08,11.46``) and becomes one
    row of the result.

    Parameters
    ----------
    filename_dat : str
        File name relative to the ``../data/`` directory.

    Returns
    -------
    numpy.ndarray
        One row per non-blank line of the file.
    """
    samples = []
    # The context manager closes the handle even if parsing a line fails.
    with open("../data/" + filename_dat) as data_file:
        for line in data_file:
            tokens = line.split()
            if not tokens:
                # Blank (or whitespace-only) line: nothing to parse.
                continue
            samples.append(list(ast.literal_eval(tokens[0])))
    return np.asarray(samples)

def process_cv_filename(cv_file):
    """Read ``../data/<cv_file>`` with ``scipy.io.loadmat`` and return its ``'cv'`` variable."""
    mat_path = '../data/' + cv_file
    mat_contents = scipy.io.loadmat(mat_path)
    return mat_contents['cv']

def split_train_test_by_id(data,cv_file, niters, nfolds):
    """Evaluate the four combining rules over predefined cross-validation folds.

    For each of the ``niters * nfolds`` folds listed in ``cv_file`` the base
    ``models`` are refitted on the training rows, their class probabilities
    are fused with the sum, product, min and max rules, and the error rate of
    each rule on the test rows is appended to the module-level lists
    ``arrErrSum``, ``arrErrProd``, ``arrErrMin`` and ``arrErrMax``.  The
    per-fold errors are then pickled to ``dict_<cv_file>.pickle`` in the
    current working directory.

    The module-level lists are emptied at the start of every call, so they
    (and the pickle) describe only the dataset being processed instead of
    silently accumulating the folds of every dataset evaluated so far.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose last column is the target and whose other columns
        are the features.
    cv_file : str
        Name of the ``.mat`` fold file, resolved by ``process_cv_filename``.
    niters, nfolds : int
        Number of repetitions and of folds per repetition; the folds are read
        from positions ``0 .. niters * nfolds - 1`` of the fold table.

    Returns
    -------
    dict
        ``{rule: {'mean': ..., 'variance': ...}}`` for the rules ``'sum'``,
        ``'product'``, ``'min'`` and ``'max'``.  ``'mean'`` is ``None`` when
        no fold was evaluated and ``'variance'`` is ``None`` with fewer than
        two folds (``statistics.variance`` needs at least two values).
    """
    # Start from a clean slate so results of a previous call do not leak in.
    for accumulator in (arrErrSum, arrErrProd, arrErrMin, arrErrMax):
        accumulator.clear()

    # The fold table does not change between folds: load it once.
    cv_test = process_cv_filename(cv_file)
    n_samples = len(data)

    for fold in range(niters * nfolds):
        # Flatten the stored indices and shift them down by one.
        # NOTE(review): presumably these are MATLAB-style 1-based row indices
        # stored as a 2-D array per fold - confirm against the .mat files.
        test_index = np.asarray(cv_test[0][fold]).ravel().astype(np.intp) - 1

        # Every row that is not a test row is a training row.
        train_mask = np.ones(n_samples, dtype=bool)
        train_mask[test_index] = False
        train_index = np.flatnonzero(train_mask)

        targets_test = data[test_index][:, -1]
        meta_proba = training_models(models,
                                     features_train=data[train_index][:, :-1],
                                     targets_train=data[train_index][:, -1],
                                     features_test=data[test_index][:, :-1])
        # Two probability columns per model, in the order of `models`.
        predict_lr_proba = meta_proba[:, 0:2]
        predict_gnb_proba = meta_proba[:, 2:4]
        predict_knn_proba = meta_proba[:, 4:6]

        combining_sum_rule, combining_product_rule = combining_sum_product(
            predict_lr_proba, predict_gnb_proba, predict_knn_proba)
        combining_min_rule = min_rule(predict_lr_proba, predict_gnb_proba, predict_knn_proba)
        combining_max_rule = max_rule(predict_lr_proba, predict_gnb_proba, predict_knn_proba)

        # Error rate of each rule on this fold.
        for accumulator, combined_proba in ((arrErrSum, combining_sum_rule),
                                            (arrErrProd, combining_product_rule),
                                            (arrErrMin, combining_min_rule),
                                            (arrErrMax, combining_max_rule)):
            accumulator.append(error_combining_rule(target(combined_proba), targets_test))

    # Mean and variance of the per-fold error rates, per rule.
    summary = {}
    for rule_name, errors in (('sum', arrErrSum), ('product', arrErrProd),
                              ('min', arrErrMin), ('max', arrErrMax)):
        summary[rule_name] = {
            'mean': statistics.mean(errors) if errors else None,
            'variance': statistics.variance(errors) if len(errors) > 1 else None,
        }

    pickle_file = {'Dataset':cv_file,'arrErrSum':arrErrSum,'arrErrProd':arrErrProd,
                   'arrErrMin':arrErrMin,'arrErrMax':arrErrMax}
    # The context manager closes the file even if pickling fails.
    with open("dict_{}.pickle".format(cv_file),"wb") as pickle_out:
        pickle.dump(pickle_file, pickle_out)

    return summary
    
    

In [None]:
# Run the cross-validated evaluation for every dataset / fold-file pair.
for dat_name, cv_name in zip(data_dat, cv_filename):
    data = process_data_dat(dat_name)
    split_train_test_by_id(data, cv_name, niters=3, nfolds=10)

# Load the Australian results once, after all datasets have been processed,
# instead of reopening the same file (without closing it) on every iteration.
# The pickle is produced by this notebook; do not point this at untrusted files.
with open("dict_cv_australian.mat.pickle", "rb") as pickle_in:
    example_dict = pickle.load(pickle_in)

In [13]:
# Reload the per-fold errors saved for the Australian dataset.  The context
# manager makes sure the file handle is closed after loading.
# The pickle is produced by this notebook; do not point this at untrusted files.
with open("dict_cv_australian.mat.pickle", "rb") as pickle_in:
    example_dict = pickle.load(pickle_in)

In [19]:
# pandas is already imported as `pd` in the first cell, so it is not
# re-imported here.
# Tabulate the per-fold error rates: one row per fold, one column per rule.
data = pd.DataFrame.from_dict(example_dict)
data

Unnamed: 0,Dataset,arrErrSum,arrErrProd,arrErrMin,arrErrMax
0,cv_australian.mat,0.144928,0.144928,0.15942,0.15942
1,cv_australian.mat,0.318841,0.318841,0.318841,0.318841
2,cv_australian.mat,0.188406,0.188406,0.202899,0.202899
3,cv_australian.mat,0.173913,0.173913,0.173913,0.173913
4,cv_australian.mat,0.173913,0.188406,0.188406,0.188406
5,cv_australian.mat,0.188406,0.188406,0.188406,0.188406
6,cv_australian.mat,0.289855,0.289855,0.304348,0.304348
7,cv_australian.mat,0.15942,0.15942,0.173913,0.173913
8,cv_australian.mat,0.15942,0.173913,0.173913,0.173913
9,cv_australian.mat,0.231884,0.231884,0.231884,0.231884
