In [1]:
import sys
sys.path.append('..')
import ast
import scipy.io
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn import tree

import statistics
import pandas as pd
import pickle
import os
from constant import path, parameter
from lib import hypothesis
from sklearn import svm

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Project-local training helper. `train_master` is not referenced by any other
# cell visible in this notebook -- NOTE(review): confirm it is still needed.
from train import TrainMaster
train_master = TrainMaster()

# `rules.error_combining_rule(predicted, true)` is what split_train_test_by_id
# calls to score the SVC and decision-tree combiner predictions.
from lib import evaluate
rules = evaluate.Rules()

In [4]:
# Base learners taken from the project configuration. Each entry must expose
# fit() and predict_proba(), because split_train_test_by_id calls both on it.
models = parameter.MODEL

In [5]:
def process_cv_filename(cv_file):
    """Load a MATLAB file from ``path.DATA_PATH`` and return its ``'cv'`` variable.

    Parameters
    ----------
    cv_file : str
        File name, relative to ``path.DATA_PATH``.

    Returns
    -------
    The object stored under the key ``'cv'`` in the loaded file.
    """
    mat_contents = scipy.io.loadmat(os.path.join(path.DATA_PATH, cv_file))
    return mat_contents['cv']

In [6]:
def split_train_test_by_id(data, cv_file, models, classes, niters, nfolds, n_meta_splits=10):
    """Score SVC and decision-tree stacking combiners on predefined test folds.

    For each of the ``niters * nfolds`` test folds read from ``cv_file``:

    1. every base model is fitted on the training rows and its
       ``predict_proba`` output on the test rows is collected;
    2. an inner K-fold over the training rows collects out-of-fold
       ``predict_proba`` outputs for the same models (the meta-data);
    3. an ``svm.SVC`` and a ``tree.DecisionTreeClassifier`` are fitted on the
       meta-data and their test predictions are scored with
       ``rules.error_combining_rule``.

    The per-fold scores of both combiners are pickled under
    ``path.RESULT_PATH``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; the last column is used as the target, all other columns
        as features.
    cv_file : str
        Name of the MATLAB file (under ``path.DATA_PATH``) holding the folds.
        Entry ``iteration * nfolds + fold`` of its first row gives the test
        rows of that fold; the stored row numbers are shifted by -1 before
        being used as indices into ``data``.
    models : sequence
        Estimators exposing ``fit`` and ``predict_proba``. They are refitted
        in place.
    classes : sequence
        Class labels; only ``len(classes)`` is used, as the number of
        probability columns each model contributes.
    niters : int
        Number of repetitions stored in ``cv_file``.
    nfolds : int
        Number of folds per repetition.
    n_meta_splits : int, optional
        Number of inner K-fold splits used to build the meta-data
        (default 10, the value that was previously hard-coded).

    Returns
    -------
    None
    """
    num_classes = len(classes)
    num_meta_features = len(models) * num_classes
    # Column block of the meta-features owned by each base model.
    model_columns = [
        slice(num_classes * position, num_classes * (position + 1))
        for position in range(len(models))
    ]

    # Loop-invariant setup: read the fold file once and reuse one splitter.
    cv_test = process_cv_filename(cv_file)
    inner_kfold = KFold(n_splits=n_meta_splits)
    all_rows = np.arange(len(data))

    arrErrSvm = []
    arrErrTree = []
    # Distinct names for every loop level: re-using `i` for the model loops
    # used to overwrite the iteration counter, so every fold after the first
    # one of an iteration was looked up at the wrong position.
    for iteration in range(niters):
        for fold in range(nfolds):
            # ----------------- Test / train indices -----------------
            stored_rows = cv_test[0][iteration * nfolds + fold]
            test_index = np.concatenate([row - 1 for row in stored_rows])
            # Sorted rows of `data` that are not part of the test fold.
            train_index = np.setdiff1d(all_rows, test_index)

            features_train = data[train_index, :-1]
            targets_train = data[train_index, -1]
            features_test = data[test_index, :-1]
            targets_test = data[test_index, -1]

            # ------------- Base models on the test fold -------------
            # Assumes predict_proba yields len(classes) columns, i.e. every
            # class is present in the rows the model was fitted on.
            meta_prob_test = np.zeros((len(test_index), num_meta_features))
            for columns, model in zip(model_columns, models):
                fitted = model.fit(features_train, targets_train)
                meta_prob_test[:, columns] = fitted.predict_proba(features_test)

            # ------ Out-of-fold probabilities on the train rows ------
            train_set = data[train_index]
            meta_data = np.zeros((len(train_index), num_meta_features))
            meta_targets = np.zeros(len(train_index))
            meta_index = 0
            for inner_train, inner_test in inner_kfold.split(train_set):
                # The split does not depend on the model, so slice once.
                features_train_KF = train_set[inner_train, :-1]
                targets_train_KF = train_set[inner_train, -1]
                features_test_KF = train_set[inner_test, :-1]
                targets_test_KF = train_set[inner_test, -1]

                rows = slice(meta_index, meta_index + len(inner_test))
                for columns, model in zip(model_columns, models):
                    model.fit(features_train_KF, targets_train_KF)
                    meta_data[rows, columns] = model.predict_proba(features_test_KF)
                meta_targets[rows] = targets_test_KF
                meta_index += len(inner_test)

            # meta_targets is stored row-for-row alongside meta_data, so it is
            # the matching target vector for the combiners. With the unshuffled
            # KFold used here it equals targets_train.

            # ---------------------- SVC combining ----------------------
            clf_svm = svm.SVC()
            clf_svm.fit(meta_data, meta_targets)
            targets_svm_predict = clf_svm.predict(meta_prob_test)
            arrErrSvm.append(rules.error_combining_rule(targets_svm_predict, targets_test))

            # ------------------ Decision-tree combining ------------------
            clf_tree = tree.DecisionTreeClassifier()
            clf_tree.fit(meta_data, meta_targets)
            targets_tree_predict = clf_tree.predict(meta_prob_test)
            arrErrTree.append(rules.error_combining_rule(targets_tree_predict, targets_test))

    # NOTE(review): "sym_rule" may be a typo for "svm_rule"; the file name is
    # kept unchanged so existing readers of the results keep working.
    results = (
        ("sym_rule_{}.pickle", {'Dataset': cv_file, 'arrErrSvm': arrErrSvm}),
        ("dtree_rule_{}.pickle", {'Dataset': cv_file, 'arrErrTree': arrErrTree}),
    )
    for name_template, payload in results:
        file_result = os.path.join(path.RESULT_PATH, name_template.format(cv_file))
        # Context manager guarantees the handle is flushed and closed.
        with open(file_result, "wb") as pickle_out:
            pickle.dump(payload, pickle_out)
    


In [7]:
def main():
    """Run the stacking evaluation once for every dataset in ``path.DATA_DAT``.

    Each dataset is read as comma-separated text from ``path.DATA_PATH`` and
    passed to ``split_train_test_by_id`` together with the fold file found at
    the same position in ``path.CV_FILENAME``, using 3 repetitions of 10 folds.
    """
    for dataset_pos, dat_name in enumerate(path.DATA_DAT):
        # The last column is treated as the class label.
        dataset = np.loadtxt(os.path.join(path.DATA_PATH, dat_name), delimiter=',')
        labels = np.unique(dataset[:, -1])
        split_train_test_by_id(
            data=dataset,
            cv_file=path.CV_FILENAME[dataset_pos],
            models=models,
            classes=labels,
            niters=3,
            nfolds=10,
        )

In [8]:
# Run the whole experiment. For each dataset, split_train_test_by_id writes
# two result pickles under path.RESULT_PATH as a side effect.
main()