In [1]:
#!/usr/bin/env python
import sys
sys.path.append('/home/bd-dev/lijian/201801_ICML/script/libsvm/libsvm-3.22/python')
from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")

import scipy.io as sio
import numpy as np
from svmutil import *
from svm import *
from shogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy
from shogun import CombinedFeatures, RealFeatures, MulticlassLabels
from shogun import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel
from shogun import MKLMulticlass
from shogun import GMNPSVM, CSVFile
from sklearn.model_selection import train_test_split

# load data from txt
def loadFromTxt (file_path):
    """Load a numeric text file into a NumPy array.

    Parameters
    ----------
    file_path : anything ``np.loadtxt`` accepts as its first argument
        (a path string or an open file-like object).

    Returns
    -------
    numpy.ndarray
        The array produced by ``np.loadtxt`` with its default options.
    """
    loaded = np.loadtxt(file_path)
    return loaded

# load data from libsvm data
def loadFromLibsvm (file_path):
    """Read a LIBSVM-format data file.

    Delegates to ``svm_read_problem`` (brought into scope by the
    ``from svmutil import *`` at the top of the notebook) and returns its
    result unchanged -- per the LIBSVM Python interface this is a
    ``(labels, instances)`` pair.

    The LIBSVM python directory is already put on ``sys.path`` once, before
    ``svmutil`` is imported, so it is not appended again here: doing so on
    every call only grew ``sys.path`` without affecting name resolution.
    """
    return svm_read_problem(file_path)

# load data from .mat file
def loadFromMat (file_path):
    """Read a MATLAB ``.mat`` file via ``scipy.io.loadmat``.

    This loader used to be declared under the name ``loadFromLibsvm`` as
    well, which silently replaced the LIBSVM reader defined just above it.
    It now has its own name so both loaders are reachable.

    Returns
    -------
    dict
        The variable-name -> array mapping produced by ``sio.loadmat``.
    """
    return sio.loadmat(file_path)

# OneVsRest learning machine
def classifier_multiclassmachine (fm_train_real,fm_test_real,label_train_multiclass,width=2.1,C=1,epsilon=1e-5):
    """Train a one-vs-rest Gaussian-kernel LibSVM and predict the test labels.

    Parameters
    ----------
    fm_train_real, fm_test_real : real-valued matrices wrapped in
        ``RealFeatures``. Callers in this notebook pass the transposed sample
        matrix (``X.T``).
    label_train_multiclass : training labels, wrapped in ``MulticlassLabels``.
    width : third argument given to ``GaussianKernel``.
    C : SVM regularisation constant, applied to both classes of every binary
        sub-problem.
    epsilon : value passed to ``LibSVM.set_epsilon``.

    Returns
    -------
    The label vector from ``mc_classifier.apply().get_labels()`` after the
    kernel has been re-initialised on (train, test) features.
    """
    feats_train=RealFeatures(fm_train_real)
    feats_test=RealFeatures(fm_test_real)
    kernel=GaussianKernel(feats_train, feats_train, width)

    labels=MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    # C used to be accepted but never handed to the SVM, so every run trained
    # with the library default regardless of what the caller asked for.
    classifier.set_C(C, C)
    classifier.set_epsilon(epsilon)
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),kernel,classifier,labels)
    mc_classifier.train()

    # Re-point the kernel at (train, test) so apply() scores the test set.
    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out

# mc-mkl learning machine
def mkl_multiclass (fm_train_real, fm_test_real, label_train_multiclass,
    C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Train a multiclass MKL machine over a bank of Gaussian kernels and
    predict labels for the test features.

    One ``GaussianKernel`` is created per exponent ``i`` in ``range(-10, 11)``,
    each constructed with the single argument ``pow(2, i+1)`` (presumably the
    kernel width -- confirm against the shogun version in use). Every
    sub-kernel gets its own ``RealFeatures`` wrapper around the same train /
    test matrices.

    Parameters
    ----------
    fm_train_real, fm_test_real : real-valued matrices wrapped in
        ``RealFeatures``.
    label_train_multiclass : training labels, wrapped in ``MulticlassLabels``.
    C : first argument of ``MKLMulticlass``.
    epsilon, mkl_epsilon, mkl_norm : forwarded to the matching ``set_*``
        methods of the MKL machine.
    num_threads : forwarded to ``mkl.parallel.set_num_threads``.

    Returns
    -------
    The label vector from ``apply().get_labels()``.
    """
    combined_kernel = CombinedKernel()
    train_feats = CombinedFeatures()
    test_feats = CombinedFeatures()

    # Exponents -10..10 give pow(2, -9) .. pow(2, 11): 21 sub-kernels.
    for exponent in range(-10, 11):
        train_feats.append_feature_obj(RealFeatures(fm_train_real))
        test_feats.append_feature_obj(RealFeatures(fm_test_real))
        combined_kernel.append_kernel(GaussianKernel(pow(2, exponent + 1)))

    combined_kernel.init(train_feats, train_feats)

    train_labels = MulticlassLabels(label_train_multiclass)

    learner = MKLMulticlass(C, combined_kernel, train_labels)

    learner.set_epsilon(epsilon)
    learner.parallel.set_num_threads(num_threads)
    learner.set_mkl_epsilon(mkl_epsilon)
    learner.set_mkl_norm(mkl_norm)

    learner.train()

    # Switch the kernel to (train, test) before predicting.
    combined_kernel.init(train_feats, test_feats)

    return learner.apply().get_labels()

def classifier_gmnpsvm (fm_train_real,fm_test_real,label_train_multiclass,width=2.1,C=1,epsilon=1e-5):
    """Train a GMNPSVM with a Gaussian kernel and predict the test labels.

    Parameters
    ----------
    fm_train_real, fm_test_real : real-valued matrices wrapped in
        ``RealFeatures``.
    label_train_multiclass : training labels, wrapped in ``MulticlassLabels``.
    width : third argument given to ``GaussianKernel``.
    C : first argument of ``GMNPSVM``.
    epsilon : value passed to ``set_epsilon``.

    Returns
    -------
    The label vector from ``apply(test features).get_labels()``.
    """
    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    gaussian = GaussianKernel(train_features, train_features, width)

    train_labels = MulticlassLabels(label_train_multiclass)

    machine = GMNPSVM(C, gaussian, train_labels)
    machine.set_epsilon(epsilon)
    machine.train(train_features)

    return machine.apply(test_features).get_labels()


def phylproFile(mode, data_name):
    """Run repeated random-split evaluation on a phylogenetic-profile dataset.

    Loads the ``'phylpros'`` matrix and the ``'y'`` label array from two
    ``.mat`` files whose paths are built from ``data_name``, then for
    ``times`` rounds splits the data, trains the classifier selected by
    ``mode`` and prints the test accuracy of each round. After the last round
    all accuracies are printed, one per line.

    Parameters
    ----------
    mode : str
        ``'mcmkl'`` (``mkl_multiclass``), ``'1v1'``
        (``classifier_multiclassmachine``) or ``'gmnp'``
        (``classifier_gmnpsvm``).
    data_name : str
        Dataset name used to build both file paths.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values. Previously an unknown
        mode hit a NameError on the first round or reused stale predictions.

    Notes
    -----
    Reads the module-level settings ``times``, ``test_size``, ``width``,
    ``C``, ``epsilon``, ``num_threads``, ``mkl_epsilon`` and ``mkl_norm``,
    which must be defined before this function is called.
    """
    supported_modes = ('mcmkl', '1v1', 'gmnp')
    if mode not in supported_modes:
        raise ValueError('unsupported mode %r; expected one of %s'
                         % (mode, ', '.join(supported_modes)))

    data_path = '/home/bd-dev/lijian/201801_ICML/tmp/RawData/'+ data_name +'/'+ data_name +'.phylpro.mat'
    label_path = '/home/bd-dev/lijian/201801_ICML/data/' + data_name + '/label_' + data_name + '.mat'
    data = sio.loadmat(data_path)['phylpros']
    label = sio.loadmat(label_path)['y']
    # Shift the labels so the smallest one becomes 0.
    label=label-label.min()

    accuracy=[]
    for i in range(times):
        # No random_state is passed, so every round draws a fresh random split.
        X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_size)
        # Flatten the labels to 1-D float64 vectors for training and comparison.
        train_labels = y_train.reshape(y_train.size,).astype('float64')
        test_labels = y_test.reshape(y_test.size,).astype('float64')
        if mode=='mcmkl':
            label_pre = mkl_multiclass(X_train.T, X_test.T, train_labels, C, epsilon, num_threads, mkl_epsilon, mkl_norm)
        elif mode=='1v1':
            label_pre = classifier_multiclassmachine(X_train.T, X_test.T, train_labels, width, C, epsilon)
        else:
            label_pre = classifier_gmnpsvm(X_train.T, X_test.T, train_labels, width, C, epsilon)
        accuracy.append((test_labels==label_pre).sum()/float(label_pre.size))
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('finish round ' + str(i) +', accuracy: ' + str(accuracy[-1]))
    print("\n".join(str(item) for item in accuracy))

def svmlightFile(mode, data_name):
    """Run repeated random-split evaluation on an svmlight/libsvm-format file.

    Loads ``<data_name>.scale`` with ``load_svmlight_file``, then for
    ``times`` rounds splits the data, trains the classifier selected by
    ``mode`` on the densified, transposed matrices and prints the test
    accuracy of each round. After the last round all accuracies are printed,
    one per line.

    Parameters
    ----------
    mode : str
        ``'mcmkl'`` (``mkl_multiclass``), ``'1v1'``
        (``classifier_multiclassmachine``) or ``'gmnp'``
        (``classifier_gmnpsvm``).
    data_name : str
        Dataset name used to build the file path.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values. Previously an unknown
        mode hit a NameError on the first round or reused stale predictions.

    Notes
    -----
    Reads the module-level settings ``times``, ``test_size``, ``width``,
    ``C``, ``epsilon``, ``num_threads``, ``mkl_epsilon`` and ``mkl_norm``.
    The split fraction now follows ``test_size`` instead of a hard-coded 0.2,
    matching ``phylproFile``.
    """
    supported_modes = ('mcmkl', '1v1', 'gmnp')
    if mode not in supported_modes:
        raise ValueError('unsupported mode %r; expected one of %s'
                         % (mode, ', '.join(supported_modes)))

    data_path = '/home/bd-dev/lijian/201801_ICML/tmp/'+ data_name +'.scale'
    data = load_svmlight_file(data_path)
    # Keep the labels as a column vector; shift so the smallest one becomes 0.
    X, y = data[0], data[1].reshape(len(data[1]),1)
    y=y-y.min()

    accuracy=[]
    for i in range(times):
        # No random_state is passed, so every round draws a fresh random split.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        # Densify the sparse splits and transpose them once per round.
        fm_train = X_train.toarray().T
        fm_test = X_test.toarray().T
        train_labels = y_train.reshape(y_train.size,).astype('float64')
        test_labels = y_test.reshape(y_test.size,).astype('float64')
        if mode=='mcmkl':
            label_pre = mkl_multiclass(fm_train, fm_test, train_labels, C, epsilon, num_threads, mkl_epsilon, mkl_norm)
        elif mode=='1v1':
            label_pre = classifier_multiclassmachine(fm_train, fm_test, train_labels, width, C, epsilon)
        else:
            label_pre = classifier_gmnpsvm(fm_train, fm_test, train_labels, width, C, epsilon)
        accuracy.append((test_labels==label_pre).sum()/float(label_pre.size))
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('finish round ' + str(i) +', accuracy: ' + str(accuracy[-1]))
    print("\n".join(str(item) for item in accuracy))

In [2]:
if __name__=='__main__':
    # Experiment settings. These are module-level names and are also read by
    # phylproFile / svmlightFile.
    times=30            # number of random train/test rounds
    C=1.2               # regularisation constant handed to the classifiers
    test_size=0.2       # fraction of samples held out in each round
    width=8             # Gaussian kernel width for the '1v1' and 'gmnp' modes
    epsilon=1e-5
    num_threads=8
    mkl_epsilon=0.001
    mkl_norm=2
    mode='gmnp'         # one of 'mcmkl', '1v1', 'gmnp'
    data_path = '/home/bd-dev/lijian/201801_ICML/tmp/iris.scale'
    data = load_svmlight_file(data_path)
    # Keep the labels as a column vector; shift so the smallest one becomes 0.
    X, y = data[0], data[1].reshape(len(data[1]),1)
    y=y-y.min()

    # Fail fast on a mistyped mode instead of hitting a NameError (or reusing
    # a stale label_pre left in the kernel) inside the loop.
    if mode not in ('mcmkl', '1v1', 'gmnp'):
        raise ValueError('unsupported mode %r; expected one of mcmkl, 1v1, gmnp' % (mode,))

    accuracy=[]
    for i in range(times):
        # Use the configured test_size rather than a second hard-coded 0.2.
        # No random_state is passed, so every round draws a fresh random split.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        # Every mode gets plain ndarrays: the '1v1' branch used todense()
        # (np.matrix) while the other branches used toarray().
        fm_train = X_train.toarray().T
        fm_test = X_test.toarray().T
        train_labels = y_train.reshape(y_train.size,).astype('float64')
        test_labels = y_test.reshape(y_test.size,).astype('float64')
        if mode=='mcmkl':
            label_pre = mkl_multiclass(fm_train, fm_test, train_labels, C, epsilon, num_threads, mkl_epsilon, mkl_norm)
        elif mode=='1v1':
            label_pre = classifier_multiclassmachine(fm_train, fm_test, train_labels, width, C, epsilon)
        else:
            label_pre = classifier_gmnpsvm(fm_train, fm_test, train_labels, width, C, epsilon)
        accuracy.append((test_labels==label_pre).sum()/float(label_pre.size))
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('finish round ' + str(i) +', accuracy: ' + str(accuracy[-1]))
    print("\n".join(str(item) for item in accuracy))

finish round 0, accuracy: 0.933333333333
finish round 1, accuracy: 0.866666666667
finish round 2, accuracy: 0.933333333333
finish round 3, accuracy: 0.966666666667
finish round 4, accuracy: 0.966666666667
finish round 5, accuracy: 0.966666666667
finish round 6, accuracy: 0.933333333333
finish round 7, accuracy: 0.966666666667
finish round 8, accuracy: 1.0
finish round 9, accuracy: 0.966666666667
finish round 10, accuracy: 0.933333333333
finish round 11, accuracy: 0.966666666667
finish round 12, accuracy: 0.966666666667
finish round 13, accuracy: 0.966666666667
finish round 14, accuracy: 0.933333333333
finish round 15, accuracy: 1.0
finish round 16, accuracy: 0.933333333333


KeyboardInterrupt: 

In [13]:
# Inspect the shape of the transposed dense training matrix left in the kernel
# by the most recent split (presumably (features, samples) -- confirm).
X_train.todense().T.shape

(9, 171)

In [12]:
# Inspect the shape of the training label array left in the kernel by the most
# recent split.
y_train.shape

(171, 1)