In [None]:
#===============================================================================
# IMPORTS
#===============================================================================
import multiprocessing
import os
import subprocess

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.sparse import hstack, vstack, csr_matrix
from sklearn.datasets import load_svmlight_file

from parallel_process import parallel_classification_table1
%matplotlib inline

#===============================================================================
# SETTINGS
#===============================================================================
# Directories, all relative to the notebook's working directory.
DIR_DATA = './dataset/figure1/'
DIR_CACHE = './datacache/'
DIR_FIGURES = './figures/'
DIR_TABLE = './tables/'

# Number of times each experiment is repeated.
TOTAL_RUN = 100

# Candidate values handed to the classification routine, keyed by name.
PARAM = dict(
    epsilon=[1e-3, 5e-3, 1e-2, 5e-2, 1e-1],
    kappa=[0.05, 0.1, 0.25, 0.5, float('inf')],
    deg=[2, 3, 4, 5],
    gamma_rbf=[1/100, 1/64, 1/36, 1/25],
    gamma_lap=[1/100, 1/64, 1/36, 1/25],
)

# Kernels that are evaluated and reported, in output order.
kernel_functions = ['polynomial', 'rbf', 'laplacian']

In [None]:
#===============================================================================
# Load data 1, train and test results
#===============================================================================
DIGITS = [1, 7]
PIXEL = 784

# n_features pins every file to PIXEL columns at load time, so the per-digit
# matrices stack directly without building dense zero blocks for padding.
train_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_train_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_train = vstack([features for features, _ in train_parts])
y_train = np.hstack([labels for _, labels in train_parts])
# Relabel the two digits as the binary classes -1 / +1.
y_train[y_train == DIGITS[0]] = -1
y_train[y_train == DIGITS[1]] = 1

test_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_test_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_test = vstack([features for features, _ in test_parts])
y_test = np.hstack([labels for _, labels in test_parts])
y_test[y_test == DIGITS[0]] = -1
y_test[y_test == DIGITS[1]] = 1

# Densify once up front: the conversion does not depend on the run index, so
# repeating it inside the generator would redo it for each of the TOTAL_RUN tasks.
x_train_dense = x_train.toarray()
x_test_dense = x_test.toarray()
results1 = Parallel(n_jobs=4, verbose=11)(
    delayed(parallel_classification_table1)(x_train_dense, y_train,
                                            x_test_dense, y_test, PARAM,
                                            kernel_functions)
    for _ in range(TOTAL_RUN)
)
# Each run yields a pair; the first elements build dro_df1 and the second
# elements build reg_df1.
dro_records, reg_records = zip(*results1)
dro_df1 = pd.DataFrame(list(dro_records))
reg_df1 = pd.DataFrame(list(reg_records))

In [None]:
#===============================================================================
# Load data 2, train and test results
#===============================================================================
DIGITS = [3, 8]
PIXEL = 784

# n_features pins every file to PIXEL columns at load time, so the per-digit
# matrices stack directly without building dense zero blocks for padding.
train_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_train_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_train = vstack([features for features, _ in train_parts])
y_train = np.hstack([labels for _, labels in train_parts])
# Relabel the two digits as the binary classes -1 / +1.
y_train[y_train == DIGITS[0]] = -1
y_train[y_train == DIGITS[1]] = 1

test_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_test_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_test = vstack([features for features, _ in test_parts])
y_test = np.hstack([labels for _, labels in test_parts])
y_test[y_test == DIGITS[0]] = -1
y_test[y_test == DIGITS[1]] = 1

# Densify once up front: the conversion does not depend on the run index, so
# repeating it inside the generator would redo it for each of the TOTAL_RUN tasks.
x_train_dense = x_train.toarray()
x_test_dense = x_test.toarray()
results2 = Parallel(n_jobs=4, verbose=11)(
    delayed(parallel_classification_table1)(x_train_dense, y_train,
                                            x_test_dense, y_test, PARAM,
                                            kernel_functions)
    for _ in range(TOTAL_RUN)
)
# Each run yields a pair; the first elements build dro_df2 and the second
# elements build reg_df2.
dro_records, reg_records = zip(*results2)
dro_df2 = pd.DataFrame(list(dro_records))
reg_df2 = pd.DataFrame(list(reg_records))

In [None]:
#===============================================================================
# Load data 3, train and test results
#===============================================================================
DIGITS = [4, 9]
PIXEL = 784

# n_features pins every file to PIXEL columns at load time, so the per-digit
# matrices stack directly without building dense zero blocks for padding.
train_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_train_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_train = vstack([features for features, _ in train_parts])
y_train = np.hstack([labels for _, labels in train_parts])
# Relabel the two digits as the binary classes -1 / +1.
y_train[y_train == DIGITS[0]] = -1
y_train[y_train == DIGITS[1]] = 1

test_parts = [
    load_svmlight_file(DIR_DATA + 'MNIST_test_' + str(digit) + '.txt',
                       n_features=PIXEL)
    for digit in DIGITS
]
x_test = vstack([features for features, _ in test_parts])
y_test = np.hstack([labels for _, labels in test_parts])
y_test[y_test == DIGITS[0]] = -1
y_test[y_test == DIGITS[1]] = 1

# Densify once up front: the conversion does not depend on the run index, so
# repeating it inside the generator would redo it for each of the TOTAL_RUN tasks.
x_train_dense = x_train.toarray()
x_test_dense = x_test.toarray()
results3 = Parallel(n_jobs=4, verbose=11)(
    delayed(parallel_classification_table1)(x_train_dense, y_train,
                                            x_test_dense, y_test, PARAM,
                                            kernel_functions)
    for _ in range(TOTAL_RUN)
)
# Each run yields a pair; the first elements build dro_df3 and the second
# elements build reg_df3.
dro_records, reg_records = zip(*results3)
dro_df3 = pd.DataFrame(list(dro_records))
reg_df3 = pd.DataFrame(list(reg_records))

In [None]:
#===============================================================================
# Output latex table
#===============================================================================
# Heading shown above each kernel's RSVM/DRSVM column pair. Kernels missing
# from this map fall back to their capitalized identifier.
kernel_titles = {
    'linear': 'Linear',
    'polynomial': 'Polynomial',
    'rbf': 'Gaussian',
    'laplacian': 'Laplacian',
}

# One table row per digit pair: (row label, RSVM scores, DRSVM scores).
table_rows = [
    ('1 vs 7', reg_df1, dro_df1),
    ('3 vs 8', reg_df2, dro_df2),
    ('4 vs 9', reg_df3, dro_df3),
]


def _auc_cell(scores):
    """Format one column of scores as a ``& $ mean \\pm std $ `` table cell.

    Both statistics are scaled by 100 and rounded to two decimals.
    """
    mean_pct = round(100 * scores.mean(), 2)
    std_pct = round(100 * scores.std(), 2)
    return '& $ ' + str(mean_pct) + ' \\pm ' + str(std_pct) + ' $ '


# The header is derived from kernel_functions so the column groups always line
# up, in number and in order, with the cells written for each row below.
n_kernels = len(kernel_functions)
cline = '\\cline{2-' + str(1 + 2 * n_kernels) + '}'
group_header = ' & '.join(
    '\\multicolumn{2}{c|}{'
    + kernel_titles.get(kernel_fun, kernel_fun.capitalize()) + '}'
    for kernel_fun in kernel_functions
)
method_header = ' & '.join(['RSVM & DRSVM'] * n_kernels)

tex_lines = [
    '\\documentclass{article}',
    '\\usepackage{multirow}',
    '\\usepackage[margin=0.5in]{geometry}',
    '\\begin{document}',
    '\\begin{table} [h]',
    '\\centering',
    '\\caption{The average Area Under the Curve (AUC) scores',
    'evaluated on the test dataset over ' + str(TOTAL_RUN) + ' trials.}',
    '\\bgroup',
    '\\def\\arraystretch{1.1}',
    '\\begin{tabular}{|l|' + 'cc|' * n_kernels + '}',
    cline,
    '\\multicolumn{1}{c|}{} & ' + group_header + ' \\\\',
    cline,
    '\\multicolumn{1}{c|}{} & ' + method_header + ' \\\\ \\hline',
]

for row_label, reg_df, dro_df in table_rows:
    row = row_label
    for kernel_fun in kernel_functions:
        # RSVM cell first, then DRSVM, matching the sub-header order.
        row += _auc_cell(reg_df[kernel_fun]) + _auc_cell(dro_df[kernel_fun])
    tex_lines.append(row + '\\\\ \\hline')

tex_lines += [
    '\\end{tabular}',
    '\\egroup',
    '\\end{table}',
    '\\end{document}',
]
out_text = '\n'.join(tex_lines) + '\n'

os.makedirs(DIR_TABLE, exist_ok=True)
with open(os.path.join(DIR_TABLE, 'table_1.tex'), 'w') as tex_file:
    tex_file.write(out_text)

# Run pdflatex with cwd= rather than os.chdir so the notebook's working
# directory is never changed, even when compilation fails. check=True raises
# CalledProcessError on a non-zero exit status; the two flags make pdflatex
# stop at the first error instead of waiting for interactive input.
subprocess.run(
    ['pdflatex', '-interaction=nonstopmode', '-halt-on-error', 'table_1.tex'],
    cwd=DIR_TABLE,
    check=True,
)