In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import itertools
import operator
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.spatial.distance import pdist
from sklearn import metrics
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics.pairwise import laplacian_kernel
from sklearn.metrics.pairwise import linear_kernel
from sklearn.model_selection import cross_val_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor

from sklearn import svm

import dionysus as d
import sklearn_tda as tda

from biomarker.data_collection import *

EXCLUDE_KEYS = [206, 205, 184, 183, 82, 81, 45]#, 85, 135, 192]
TEST_KEYS = [217, 216]#, 215, 214, 213, 212, 211, 210, 209]
NUM_TEST = 10
LIM = 5
MUL = 10

def compute_diagram(points, k=3):
    r = np.max(pdist(points, 'euclidean'))
    f = d.fill_rips(points, k, r)
    m = d.homology_persistence(f)
    return d.init_diagrams(m, f)

def get_max_diag_dim(dgms, h=1):
    m = 0
    for d in dgms:
        n = len(d[h])
        if n > m:
            m = n
    return m

def remove_for_loo(D, i, kernel=True):
    if kernel:
        D_train = np.delete(np.delete(D, i, axis=0), i, axis=1)
        D_test = np.delete(D[i], i, axis=0).reshape(1,-1)
    else:
        D_train = np.delete(D,i,axis=0)
        D_test = D[i].reshape(1,-1)
    return D_train, D_test


def compute_sample_weights(y, lim=LIM, mul=MUL, up_lim=None, up_mul=None):
    ret = np.ones(y.shape[0])
    for i in range(y.shape[0]):
        v = y[i]
        if v < lim:
            ret[i] = np.abs(lim-v)*mul
        if up_lim is not None:
            if v > up_lim:
                if up_mul is not None:
                    ret[i] = np.abs(lim-v)*up_mul
                else:
                    ret[i] = np.abs(lim-v)*mul
    return ret

def create_train_test_matrices(train_keys, test_keys):
    excel = parse_master_file(exclude_keys=EXCLUDE_KEYS)
    test_idxs = list(excel[np.isin(excel['Key'], test_keys)].index)
    excel_test = excel.iloc[test_idxs]
    L_test = get_filename_list(excel_test['Associated data'])
    y_test = excel_test['Output: logK'].values
    y_buck_test = excel_test['Output: logKbucket'].values
    
    excel_train = excel.drop(test_idxs, axis=0)
    L_train = get_filename_list(excel_train['Associated data'])
    y_train = excel_train['Output: logK'].values
    y_buck_train = excel_train['Output: logKbucket'].values
    
    x4_train, x4_dims = create_x4_matrix(L_train, return_dims=True)
    x4_train, x4_dims = create_x4_matrix(L_train, return_dims=True)
    x5_train, x5_dims = create_x5_matrix(L_train, return_dims=True)
    x6_train, x6_dims = create_x6_matrix(L_train, return_dims=True)
    x7_train, x7_dims = create_x7_matrix(L_train, return_dims=True)
    
    x4_test = create_x4_matrix(L_test, max_dims=x4_dims)
    x4_test = create_x4_matrix(L_test, max_dims=x4_dims)
    x5_test = create_x5_matrix(L_test, max_dims=x5_dims)
    x6_test = create_x6_matrix(L_test, max_dims=x6_dims)
    x7_test = create_x7_matrix(L_test, max_dims=x7_dims)
    
    x40_x47_train = excel_train.iloc[:, 3:-2]
    master_train, master_names = prepare_master(x40_x47_train)
    
    x40_x47_test = excel_test.iloc[:, 3:-2]
    master_test, master_names = prepare_master(x40_x47_test)
    
    X_train = np.hstack((x4_train,x4_train,x5_train,x6_train,x7_train))
    X_test = np.hstack((x4_test, x4_test, x5_test, x6_test, x7_test))
    
    X = np.vstack((X_train,X_test))
    K = rbf_kernel(X)
    X_train = K[:len(y_train), :len(y_train)]
    X_test = K[len(y_train):, :len(y_train)]
    
    return (X_train, y_train), (X_test, y_test)

def create_train_test_diagram_kernels_x4(train_keys, test_keys, h=1):
    excel = parse_master_file(exclude_keys=EXCLUDE_KEYS)
    test_idxs = list(excel[np.isin(excel['Key'], test_keys)].index)
    excel_test = excel.iloc[test_idxs]
    L_test = get_filename_list(excel_test['Associated data'])
    y_test = excel_test['Output: logK'].values
    y_buck_test = excel_test['Output: logKbucket'].values
    
    excel_train = excel.drop(test_idxs, axis=0)
    L_train = get_filename_list(excel_train['Associated data'])
    y_train = excel_train['Output: logK'].values
    y_buck_train = excel_train['Output: logKbucket'].values
    
    print('Computing Training Diagrams')
    diags_train = []
    for l in L_train:
        diags_train.append(compute_diagram(parse_x4(l)[['Val1', 'Val2', 'Val3']].values))
    for i in range(len(diags_train)):
        if len(diags_train[i][h]) == 0:
            diags_train[i] = np.array([[0,0]])
        else:
            diags_train[i] = np.array([[pt.birth,pt.death] for pt in diags_train[i][h]])
    
    print('Computing Test Diagrams')
    diags_test = []
    for l in L_test:
        diags_test.append(compute_diagram(parse_x4(l)[['Val1', 'Val2', 'Val3']].values))
    for i in range(len(diags_test)):
        if len(diags_test[i][h]) == 0:
            diags_test[i] = np.array([[0,0]])
        else:
            diags_test[i] = np.array([[pt.birth,pt.death] for pt in diags_test[i][h]])
    
    print('Computing Kernel')
    
    dd = diags_train + diags_test
    SW = tda.SlicedWassersteinKernel(num_directions=1, bandwidth=1.)
    
    D = SW.fit_transform(dd)
    
    D_train = D[:len(y_train), :len(y_train)]
    D_test = D[len(y_train):, :len(y_train)]
    return (D_train, y_train), (D_test, y_test)
      
    
def create_test_diagram_kernels(h=1, num_directions=1, bandwidth=1.0, x_type='x1', exclude_keys=EXCLUDE_KEYS):
    excel = parse_master_file(exclude_keys=exclude_keys)
    excel_test = excel
    L_test = get_filename_list(excel_test['Associated data'])
    y_test = excel_test['Output: logK'].values
    y_buck_test = excel_test['Output: logKbucket'].values
    
    print('Computing Test Diagrams')
    diags_test = []
    if x_type == 'x1':
        for l in L_test:
                diags_test.append(compute_diagram(parse_x1(l)[['X', 'Y', 'Z']].values))
    if x_type == 'x4':
        for l in L_test:
            diags_test.append(compute_diagram(parse_x4(l)[['Val1', 'Val2', 'Val3']].values))
    if x_type == 'x5':
        for l in L_test:
            diags_test.append(compute_diagram(parse_x5(l)[['X', 'Y', 'Z']].values))
    for i in range(len(diags_test)):
        if len(diags_test[i][h]) == 0:
            diags_test[i] = np.array([[0,0]])
        else:
            diags_test[i] = np.array([[pt.birth,pt.death] for pt in diags_test[i][h]])
    
    print('Computing Kernel')
    
    dd = diags_test
    SW = tda.SlicedWassersteinKernel(num_directions=num_directions, bandwidth=bandwidth)
    
    D = SW.fit_transform(dd)
    
    return (D, y_test)

def get_diag(k, h=1): 
    excel = parse_master_file(exclude_keys=EXCLUDE_KEYS)
    l = get_filename_list(excel[excel['Key'] == k]['Associated data'])[0]
    dgm = compute_diagram(parse_x4(l)[['Val1', 'Val2', 'Val3']].values)
    return dgm
    
    

excel = parse_master_file(exclude_keys=EXCLUDE_KEYS).reset_index(drop=True)
keys = list(excel['Key'])
L = get_filename_list(excel['Associated data'])



# exclude_keys_min_test = [k for k in EXCLUDE_KEYS if k not in TEST_KEYS]
# excel_test = parse_master_file(exclude_keys=(keys+exclude_keys_min_test)).reset_index(drop=True)
# L_test = get_filename_list(excel_test['Associated data'])

x10_x17 = excel.iloc[:, 3:-3]
x10_x17.head()

Cython not found--WassersteinDistance not available
Gudhi not found--GraphInducedComplex not available
data_collection
[3, 9]


Unnamed: 0,X10: Category Method,X11: Temperature (K),X12: [Salt*Valency],X13: Category Salt type,X14: [Buffer] (mM),X15: pH,X16: CI #,X17: CI
0,A,298.0,0,0,0,7.0,0,N
1,B,298.0,0,0,50,4.74,0,N
2,A,298.0,0,0,0,7.0,1,CI_2
3,A,298.0,0,0,0,7.0,2,CI_1
4,A,298.0,0,0,0,7.0,2,CI_2


In [2]:
master, master_names = prepare_master(x10_x17)
K_train = master
K_train = rbf_kernel(K_train)
K_test = np.delete(K_train[191:193], [191,192], axis=1)
K_train = np.delete(np.delete(K_train, [191,192], axis=0), [191,192], axis=1)

D_train_x3 = create_x3_matrix(L)
D_train_x3 = laplacian_kernel(D_train_x3)
D_test_x3 = np.delete(D_train_x3[191:193], [191,192], axis=1)
D_train_x3 = np.delete(np.delete(D_train_x3, [191,192], axis=0), [191,192], axis=1)

D_train_x6 = create_x6_matrix(L, max_dims=(50, 6))
D_train_x6 = laplacian_kernel(D_train_x6)
D_test_x6 = np.delete(D_train_x6[191:193], [191,192], axis=1)
D_train_x6 = np.delete(np.delete(D_train_x6, [191,192], axis=0), [191,192], axis=1)

50 50


# X1

In [3]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

for direction in num_directions:
    for bandwidth in bandwidths:
        train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
        D_train_x1 = train_set_x1[0]
        D_test_x1 = np.delete(D_train_x1[191:193], [191,192], axis=1)
        D_train_x1 = np.delete(np.delete(D_train_x1, [191,192], axis=0), [191,192], axis=1)
        y_train = train_set_x1[1]
        y_test = y_train[191:193]
        y_train = np.delete(y_train, [191,192], axis=0)
        for gamma in Gammas:
            for epsilon in Epsilons:
                for c in Cs:
                    DD_train = D_train_x1
                    DD_test = D_test_x1

                    clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
                    cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
                    clf.fit(DD_train,y_train)
                    prediction = clf.predict(DD_test)
                    score = np.mean(np.abs(prediction - np.array(y_test)))
                    results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'direction':direction, 'bandwidth':bandwidth, 'gamma':gamma, 'epsilon':epsilon, 'c':c})

                    print('Score: {}, Direction: {}, Bandwidth: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, direction, bandwidth, gamma, epsilon, c))
                    
res_df_x1 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.254403026186872, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 2.94917643591677, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 2.062706550292531, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 1.5018902236663885, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.2543994237423357, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 2.9516062979292426, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 2.06397132563255, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 1.5076461313897664, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.253095403722699, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 2.945745502714115, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 2.0823752589446856,

Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 1, Epsilon: 10, C: 5
Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 1, Epsilon: 10, C: 10
Score: 3.254403026186872, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 0.1
Score: 2.94917643591677, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 1
Score: 2.062706550292531, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 5
Score: 1.5018902236663885, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 10
Score: 3.2543994237423357, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 0.1
Score: 2.9516062979292426, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 1
Score: 2.06397132563255, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 5
Score: 1.5076461313897664, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 10
Score: 3.253095403722699, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 0.1
Score: 2.945745502714115, Direction: 1, Bandwidth: 1.0, G

Score: 4.58, Direction: 1, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 10, C: 10
Score: 3.265158756390886, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.0001, C: 0.1
Score: 3.071007413767063, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.0001, C: 1
Score: 2.317554422236486, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.0001, C: 5
Score: 1.7845003744162988, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.0001, C: 10
Score: 3.265171789582368, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 0.1
Score: 3.068741093629324, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 1
Score: 2.3124228627112644, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 5
Score: 1.7883856907865021, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 10
Score: 3.2649550388569994, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 0.1
Score: 3.0664846148419396, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 1
Score: 2.297268310938143, Direction: 1, Bandwid

Score: 2.2580229508872227, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 1, C: 5
Score: 2.119817842153271, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 1, C: 10
Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 10, C: 0.1
Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 10, C: 1
Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 10, C: 5
Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.01, Epsilon: 10, C: 10
Score: 3.232743545991451, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.0001, C: 0.1
Score: 2.8865012368277934, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.0001, C: 1
Score: 2.1932019309138333, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.0001, C: 5
Score: 1.7973636808208044, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.0001, C: 10
Score: 3.2324761993752684, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.01, C: 0.1
Score: 2.887996431044965, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Ep

Score: 3.2371247699666545, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.0001, C: 0.1
Score: 2.9563102613834564, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.0001, C: 1
Score: 2.3258247211351457, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.0001, C: 5
Score: 2.1315608021318333, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.0001, C: 10
Score: 3.2378584399839028, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 0.1
Score: 2.9548727779623687, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 1
Score: 2.329056060676992, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 5
Score: 2.133666407529363, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 10
Score: 3.2385277412889604, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 0.1
Score: 2.946693230146788, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 1
Score: 2.3718704187274886, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 5


In [9]:
res_df_x1[res_df_x1['score'] == res_df_x1['score'].min()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
3,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,0.0001,-2.423302,6.145193,2.608974,1.50189
23,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,0.001,-2.423302,6.145193,2.608974,1.50189
43,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,0.01,-2.423302,6.145193,2.608974,1.50189
63,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,0.1,-2.423302,6.145193,2.608974,1.50189
83,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,1.0,-2.423302,6.145193,2.608974,1.50189
103,6.54,0.0,1.0,10.0,"[-2.485057292009884, -3.008562203088042, -2.17...",1,0.0001,10.0,-2.423302,6.145193,2.608974,1.50189


In [12]:
res_df_x1[res_df_x1['mean_cvs'] == res_df_x1['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
365,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,0.0001,-2.0381,6.516083,5.885829,2.954873
385,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,0.001,-2.0381,6.516083,5.885829,2.954873
405,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,0.01,-2.0381,6.516083,5.885829,2.954873
425,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,0.1,-2.0381,6.516083,5.885829,2.954873
445,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,1.0,-2.0381,6.516083,5.885829,2.954873
465,6.54,0.0,2.0,1.0,"[-1.9082467102459366, -2.496751555719141, -1.4...",2,0.01,10.0,-2.0381,6.516083,5.885829,2.954873


In [22]:
res_df_x1.to_csv('../data/results/both/x1.csv')

# X3

In [4]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)


for gamma in Gammas:
    D_train_x3 = create_x3_matrix(L)
    D_train_x3 = laplacian_kernel(D_train_x3, gamma=gamma)
    D_test_x3 = np.delete(D_train_x3[191:193], [191,192], axis=1)
    D_train_x3 = np.delete(np.delete(D_train_x3, [191,192], axis=0), [191,192], axis=1)
    for epsilon in Epsilons:
        for c in Cs:
            DD_train = D_train_x3
            DD_test = D_test_x3

            clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
            cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
            clf.fit(DD_train,y_train)
            prediction = clf.predict(DD_test)
            score = np.mean(np.abs(prediction - np.array(y_test)))
            results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'gamma':gamma, 'epsilon':epsilon, 'c':c})

            print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))
                    
res_df_x3 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
50 50
Score: 3.269572518231637, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.2661608875718944, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.2698145003299137, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.2402278339469333, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.2695291232812456, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.2661608875718944, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.270194538019118, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.241092094004473, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.2710767928350517, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.2666511564480336, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.2705225887866645, Gamma: 0.0001, Epsilon: 0.1, C: 5
Score: 3.2605902489695366, Gamma: 0.0001, Epsilon: 0.1, C: 10
Score: 3.273933163171513, Gamma: 0.0001, Epsilon: 1, C: 0.1
Score: 3.308865597753001, Gamma: 0.0001, Epsilon: 1, C: 1
Score: 3.429352686198455, Gamma: 0.0001, Epsilon: 1, C: 5
Score: 3.5711390

In [11]:
res_df_x3[res_df_x3['score'] == res_df_x3['score'].min()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
3,6.54,0.0,10.0,"[-1.906224033039445, -2.5968577483589312, -1.2...",0.0001,0.0001,-2.094281,5.598758,5.539214,3.240228


In [13]:
res_df_x3[res_df_x3['mean_cvs'] == res_df_x3['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
34,6.54,0.0,5.0,"[-1.7956596045142288, -2.301779520728811, -1.2...",1.0,0.001,-2.031027,4.666174,5.477369,3.675598


In [23]:
res_df_x3.to_csv('../data/results/both/x3.csv')

# X4

In [5]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

for direction in num_directions:
    for bandwidth in bandwidths:
        train_set_x4 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x4')
        D_train_x4 = train_set_x4[0]
        D_test_x4 = np.delete(D_train_x4[191:193], [191,192], axis=1)
        D_train_x4 = np.delete(np.delete(D_train_x4, [191,192], axis=0), [191,192], axis=1)
        y_train = train_set_x4[1]
        y_test = y_train[191:193]
        y_train = np.delete(y_train, [191,192], axis=0)
        for gamma in Gammas:
            for epsilon in Epsilons:
                for c in Cs:
                    DD_train = D_train_x4
                    DD_test = D_test_x4

                    clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
                    cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
                    clf.fit(DD_train,y_train)
                    prediction = clf.predict(DD_test)
                    score = np.mean(np.abs(prediction - np.array(y_test)))
                    results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'direction':direction, 'bandwidth':bandwidth, 'gamma':gamma, 'epsilon':epsilon, 'c':c})

                    print('Score: {}, Direction: {}, Bandwidth: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, direction, bandwidth, gamma, epsilon, c))
                    
res_df_x4 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.255139732079144, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 2.949309715653808, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 2.0796570623762025, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 1.5122564380198646, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.2550331123822063, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 2.9517719407743668, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 2.0808568586646907, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 1.517884121572691, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.253853337320183, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 2.9455424582998617, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 2.0972442307344

Score: 2.9455424582998617, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 1
Score: 2.0972442307344177, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 5
Score: 1.5710081135935297, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 10
Score: 3.2338703979371397, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 1, C: 0.1
Score: 2.898302330577636, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 1, C: 1
Score: 2.1857028581738343, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 1, C: 5
Score: 1.9110342959952107, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 1, C: 10
Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 10, C: 0.1
Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 10, C: 1
Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 10, C: 5
Score: 4.58, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 10, C: 10
[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.2657725707182927, Direction: 1, Bandwidth: 2.0, 

Score: 2.39378987975027, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 1, C: 5
Score: 1.8719430136003008, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 1, C: 10
Score: 4.58, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 10, C: 0.1
Score: 4.58, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 10, C: 1
Score: 4.58, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 10, C: 5
Score: 4.58, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 10, C: 10
Score: 3.2657725707182927, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.0001, C: 0.1
Score: 3.0640924269748835, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.0001, C: 1
Score: 2.315440511673287, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.0001, C: 5
Score: 1.8143646440430143, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.0001, C: 10
Score: 3.2657835266255932, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.01, C: 0.1
Score: 3.0625740241462207, Direction: 1, Bandwidth: 2.0, Gamma: 10, Epsilon: 0.01, C: 1
Score:

Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 10, C: 5
Score: 4.58, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 10, C: 10
Score: 3.2332751564104343, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.0001, C: 0.1
Score: 2.890675654942828, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.0001, C: 1
Score: 2.211651057760042, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.0001, C: 5
Score: 1.8125675558733185, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.0001, C: 10
Score: 3.233018289230565, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.01, C: 0.1
Score: 2.892146806054632, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.01, C: 1
Score: 2.2122432378851515, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.01, C: 5
Score: 1.8175483670506998, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.01, C: 10
Score: 3.2293500841359903, Direction: 2, Bandwidth: 1.0, Gamma: 1, Epsilon: 0.1, C: 0.1
Score: 2.8879634479829046, Direction: 2, Bandwidth: 1.0, Ga

Score: 2.960274792364973, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.0001, C: 1
Score: 2.3382887854177787, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.0001, C: 5
Score: 2.175846708799522, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.0001, C: 10
Score: 3.2385257754557837, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.01, C: 0.1
Score: 2.958410527822707, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.01, C: 1
Score: 2.341959621053985, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.01, C: 5
Score: 2.1785766192438594, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.01, C: 10
Score: 3.239240340885281, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.1, C: 0.1
Score: 2.950753045695813, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.1, C: 1
Score: 2.3805794986369637, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.1, C: 5
Score: 2.1687945974230676, Direction: 2, Bandwidth: 2.0, Gamma: 0.1, Epsilon: 0.1, C: 10
Score: 3.235242691

In [14]:
res_df_x4[res_df_x4['score'] == res_df_x4['score'].min()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
3,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,0.0001,-2.415967,6.174872,2.659385,1.512256
23,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,0.001,-2.415967,6.174872,2.659385,1.512256
43,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,0.01,-2.415967,6.174872,2.659385,1.512256
63,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,0.1,-2.415967,6.174872,2.659385,1.512256
83,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,1.0,-2.415967,6.174872,2.659385,1.512256
103,6.54,0.0,1.0,10.0,"[-2.491382698636401, -2.979947382050096, -2.18...",1,0.0001,10.0,-2.415967,6.174872,2.659385,1.512256


In [15]:
res_df_x4[res_df_x4['mean_cvs'] == res_df_x4['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
365,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,0.0001,-2.038528,6.516358,5.893179,2.958411
385,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,0.001,-2.038528,6.516358,5.893179,2.958411
405,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,0.01,-2.038528,6.516358,5.893179,2.958411
425,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,0.1,-2.038528,6.516358,5.893179,2.958411
445,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,1.0,-2.038528,6.516358,5.893179,2.958411
465,6.54,0.0,2.0,1.0,"[-1.907159529455516, -2.4963050885368014, -1.4...",2,0.01,10.0,-2.038528,6.516358,5.893179,2.958411


In [24]:
res_df_x4.to_csv('../data/results/both/x4.csv')

# X5

In [6]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

for direction in num_directions:
    for bandwidth in bandwidths:
        train_set_x5 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x5')
        D_train_x5 = train_set_x5[0]
        D_test_x5 = np.delete(D_train_x5[191:193], [191,192], axis=1)
        D_train_x5 = np.delete(np.delete(D_train_x5, [191,192], axis=0), [191,192], axis=1)
        y_train = train_set_x5[1]
        y_test = y_train[191:193]
        y_train = np.delete(y_train, [191,192], axis=0)
        for gamma in Gammas:
            for epsilon in Epsilons:
                for c in Cs:
                    DD_train = D_train_x5
                    DD_test = D_test_x5

                    clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
                    cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
                    clf.fit(DD_train,y_train)
                    prediction = clf.predict(DD_test)
                    score = np.mean(np.abs(prediction - np.array(y_test)))
                    results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'direction':direction, 'bandwidth':bandwidth, 'gamma':gamma, 'epsilon':epsilon, 'c':c})

                    print('Score: {}, Direction: {}, Bandwidth: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, direction, bandwidth, gamma, epsilon, c))
                    
res_df_x5 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.2719065005468995, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.2890650054689994, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.339745943710071, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.4094918874201423, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.271937251413983, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.289321086813282, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.340048182232858, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.4106413990750046, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.2717297972349586, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.287297972349595, Direction: 1, Bandwidth: 1.0, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.34500603479129

Score: 3.339745943710071, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 5
Score: 3.4094918874201423, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.0001, C: 10
Score: 3.271937251413983, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 0.1
Score: 3.289321086813282, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 1
Score: 3.340048182232858, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 5
Score: 3.4106413990750046, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.01, C: 10
Score: 3.2717297972349586, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 0.1
Score: 3.287297972349595, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 1
Score: 3.345006034791299, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 5
Score: 3.41972304634858, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 0.1, C: 10
Score: 3.272058466036742, Direction: 1, Bandwidth: 1.0, Gamma: 10, Epsilon: 1, C: 0.1
Score: 3.2905846603674282, Directio

Score: 3.270975385156004, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 0.1
Score: 3.2797538515600437, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 1
Score: 3.311584833510539, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 5
Score: 3.340638114291093, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.01, C: 10
Score: 3.2708713679718344, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 0.1
Score: 3.278713679718347, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 1
Score: 3.313568398591665, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 5
Score: 3.3455881466600457, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 0.1, C: 10
Score: 3.2710355159284976, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 1, C: 0.1
Score: 3.280355159284976, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 1, C: 1
Score: 3.321775796424854, Direction: 1, Bandwidth: 2.0, Gamma: 1, Epsilon: 1, C: 5
Score: 3.3735515928497084, Direction: 1, Bandwidth: 2.0

Score: 3.2842816454939543, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.01, C: 1
Score: 3.3301573726114952, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.01, C: 5
Score: 3.3677344287658317, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.01, C: 10
Score: 3.271281218884406, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.1, C: 0.1
Score: 3.2828121888440744, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.1, C: 1
Score: 3.330182157639941, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.1, C: 5
Score: 3.3878222052357247, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 0.1, C: 10
Score: 3.2715739865598996, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 1, C: 0.1
Score: 3.2857398655990036, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 1, C: 1
Score: 3.348699327994993, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 1, C: 5
Score: 3.4289911497443586, Direction: 2, Bandwidth: 1.0, Gamma: 0.1, Epsilon: 1, C: 10
Score: 4.58, Direction: 2, Bandwi

Score: 3.277195409296016, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 1
Score: 3.3039718415222, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 5
Score: 3.3306703005105827, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.01, C: 10
Score: 3.2706458532782916, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 0.1
Score: 3.2764585327829114, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 1
Score: 3.3017340875148147, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 5
Score: 3.330646640794235, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 0.1, C: 10
Score: 3.2707920461103432, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 1, C: 0.1
Score: 3.2779204611034265, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 1, C: 1
Score: 3.309602305517119, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 1, C: 5
Score: 3.349204611034238, Direction: 2, Bandwidth: 2.0, Gamma: 0.01, Epsilon: 1, C: 10
Score: 4.58, Direction: 2,

In [16]:
res_df_x5[res_df_x5['score'] == res_df_x5['score'].min()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
368,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,0.0001,-2.065398,5.960165,5.961457,3.270646
388,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,0.001,-2.065398,5.960165,5.961457,3.270646
408,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,0.01,-2.065398,5.960165,5.961457,3.270646
428,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,0.1,-2.065398,5.960165,5.961457,3.270646
448,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,1.0,-2.065398,5.960165,5.961457,3.270646
468,6.54,0.0,2.0,0.1,"[-1.9283252976554428, -2.731725095629041, -1.2...",2,0.1,10.0,-2.065398,5.960165,5.961457,3.270646


In [17]:
res_df_x5[res_df_x5['mean_cvs'] == res_df_x5['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,bandwidth,c,cvs,direction,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
15,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,0.0001,-2.043055,5.885261,6.298175,3.476457
35,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,0.001,-2.043055,5.885261,6.298175,3.476457
55,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,0.01,-2.043055,5.885261,6.298175,3.476457
75,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,0.1,-2.043055,5.885261,6.298175,3.476457
95,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,1.0,-2.043055,5.885261,6.298175,3.476457
115,6.54,0.0,1.0,10.0,"[-1.9038247931765373, -2.7325635159453903, -1....",1,1.0,10.0,-2.043055,5.885261,6.298175,3.476457


In [25]:
res_df_x5.to_csv('../data/results/both/x5.csv')

# X5B

In [50]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)


for gamma in Gammas:
    D_train_x5b = create_x5_matrix(L)[:,0].reshape(-1, 1)
    D_train_x5b = rbf_kernel(D_train_x5b, gamma=gamma)
    D_test_x5b = np.delete(D_train_x5b[191:193], [191,192], axis=1)
    D_train_x5b = np.delete(np.delete(D_train_x5b, [191,192], axis=0), [191,192], axis=1)
    for epsilon in Epsilons:
        for c in Cs:
            DD_train = D_train_x5b
            DD_test = D_test_x5b

            clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
            cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
            clf.fit(DD_train,y_train)
            prediction = clf.predict(DD_test)
            score = np.mean(np.abs(prediction - np.array(y_test)))
            results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'gamma':gamma, 'epsilon':epsilon, 'c':c})

            print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))
                    
res_df_x5b = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.2699144373879285, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.2693605067797855, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.268469095109149, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.267835768254003, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.26991453930364, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.2693478626973342, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.2684800695719596, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.267848121578979, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.2699106545520302, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.269309668268878, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.268585331867192, Gamma: 0.0001, Epsilon: 0.1, C: 5
Score: 3.2679610488922823, Gamma: 0.0001, Epsilon: 0.1, C: 10
Score: 3.269900909778944, Gamma: 0.0001, Epsilon: 1, C: 0.1
Score: 3.2695041382924352, Gamma: 0.0001, Epsilon: 1, C: 1
Score: 3.268951831107565, Gamma: 0.0001, Epsilon: 1, C: 5
Score: 3.2682736467696825

In [51]:
res_df_x5b[res_df_x5b['score'] == res_df_x5b['score'].min()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
103,6.54,0.0,10.0,"[-2.351883948495412, -2.5924847084574636, -1.2...",0.0001,10.0,-2.035638,6.251219,5.236163,2.762472


In [52]:
res_df_x5b[res_df_x5b['mean_cvs'] == res_df_x5b['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
43,6.54,0.0,10.0,"[-1.9400076449250794, -2.6376311567624184, -1....",0.0001,0.01,-1.928625,6.285275,6.294457,3.274591


In [53]:
res_df_x5b.to_csv('../data/results/both/x5b.csv')

# X6

In [7]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)


for gamma in Gammas:
    D_train_x6 = create_x6_matrix(L)
    D_train_x6 = laplacian_kernel(D_train_x6, gamma=gamma)
    D_test_x6 = np.delete(D_train_x6[191:193], [191,192], axis=1)
    D_train_x6 = np.delete(np.delete(D_train_x6, [191,192], axis=0), [191,192], axis=1)
    for epsilon in Epsilons:
        for c in Cs:
            DD_train = D_train_x6
            DD_test = D_test_x6

            clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
            cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
            clf.fit(DD_train,y_train)
            prediction = clf.predict(DD_test)
            score = np.mean(np.abs(prediction - np.array(y_test)))
            results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'gamma':gamma, 'epsilon':epsilon, 'c':c})

            print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))
                    
res_df_x6 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
50 6
Score: 3.260055087203926, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.260952567510356, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.1869715489627666, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.164152410162873, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.2604822527588584, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.259660575937248, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.1870697918497854, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.1642057752158865, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.261903760263499, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.2495696777738776, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.1864399770135616, Gamma: 0.0001, Epsilon: 0.1, C: 5
Score: 3.162024922800348, Gamma: 0.0001, Epsilon: 0.1, C: 10
Score: 3.2635144875264843, Gamma: 0.0001, Epsilon: 1, C: 0.1
Score: 3.247298953917804, Gamma: 0.0001, Epsilon: 1, C: 1
Score: 3.239136754201985, Gamma: 0.0001, Epsilon: 1, C: 5
Score: 3.2128586515

In [18]:
res_df_x6[res_df_x6['score'] == res_df_x6['score'].min()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
11,6.54,0.0,10.0,"[-2.7064085108682034, -2.056409997440358, -1.5...",0.1,0.0001,-2.107132,4.833467,4.617517,3.162025


In [19]:
res_df_x6[res_df_x6['mean_cvs'] == res_df_x6['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
31,6.54,0.0,10.0,"[-1.666939141799715, -1.8901538085456349, -1.3...",0.1,0.001,-1.875871,5.818097,5.818097,3.27


In [26]:
res_df_x6.to_csv('../data/results/both/x6.csv')

# X7

In [44]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)


for gamma in Gammas:
    D_train_x7 = create_x7_matrix(L)
    D_train_x7 = rbf_kernel(D_train_x7, gamma=gamma)
    D_test_x7 = np.delete(D_train_x7[191:193], [191,192], axis=1)
    D_train_x7 = np.delete(np.delete(D_train_x7, [191,192], axis=0), [191,192], axis=1)
    for epsilon in Epsilons:
        for c in Cs:
            DD_train = D_train_x7
            DD_test = D_test_x7

            clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
            cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
            clf.fit(DD_train,y_train)
            prediction = clf.predict(DD_test)
            score = np.mean(np.abs(prediction - np.array(y_test)))
            results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'gamma':gamma, 'epsilon':epsilon, 'c':c})

            print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))
                    
res_df_x7 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 10
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 0.1
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 1
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 5
Score: 4.58, Gamma: 0.0001, Epsilon: 10,

In [45]:
res_df_x7[res_df_x7['score'] == res_df_x7['score'].min()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
0,6.54,0.0,0.1,"[-1.8918374745646263, -2.6587424590345345, -1....",0.0001,0.0001,-2.032729,5.958460,5.958460,3.27
1,6.54,0.0,1.0,"[-1.7376155114295169, -2.51995119897169, -1.24...",0.0001,0.0001,-2.004635,6.045549,6.045549,3.27
2,6.54,0.0,5.0,"[-1.5995940131437278, -2.2306608001068122, -1....",0.0001,0.0001,-1.952431,6.099077,6.099077,3.27
3,6.54,0.0,10.0,"[-1.6724204154752715, -2.2294252965362693, -1....",0.0001,0.0001,-1.977338,6.144247,6.144247,3.27
4,6.54,0.0,0.1,"[-1.8922593290891576, -2.6584219462140224, -1....",0.0100,0.0001,-2.032496,5.961876,5.961876,3.27
5,6.54,0.0,1.0,"[-1.7374119330415612, -2.520544019990731, -1.2...",0.0100,0.0001,-2.004597,6.046420,6.046420,3.27
6,6.54,0.0,5.0,"[-1.5993999032440145, -2.2315160739611875, -1....",0.0100,0.0001,-1.952511,6.098764,6.098764,3.27
7,6.54,0.0,10.0,"[-1.6716564503114257, -2.2286365579114236, -1....",0.0100,0.0001,-1.976967,6.143966,6.143966,3.27
8,6.54,0.0,0.1,"[-1.8901648527743093, -2.677484934422303, -1.1...",0.1000,0.0001,-2.033756,5.978734,5.978734,3.27
9,6.54,0.0,1.0,"[-1.7350778588275777, -2.5252631223949984, -1....",0.1000,0.0001,-2.003743,6.044792,6.044792,3.27


In [46]:
res_df_x7[res_df_x7['mean_cvs'] == res_df_x7['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
2,6.54,0.0,5.0,"[-1.5995940131437278, -2.2306608001068122, -1....",0.0001,0.0001,-1.952431,6.099077,6.099077,3.27


In [47]:
res_df_x7.to_csv('../data/results/both/x7.csv')

# X10-X17

In [8]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=direction, bandwidth=bandwidth, h=1, x_type='x1')
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)


for gamma in Gammas:
    master, master_names = prepare_master(x10_x17)
    K_train = master
    K_train = rbf_kernel(K_train, gamma=gamma)
    K_test = np.delete(K_train[191:193], [191,192], axis=1)
    K_train = np.delete(np.delete(K_train, [191,192], axis=0), [191,192], axis=1)
    for epsilon in Epsilons:
        for c in Cs:
            DD_train = K_train
            DD_test = K_test

            clf = svm.SVR(kernel='precomputed', epsilon=epsilon, C=c)
            cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
            clf.fit(DD_train,y_train)
            prediction = clf.predict(DD_test)
            score = np.mean(np.abs(prediction - np.array(y_test)))
            results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'gamma':gamma, 'epsilon':epsilon, 'c':c})

            print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))
                    
res_df_K = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.0001, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.01, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 0.1, C: 10
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 0.1
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 1
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 5
Score: 3.27, Gamma: 0.0001, Epsilon: 1, C: 10
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 0.1
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 1
Score: 4.58, Gamma: 0.0001, Epsilon: 10, C: 5
Score: 4.58, Gamma: 0.0001, Epsilon: 10,

In [20]:
res_df_K[res_df_K['score'] == res_df_K['score'].min()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
0,6.54,0.0,0.1,"[-1.8896908337544636, -2.7027337674628553, -1....",0.0001,0.0001,-2.051859,5.949724,5.949724,3.27
1,6.54,0.0,1.0,"[-1.817159415635319, -2.634939377677258, -1.20...",0.0001,0.0001,-2.038914,6.132003,6.132003,3.27
2,6.54,0.0,5.0,"[-1.8084529324855636, -2.621903922472482, -1.1...",0.0001,0.0001,-2.026899,6.127272,6.127272,3.27
3,6.54,0.0,10.0,"[-1.813052407830166, -2.6127032679455464, -1.1...",0.0001,0.0001,-2.026452,6.074494,6.074494,3.27
4,6.54,0.0,0.1,"[-1.89005640204582, -2.701588447739595, -1.212...",0.0100,0.0001,-2.051647,5.957605,5.957605,3.27
5,6.54,0.0,1.0,"[-1.8174854337610002, -2.6343320665425596, -1....",0.0100,0.0001,-2.037922,6.140780,6.140780,3.27
6,6.54,0.0,5.0,"[-1.8094635148672347, -2.625730936780637, -1.1...",0.0100,0.0001,-2.026957,6.137025,6.137025,3.27
7,6.54,0.0,10.0,"[-1.8124349130807458, -2.609612847925316, -1.1...",0.0100,0.0001,-2.025153,6.084699,6.084699,3.27
8,6.54,0.0,0.1,"[-1.891288529209465, -2.6907992444116777, -1.2...",0.1000,0.0001,-2.051094,6.021495,6.021495,3.27
9,6.54,0.0,1.0,"[-1.8176913408907278, -2.6286326608483934, -1....",0.1000,0.0001,-2.036500,6.079097,6.079097,3.27


In [21]:
res_df_K[res_df_K['mean_cvs'] == res_df_K['mean_cvs'].max()]

Unnamed: 0,actual_0,actual_1,c,cvs,epsilon,gamma,mean_cvs,predicted_0,predicted_1,score
53,6.54,0.0,1.0,"[-1.6799202296391182, -2.519055074311881, -1.1...",1.0,0.01,-1.927706,5.995476,5.995476,3.27


In [27]:
res_df_K.to_csv('../data/results/both/x10-x17.csv')

In [34]:
Gammas = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
Cs = [0.1, 1, 5, 10]
Epsilons = [1e-4, 1e-2, 1e-1, 1, 10]
num_directions = [1, 2]
bandwidths = [1.0, 2.0]
ilocs = [0,1,2,3,4,5,6,7]

results = []

train_set_x1 = create_test_diagram_kernels(num_directions=1, bandwidth=2., h=1, x_type='x1')
D_train_x1 = train_set_x1[0]
D_test_x1 = np.delete(D_train_x1[191:193], [191,192], axis=1)
D_train_x1 = np.delete(np.delete(D_train_x1, [191,192], axis=0), [191,192], axis=1)
y_train = train_set_x1[1]
y_test = y_train[191:193]
y_train = np.delete(y_train, [191,192], axis=0)

D_train_x3 = create_x3_matrix(L)
D_train_x3 = laplacian_kernel(D_train_x3, gamma=0.0001)
D_test_x3 = np.delete(D_train_x3[191:193], [191,192], axis=1)
D_train_x3 = np.delete(np.delete(D_train_x3, [191,192], axis=0), [191,192], axis=1)

train_set_x4 = create_test_diagram_kernels(num_directions=1, bandwidth=2., h=1, x_type='x4')
D_train_x4 = train_set_x4[0]
D_test_x4 = np.delete(D_train_x4[191:193], [191,192], axis=1)
D_train_x4 = np.delete(np.delete(D_train_x4, [191,192], axis=0), [191,192], axis=1)

train_set_x5 = create_test_diagram_kernels(num_directions=1, bandwidth=2., h=1, x_type='x5')
D_train_x5 = train_set_x5[0]
D_test_x5 = np.delete(D_train_x5[191:193], [191,192], axis=1)
D_train_x5 = np.delete(np.delete(D_train_x5, [191,192], axis=0), [191,192], axis=1)

D_train_x6 = create_x6_matrix(L)
D_train_x6 = laplacian_kernel(D_train_x6, gamma=gamma)
D_test_x6 = np.delete(D_train_x6[191:193], [191,192], axis=1)
D_train_x6 = np.delete(np.delete(D_train_x6, [191,192], axis=0), [191,192], axis=1)


for iloc in ilocs:
    master, master_names = prepare_master(x10_x17)
    K_train = master[:,iloc].reshape(-1, 1)
    K_train = rbf_kernel(K_train, gamma=0.0001)
    K_test = np.delete(K_train[191:193], [191,192], axis=1)
    K_train = np.delete(np.delete(K_train, [191,192], axis=0), [191,192], axis=1)

    DD_train = D_train_x1+D_train_x3+D_train_x4+D_train_x5+D_train_x6+K_train
    DD_test = D_test_x1+D_test_x3+D_test_x4+D_test_x5+D_test_x6+K_test

    clf = svm.SVR(kernel='precomputed', epsilon=0.0001, C=10)
    cvs = cross_val_score(clf, DD_train, y=y_train, cv=5, scoring='neg_mean_absolute_error')
    clf.fit(DD_train,y_train)
    prediction = clf.predict(DD_test)
    score = np.mean(np.abs(prediction - np.array(y_test)))
    results.append({'score':score, 'cvs':cvs, 'mean_cvs':cvs.mean(), 'predicted_0':prediction[0], 'predicted_1':prediction[1], 'actual_0':y_test[0], 'actual_1':y_test[1], 'x_':iloc})

    print('Score: {}, Gamma: {}, Epsilon: {}, C: {}'.format(score, gamma, epsilon, c))

res_df_x1_x17 = pd.DataFrame(results)

[3, 9]
Computing Test Diagrams
Computing Kernel
50 50
[3, 9]
Computing Test Diagrams
Computing Kernel
[3, 9]
Computing Test Diagrams
Computing Kernel
50 6
Score: 1.8235996344766332, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8247764985843156, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.824390363939456, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8242978896312612, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8262531655405874, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8594793435418233, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8235536509502315, Gamma: 0.0001, Epsilon: 10, C: 10
Score: 1.8243856246912888, Gamma: 0.0001, Epsilon: 10, C: 10


In [35]:
res_df_x1_x17.head()

Unnamed: 0,actual_0,actual_1,cvs,mean_cvs,predicted_0,predicted_1,score,x_
0,6.54,0.0,"[-1.902273508578469, -2.10001813098004, -1.986...",-1.979373,4.554734,1.661933,1.8236,0
1,6.54,0.0,"[-1.9019848934197239, -2.0997192888294247, -1....",-1.978873,4.553575,1.663128,1.824776,1
2,6.54,0.0,"[-1.9008597905016285, -2.099724077170349, -1.9...",-1.978476,4.561746,1.670526,1.82439,2
3,6.54,0.0,"[-1.9019419402752515, -2.1000152369957528, -1....",-1.978947,4.557258,1.665854,1.824298,3
4,6.54,0.0,"[-1.902732717367581, -2.1012378682081567, -1.9...",-1.976236,4.551061,1.663567,1.826253,4


In [36]:
res_df_x1_x17.to_csv('../data/results/both/x1-x17.csv')