In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.spatial.distance import pdist
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import cross_val_score

from sklearn import svm

import dionysus as d
import sklearn_tda as tda

from biomarker.data_collection import *

Cython not found--WassersteinDistance not available
Gudhi not found--GraphInducedComplex not available
data_collection


In [2]:
EXCLUDE_KEYS = [206, 205, 184, 183, 82, 81, 45]
TEST_KEYS = [217, 216]#, 215, 214, 213, 212, 211, 210, 209]
NUM_TEST = 10
SIGMA = 1

C = 1
EPSILON = 0.1
GAMMA = 0.1
LIM = 5
MUL = 10

In [3]:
def compute_diagram(points, k=3):
    r = np.max(pdist(points, 'euclidean'))
    f = d.fill_rips(points, k, r)
    m = d.homology_persistence(f)
    return d.init_diagrams(m, f)

def get_max_diag_dim(dgms, h=1):
    m = 0
    for d in dgms:
        n = len(d[h])
        if n > m:
            m = n
    return m

def kernelize_diagrams_rbf(dgms, h=1, sigma=SIGMA, default=10):
    D = np.zeros(shape=(len(dgms), len(dgms)))
    for i in range(len(dgms)):
        print('step: ', i)
        for j in range(len(dgms)):
            if i == j:
                D[i,j] = 0.0
            else:
                xi = np.zeros(((len(dgms[i][h]) if len(dgms[i][h]) > len(dgms[j][h]) else len(dgms[j][h])), 2))
                xj = np.zeros(xi.shape)
                if xj.shape[0] > 0:
                    for ip in range(len(dgms[i][h])):
                        xi[ip,0] = dgms[i][h][ip].birth
                        xi[ip,1] = dgms[i][h][ip].death
                    for jp in range(len(dgms[j][h])):
                        xj[jp,0] = dgms[j][h][ip].birth
                        xj[jp,1] = dgms[j][h][ip].death
                    xi[xi[:,1] >= 1E108] = 0
                    xj[xj[:,1] >= 1E108] = 0
                    D[i,j] = np.exp(np.power((np.linalg.norm(xi - xj, 2)), 2)/(2*sigma))
    #                 D[D >= 1E108] = 0
                else:
                    D[i,j] = 0
                D = np.nan_to_num(D)
    return D

def kernelize_diagrams(dgms, h=1, sigma=SIGMA):
    D = np.zeros(shape=(len(dgms), len(dgms)))
    for i in range(len(dgms)):
        print('step: ', i)
        for j in range(len(dgms)):
            if i == j:
                D[i,j] = 0.0
            else:
                D[i,j] = d.bottleneck_distance(dgms[i][h], dgms[j][h])
#                 D[i,j] = np.exp(d.wasserstein_distance(dgms[i][h], dgms[j][h], q=2)/2*sigma)
    D = np.nan_to_num(D)
    return D


def compute_sample_weights(y, lim=LIM, mul=MUL):
    ret = np.ones(y.shape[0])
    for i in range(y.shape[0]):
        v = y[i]
        if v < lim:
            ret[i] = (lim-v)*mul
    return ret

def create_train_test_matrices(train_keys, test_keys):
    excel = parse_master_file(exclude_keys=EXCLUDE_KEYS)
    test_idxs = list(excel[np.isin(excel['Key'], test_keys)].index)
    excel_test = excel.iloc[test_idxs]
    L_test = get_filename_list(excel_test['Associated data'])
    y_test = excel_test['Output: logK'].values
    y_buck_test = excel_test['Output: logKbucket'].values
    
    excel_train = excel.drop(test_idxs, axis=0)
    L_train = get_filename_list(excel_train['Associated data'])
    y_train = excel_train['Output: logK'].values
    y_buck_train = excel_train['Output: logKbucket'].values
    
    x1_train, x1_dims = create_x1_matrix(L_train, return_dims=True)
    x4_train, x4_dims = create_x4_matrix(L_train, return_dims=True)
    x5_train, x5_dims = create_x5_matrix(L_train, return_dims=True)
    x6_train, x6_dims = create_x6_matrix(L_train, return_dims=True)
    x7_train, x7_dims = create_x7_matrix(L_train, return_dims=True)
    
    x1_test = create_x1_matrix(L_test, max_dims=x1_dims)
    x4_test = create_x4_matrix(L_test, max_dims=x4_dims)
    x5_test = create_x5_matrix(L_test, max_dims=x5_dims)
    x6_test = create_x6_matrix(L_test, max_dims=x6_dims)
    x7_test = create_x7_matrix(L_test, max_dims=x7_dims)
    
    x10_x17_train = excel_train.iloc[:, 3:-2]
    master_train, master_names = prepare_master(x10_x17_train)
    
    x10_x17_test = excel_test.iloc[:, 3:-2]
    master_test, master_names = prepare_master(x10_x17_test)
    
    X_train = np.hstack((x1_train,x4_train,x5_train,x6_train,x7_train))
    X_test = np.hstack((x1_test, x4_test, x5_test, x6_test, x7_test))
    
    X = np.vstack((X_train,X_test))
    K = rbf_kernel(X)
    X_train = K[:len(y_train), :len(y_train)]
    X_test = K[len(y_train):, :len(y_train)]
    
    return (X_train, y_train), (X_test, y_test)

def create_train_test_diagram_kernels(train_keys, test_keys, h=1):
    excel = parse_master_file(exclude_keys=EXCLUDE_KEYS)
    test_idxs = list(excel[np.isin(excel['Key'], test_keys)].index)
    excel_test = excel.iloc[test_idxs]
    L_test = get_filename_list(excel_test['Associated data'])
    y_test = excel_test['Output: logK'].values
    y_buck_test = excel_test['Output: logKbucket'].values
    
    excel_train = excel.drop(test_idxs, axis=0)
    L_train = get_filename_list(excel_train['Associated data'])
    y_train = excel_train['Output: logK'].values
    y_buck_train = excel_train['Output: logKbucket'].values
    
    print('Computing Training Diagrams')
    diags_train = []
    for l in L_train:
        diags_train.append(compute_diagram(parse_x1(l)[['X', 'Y', 'Z']].values))
    for i in range(len(diags_train)):
        if len(diags_train[i][h]) == 0:
            diags_train[i] = np.array([[0,0]])
        else:
            diags_train[i] = np.array([[pt.birth,pt.death] for pt in diags_train[i][h]])
    
    print('Computing Test Diagrams')
    diags_test = []
    for l in L_test:
        diags_test.append(compute_diagram(parse_x1(l)[['X', 'Y', 'Z']].values))
    for i in range(len(diags_test)):
        if len(diags_test[i][h]) == 0:
            diags_test[i] = np.array([[0,0]])
        else:
            diags_test[i] = np.array([[pt.birth,pt.death] for pt in diags_test[i][h]])
    
    print('Computing Kernel')
    
    dd = diags_train + diags_test
    SW = tda.SlicedWassersteinKernel(num_directions=1, bandwidth=1.)
    
    D = SW.fit_transform(dd)
    
    D_train = D[:len(y_train), :len(y_train)]
    D_test = D[len(y_train):, :len(y_train)]
    return (D_train, y_train), (D_test, y_test)
            
            

In [4]:
excel = parse_master_file(exclude_keys=EXCLUDE_KEYS).reset_index()
test_idxs = list(excel[np.isin(excel['Key'], TEST_KEYS)].index)
keys = excel['Key']
train_keys = [key for key in keys if key not in TEST_KEYS]

[3, 9]


In [5]:
train_set, test_set = create_train_test_diagram_kernels(train_keys, TEST_KEYS)
D_train = train_set[0]
y_train = train_set[1]
D_test = test_set[0]
y_test = test_set[1]
print(D_test.shape)
print(D_train.shape)

[3, 9]
Computing Training Diagrams
Computing Test Diagrams
Computing Kernel
(2, 194)
(194, 194)


In [6]:
train_set, test_set = create_train_test_matrices(train_keys, TEST_KEYS)
X_train = train_set[0]
X_test = test_set[0]
print(X_train.shape)

[3, 9]
(194, 194)


In [7]:
K_train = np.hstack((D_train, X_train))
K_test = np.hstack((D_test, X_test))

In [38]:
# Parameter grid search
Gammas = [1e-2, 1e-1, 1, 10, 100, 'auto']
Cs = [0.1, 1, 10, 20, 50, 100]
Epsilons = [1e-7, 1e-6, 1e-5, 1e-4, 1e-2, 1e-1, 1, 10, 100]
best_params = [0,0,0]
best_score = float('inf')
best_prediction = None
for gamma in Gammas:
    for c in Cs:
        for eps in Epsilons:
            clf = svm.SVR(kernel='rbf', epsilon=eps, C=c, gamma=gamma)
            sws = compute_sample_weights(y_train)
            clf.fit(K_train,y_train, sample_weight=sws)
            prediction = clf.predict(K_test)
            diff = (np.abs(y_test[0] - prediction[0]) + np.abs(y_test[1] - prediction[1]))/2
            print('gamma: {}, c: {}, epsilon: {}, Difference: {}:'.format(gamma, c, eps, diff))
            if diff < best_score:
                best_score = diff
                best_params = [gamma, c, eps]
                best_prediction = prediction

gamma: 0.01, c: 0.1, epsilon: 1e-07, Difference: 2.798124130132389:
gamma: 0.01, c: 0.1, epsilon: 1e-06, Difference: 2.7981242300833116:
gamma: 0.01, c: 0.1, epsilon: 1e-05, Difference: 2.7981252295925216:
gamma: 0.01, c: 0.1, epsilon: 0.0001, Difference: 2.798123547665348:
gamma: 0.01, c: 0.1, epsilon: 0.01, Difference: 2.799252879511868:
gamma: 0.01, c: 0.1, epsilon: 0.1, Difference: 2.807266838073759:
gamma: 0.01, c: 0.1, epsilon: 1, Difference: 2.919545667600401:
gamma: 0.01, c: 0.1, epsilon: 10, Difference: 4.58:
gamma: 0.01, c: 0.1, epsilon: 100, Difference: 4.580000000000002:
gamma: 0.01, c: 1, epsilon: 1e-07, Difference: 2.690882860776659:
gamma: 0.01, c: 1, epsilon: 1e-06, Difference: 2.690882880850786:
gamma: 0.01, c: 1, epsilon: 1e-05, Difference: 2.6908830815920055:
gamma: 0.01, c: 1, epsilon: 0.0001, Difference: 2.6908718595158256:
gamma: 0.01, c: 1, epsilon: 0.01, Difference: 2.6907725371696944:
gamma: 0.01, c: 1, epsilon: 0.1, Difference: 2.694936253931372:
gamma: 0.01, 

gamma: 10, c: 20, epsilon: 0.0001, Difference: 1.737881975214019:
gamma: 10, c: 20, epsilon: 0.01, Difference: 1.7427185828172962:
gamma: 10, c: 20, epsilon: 0.1, Difference: 1.7793811769057823:
gamma: 10, c: 20, epsilon: 1, Difference: 2.0136789172200915:
gamma: 10, c: 20, epsilon: 10, Difference: 4.58:
gamma: 10, c: 20, epsilon: 100, Difference: 4.580000000000002:
gamma: 10, c: 50, epsilon: 1e-07, Difference: 1.7925466425631602:
gamma: 10, c: 50, epsilon: 1e-06, Difference: 1.7925591343584895:
gamma: 10, c: 50, epsilon: 1e-05, Difference: 1.7925400150553568:
gamma: 10, c: 50, epsilon: 0.0001, Difference: 1.7926516305676077:
gamma: 10, c: 50, epsilon: 0.01, Difference: 1.796262999074349:
gamma: 10, c: 50, epsilon: 0.1, Difference: 1.823014244655885:
gamma: 10, c: 50, epsilon: 1, Difference: 1.9817889988089594:
gamma: 10, c: 50, epsilon: 10, Difference: 4.58:
gamma: 10, c: 50, epsilon: 100, Difference: 4.580000000000002:
gamma: 10, c: 100, epsilon: 1e-07, Difference: 1.7455485521991256

In [39]:
print(best_score, best_prediction[0], best_prediction[1], best_params)

1.737796655458835 6.050343105662476 2.985936416580146 [0.01, 20, 1e-07]


In [40]:
y_test

array([6.54, 0.  ])

In [28]:
Gammas = [1e-4, 1e-2, 1e-1, 1, 10, 100, 'auto']
Cs = [0.1, 1, 10, 20, 50, 100]
Epsilons = [1e-7, 1e-6, 1e-5, 1e-4, 1e-2, 1e-1, 1, 10, 100]
best_params = [0,0,0]
best_score = -float('inf')
best_cvs = None
for gamma in Gammas:
    for c in Cs:
        for eps in Epsilons:
            clf = svm.SVR(kernel='rbf', epsilon=eps, C=c, gamma=gamma)
            sws = compute_sample_weights(y_train)
            cvs = cross_val_score(clf, K_train, y_train, cv=5, scoring='neg_median_absolute_error', fit_params={'sample_weight':sws})
            print('gamma: {}, c: {}, epsilon: {}, Mean Cross-Validation Score: {}, Stddev: {}'.format(gamma, c, eps, cvs.mean(), cvs.std()))
            if cvs.mean() > best_score:
                best_score = cvs.mean()
                best_cvs = cvs
                best_params = [gamma, c, eps]

gamma: 0.0001, c: 0.1, epsilon: 1e-07, Mean Cross-Validation Score: -3.5842988080577483, Stddev: 1.0942811763635214
gamma: 0.0001, c: 0.1, epsilon: 1e-06, Mean Cross-Validation Score: -3.5842991921063314, Stddev: 1.0942815714502052
gamma: 0.0001, c: 0.1, epsilon: 1e-05, Mean Cross-Validation Score: -3.5843030325921617, Stddev: 1.094285522330475
gamma: 0.0001, c: 0.1, epsilon: 0.0001, Mean Cross-Validation Score: -3.5843414374504605, Stddev: 1.0943250324766032
gamma: 0.0001, c: 0.1, epsilon: 0.01, Mean Cross-Validation Score: -3.588584628039611, Stddev: 1.0986784269345413
gamma: 0.0001, c: 0.1, epsilon: 0.1, Mean Cross-Validation Score: -3.568648716784903, Stddev: 1.069124053461559
gamma: 0.0001, c: 0.1, epsilon: 1, Mean Cross-Validation Score: -3.4163179709966682, Stddev: 1.0050676784529895
gamma: 0.0001, c: 0.1, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.0001, c: 0.1, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607

gamma: 0.01, c: 10, epsilon: 1e-05, Mean Cross-Validation Score: -2.599019529165681, Stddev: 0.762704048009421
gamma: 0.01, c: 10, epsilon: 0.0001, Mean Cross-Validation Score: -2.598989074846929, Stddev: 0.7626929685582788
gamma: 0.01, c: 10, epsilon: 0.01, Mean Cross-Validation Score: -2.596928133866237, Stddev: 0.7614592179961526
gamma: 0.01, c: 10, epsilon: 0.1, Mean Cross-Validation Score: -2.579067632259396, Stddev: 0.7432060856763251
gamma: 0.01, c: 10, epsilon: 1, Mean Cross-Validation Score: -2.408791840791076, Stddev: 0.6054396570835171
gamma: 0.01, c: 10, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.01, c: 10, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 0.01, c: 20, epsilon: 1e-07, Mean Cross-Validation Score: -2.4928901144797506, Stddev: 0.5932397794040489
gamma: 0.01, c: 20, epsilon: 1e-06, Mean Cross-Validation Score: -2.492879330326979, Stddev: 0.5932443617547231
g

gamma: 0.1, c: 50, epsilon: 1, Mean Cross-Validation Score: -2.168226470481232, Stddev: 0.2785145161925178
gamma: 0.1, c: 50, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.1, c: 50, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 0.1, c: 100, epsilon: 1e-07, Mean Cross-Validation Score: -2.145720331348687, Stddev: 0.2675360996285032
gamma: 0.1, c: 100, epsilon: 1e-06, Mean Cross-Validation Score: -2.145720316428576, Stddev: 0.26753609506124654
gamma: 0.1, c: 100, epsilon: 1e-05, Mean Cross-Validation Score: -2.1457025497969924, Stddev: 0.26751040858791686
gamma: 0.1, c: 100, epsilon: 0.0001, Mean Cross-Validation Score: -2.1457318151961564, Stddev: 0.2675219886019046
gamma: 0.1, c: 100, epsilon: 0.01, Mean Cross-Validation Score: -2.1454206010080363, Stddev: 0.2674080600933508
gamma: 0.1, c: 100, epsilon: 0.1, Mean Cross-Validation Score: -2.142601058113507, Stddev: 0.2656959903195914

gamma: 10, c: 1, epsilon: 0.0001, Mean Cross-Validation Score: -2.1911802730103807, Stddev: 0.536581798697505
gamma: 10, c: 1, epsilon: 0.01, Mean Cross-Validation Score: -2.1903110440865095, Stddev: 0.5362696717603328
gamma: 10, c: 1, epsilon: 0.1, Mean Cross-Validation Score: -2.1797687584769476, Stddev: 0.5326412139639319
gamma: 10, c: 1, epsilon: 1, Mean Cross-Validation Score: -2.1304963018633365, Stddev: 0.5288188879973231
gamma: 10, c: 1, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 10, c: 1, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 10, c: 10, epsilon: 1e-07, Mean Cross-Validation Score: -2.109847029965116, Stddev: 0.6015083263209795
gamma: 10, c: 10, epsilon: 1e-06, Mean Cross-Validation Score: -2.109847057968664, Stddev: 0.6015083506019808
gamma: 10, c: 10, epsilon: 1e-05, Mean Cross-Validation Score: -2.109847338990751, Stddev: 0.6015085930921739
gamma: 10, c: 10, epsi

gamma: 100, c: 20, epsilon: 1, Mean Cross-Validation Score: -2.152849833072905, Stddev: 0.637053623074208
gamma: 100, c: 20, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 100, c: 20, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 100, c: 50, epsilon: 1e-07, Mean Cross-Validation Score: -2.1095933313451187, Stddev: 0.601360809901943
gamma: 100, c: 50, epsilon: 1e-06, Mean Cross-Validation Score: -2.1095933610487796, Stddev: 0.6013608344611939
gamma: 100, c: 50, epsilon: 1e-05, Mean Cross-Validation Score: -2.1095936580853913, Stddev: 0.601361080053786
gamma: 100, c: 50, epsilon: 0.0001, Mean Cross-Validation Score: -2.109596628451504, Stddev: 0.6013635359876084
gamma: 100, c: 50, epsilon: 0.01, Mean Cross-Validation Score: -2.109925247369259, Stddev: 0.6016250409892931
gamma: 100, c: 50, epsilon: 0.1, Mean Cross-Validation Score: -2.1124523250901666, Stddev: 0.6037079478924358
gamma: 10

In [29]:
print(best_score, best_cvs, best_params)

-2.062309165814432 [-1.90194667 -2.54401009 -1.16537623 -2.70335929 -1.99685355] [1, 50, 1e-05]


In [30]:
clf = svm.SVR(kernel='precomputed', epsilon=best_params[2], C=best_params[1], gamma=best_params[0])
sws = compute_sample_weights(y_train)
clf.fit(D_train,y_train, sample_weight=sws)
prediction = clf.predict(D_test)

In [31]:
prediction

array([6.00071484, 3.04579487])

In [32]:
y_test

array([6.54, 0.  ])

In [33]:
clf = svm.SVR(kernel='rbf', epsilon=best_params[2], C=best_params[1], gamma=best_params[0])
sws = compute_sample_weights(y_train)
clf.fit(K_train,y_train, sample_weight=sws)
prediction = clf.predict(K_test)

In [34]:
prediction

array([5.13473499, 4.98807916])

In [35]:
Gammas = [1e-4, 1e-2, 1e-1, 1, 10, 100, 'auto']
Cs = [0.1, 1, 10, 20, 50, 100]
Epsilons = [1e-7, 1e-6, 1e-5, 1e-4, 1e-2, 1e-1, 1, 10, 100]
best_params = [0,0,0]
best_score = -float('inf')
best_cvs = None
for gamma in Gammas:
    for c in Cs:
        for eps in Epsilons:
            clf = svm.SVR(kernel='precomputed', epsilon=eps, C=c, gamma=gamma)
            sws = compute_sample_weights(y_train)
            cvs = cross_val_score(clf, D_train, y_train, cv=5, scoring='neg_mean_absolute_error', fit_params={'sample_weight':sws})
            print('gamma: {}, c: {}, epsilon: {}, Mean Cross-Validation Score: {}, Stddev: {}'.format(gamma, c, eps, cvs.mean(), cvs.std()))
            if cvs.mean() > best_score:
                best_score = cvs.mean()
                best_cvs = cvs
                best_params = [gamma, c, eps]

gamma: 0.0001, c: 0.1, epsilon: 1e-07, Mean Cross-Validation Score: -3.0853169724057543, Stddev: 0.9150096943270645
gamma: 0.0001, c: 0.1, epsilon: 1e-06, Mean Cross-Validation Score: -3.0853271965981506, Stddev: 0.9150144408114264
gamma: 0.0001, c: 0.1, epsilon: 1e-05, Mean Cross-Validation Score: -3.085315518104125, Stddev: 0.9150105643310753
gamma: 0.0001, c: 0.1, epsilon: 0.0001, Mean Cross-Validation Score: -3.0852846319240337, Stddev: 0.9150194505807188
gamma: 0.0001, c: 0.1, epsilon: 0.01, Mean Cross-Validation Score: -3.08333244166926, Stddev: 0.9160668025626666
gamma: 0.0001, c: 0.1, epsilon: 0.1, Mean Cross-Validation Score: -3.0811702235589418, Stddev: 0.9276660194715709
gamma: 0.0001, c: 0.1, epsilon: 1, Mean Cross-Validation Score: -3.0181201701346163, Stddev: 0.9132016020887115
gamma: 0.0001, c: 0.1, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.0001, c: 0.1, epsilon: 100, Mean Cross-Validation Score: -2.669106612685560

gamma: 0.01, c: 10, epsilon: 1e-06, Mean Cross-Validation Score: -2.4510991060797247, Stddev: 0.5361697790558325
gamma: 0.01, c: 10, epsilon: 1e-05, Mean Cross-Validation Score: -2.451093510472586, Stddev: 0.5361417403352908
gamma: 0.01, c: 10, epsilon: 0.0001, Mean Cross-Validation Score: -2.4510726032275953, Stddev: 0.5361280379190608
gamma: 0.01, c: 10, epsilon: 0.01, Mean Cross-Validation Score: -2.449100782359846, Stddev: 0.5365894874668754
gamma: 0.01, c: 10, epsilon: 0.1, Mean Cross-Validation Score: -2.4321622908432, Stddev: 0.5398583776217022
gamma: 0.01, c: 10, epsilon: 1, Mean Cross-Validation Score: -2.38099499239089, Stddev: 0.37687623578469087
gamma: 0.01, c: 10, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.01, c: 10, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 0.01, c: 20, epsilon: 1e-07, Mean Cross-Validation Score: -2.387026007016677, Stddev: 0.46051426749859203


gamma: 0.1, c: 50, epsilon: 0.1, Mean Cross-Validation Score: -2.2835911004491103, Stddev: 0.3334600007746135
gamma: 0.1, c: 50, epsilon: 1, Mean Cross-Validation Score: -2.3366320754782763, Stddev: 0.2339542773562836
gamma: 0.1, c: 50, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 0.1, c: 50, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 0.1, c: 100, epsilon: 1e-07, Mean Cross-Validation Score: -2.2460940162001193, Stddev: 0.23885259105019044
gamma: 0.1, c: 100, epsilon: 1e-06, Mean Cross-Validation Score: -2.246054065109862, Stddev: 0.23883502517615296
gamma: 0.1, c: 100, epsilon: 1e-05, Mean Cross-Validation Score: -2.2460842831373444, Stddev: 0.23885165759534072
gamma: 0.1, c: 100, epsilon: 0.0001, Mean Cross-Validation Score: -2.2461084012196086, Stddev: 0.23883814589405725
gamma: 0.1, c: 100, epsilon: 0.01, Mean Cross-Validation Score: -2.2460567228339423, Stddev: 0.240421694959

gamma: 10, c: 1, epsilon: 1e-05, Mean Cross-Validation Score: -2.6698570145683314, Stddev: 0.9132355448349485
gamma: 10, c: 1, epsilon: 0.0001, Mean Cross-Validation Score: -2.6698303806445347, Stddev: 0.9132421774104262
gamma: 10, c: 1, epsilon: 0.01, Mean Cross-Validation Score: -2.66802066202961, Stddev: 0.910564299544904
gamma: 10, c: 1, epsilon: 0.1, Mean Cross-Validation Score: -2.648301351687758, Stddev: 0.8855121333788933
gamma: 10, c: 1, epsilon: 1, Mean Cross-Validation Score: -2.4492840789426573, Stddev: 0.6698068488643961
gamma: 10, c: 1, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 10, c: 1, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 10, c: 10, epsilon: 1e-07, Mean Cross-Validation Score: -2.451099226926165, Stddev: 0.5361698514768051
gamma: 10, c: 10, epsilon: 1e-06, Mean Cross-Validation Score: -2.4510991060797247, Stddev: 0.5361697790558325
gamma: 10, c: 10, epsilo

gamma: 100, c: 50, epsilon: 0.0001, Mean Cross-Validation Score: -2.294309758438083, Stddev: 0.34227233420142844
gamma: 100, c: 50, epsilon: 0.01, Mean Cross-Validation Score: -2.293158410104958, Stddev: 0.34133481833285784
gamma: 100, c: 50, epsilon: 0.1, Mean Cross-Validation Score: -2.2835911004491103, Stddev: 0.3334600007746135
gamma: 100, c: 50, epsilon: 1, Mean Cross-Validation Score: -2.3366320754782763, Stddev: 0.2339542773562836
gamma: 100, c: 50, epsilon: 10, Mean Cross-Validation Score: -2.6691066126855594, Stddev: 0.6413075491289674
gamma: 100, c: 50, epsilon: 100, Mean Cross-Validation Score: -2.6691066126855607, Stddev: 0.6413075491289675
gamma: 100, c: 100, epsilon: 1e-07, Mean Cross-Validation Score: -2.2460940162001193, Stddev: 0.23885259105019044
gamma: 100, c: 100, epsilon: 1e-06, Mean Cross-Validation Score: -2.246054065109862, Stddev: 0.23883502517615296
gamma: 100, c: 100, epsilon: 1e-05, Mean Cross-Validation Score: -2.2460842831373444, Stddev: 0.2388516575953407

In [41]:
print(best_score, best_cvs, best_params)
clf = svm.SVR(kernel='precomputed', epsilon=best_params[2], C=best_params[1], gamma=best_params[0])
sws = compute_sample_weights(y_train)
clf.fit(D_train,y_train, sample_weight=sws)
prediction = clf.predict(D_test)
prediction

1.737796655458835 [-2.27853796 -2.57854748 -2.38831455 -1.8812582  -2.10361214] [0.01, 20, 1e-07]


array([6.05034311, 2.98593642])