In [1]:
import sqlite3
import pickle
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.cross_validation import train_test_split
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Queue


### Dump train data (continuous)

In [2]:
conn = sqlite3.connect('data.dat')
cursor = conn.cursor()

cursor.execute("SELECT IMAGE, RESPONSE FROM TRAIN")
result = cursor.fetchall()

In [3]:
image = [pickle.loads(r[0]).reshape(-1) for r in result]

In [5]:
response = [r[1] for r in result]

In [5]:
clf = SVR(C=100, epsilon=0.05, gamma='auto', kernel='rbf', shrinking=True)

In [6]:
clf.fit(image, response)

SVR(C=100, cache_size=200, coef0=0.0, degree=3, epsilon=0.05, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [11]:
clf.predict(image[0:3])

array([-0.25044157, -0.25044157, -0.55044157])

In [9]:
response[10]

-0.9

### Splite dataset

In [6]:
x_train, x_test, y_train, y_test = train_test_split(image, response,
                                                   test_size=0.2, random_state=0)

### Choose c and gamma

In [8]:
range_c = (1, 1000)
range_gamma = (1/494, 10)
iter_round = 10
cs = []
gammas = []
variances = []
biases = []
data_queue = Queue()

def generate_parameter():
    for i in range(0, iter_round):
        c = (range_c[1] - range_c[0])/iter_round + range_c[0]
        for j in range(0, iter_round):
            g = (range_gamma[1] - range_gamma[0])/iter_round + range_gamma[0]
            yield c, g

# vectors
def cost(x, y):
    x_array = np.array(x)
    y_array = np.array(y)
    square = x_array.dot(y_array)
    return square/y_array.shape[0]

def calcu(c, gamma, x_tr, y_tr, x_te, y_te, queue):
    clf = SVR(C=c, epsilon=0.05, kernel='rbf', gamma=gamma, shrinking=True)
    clf.fit(x_tr, y_tr)
    var = cost(clf.predict(x_tr), y_tr)
    bias = cost(clf.predict(x_te), y_te)
    queue.put((c, gamma, var, bias))
    print('c: %f', 'gamma: %f' % (c,gamma))

"""
with ProcessPoolExecutor(max_workers=4) as executor:
    for c, gamma in generate_parameter():
        executor.submit(calcu, c, gamma, x_train, y_train, x_test, y_test,
                       data_queue)
"""


'\nwith ProcessPoolExecutor(max_workers=4) as executor:\n    for c, gamma in generate_parameter():\n        executor.submit(calcu, c, gamma, x_train, y_train, x_test, y_test,\n                       data_queue)\n'

### No parallel

In [10]:
range_c = (1, 1000)
range_gamma = (1/494, 10)
iter_round = 10
cs = []
gammas = []
variances = []
biases = []
for c, gamma in generate_parameter():
    clf = SVR(C=c, epsilon=0.05, kernel='rbf', gamma=gamma, shrinking=True)
    clf.fit(x_train, y_train)
    var = cost(clf.predict(x_train), y_train)
    bias = cost(clf.predict(x_test), y_test)
    cs.append(c)
    gammas.append(gamma)
    variances.append(var)
    biases.append(bias)
    print('c: %f', 'gamma: %f' %(c, gamma))

pickle.dump(zip(cs, gammas, variances, biases), open('para_1_1000_1/494_10'， 'wb'))


TypeError: not all arguments converted during string formatting

### Plot 