In [1]:
%matplotlib inline
from pysgpp import RegressionLearner, RegularGridConfiguration, AdpativityConfiguration, SLESolverConfiguration, \
RegularizationConfiguration, DataMatrix, DataVector

import sys, os
sys.path.append(os.path.abspath(os.path.join('..', 'src/')))
from sgpi.util import get_dataset, get_xy, get_r_squared, split, to_data_matrix

from sgpi import model
from sgpi.learner import SGRegressionLearner
from sgpi.grid_search import evaluate
from sgpi.bayes import Hyp_param, BayesOptReg

from sklearn.cross_validation import ShuffleSplit, KFold

import sqlalchemy as sa
from sqlalchemy import orm, func

from collections import namedtuple
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_style('darkgrid')
sns.set_palette(sns.color_palette("husl", 10), n_colors=10)

import pandas as pd

import bayesopt
from bayesoptmodule import BayesOptContinuous

import pysgpp as sg

In [3]:
# --- Data -------------------------------------------------------------------
# Load the 'friedman3' dataset and split it into train / test frames, then
# separate each frame into a feature part (X) and a target part (y).
df = get_dataset('friedman3')
df_train, df_test = split(df)
X_train, y_train = get_xy(df_train)
X_test, y_test = get_xy(df_test)

# --- Grid and adaptivity ----------------------------------------------------
grid_config = model.GridConfig(
    type=6,  # NOTE(review): bare numeric grid-type id; presumably a pysgpp grid type -- confirm
    level=2,
    T=0,
)
# Every adaptivity setting is zero for this experiment.
adaptivity_config = model.AdaptivityConfig(
    num_refinements=0,
    no_points=0,
    treshold=0.0,  # keyword spelling is the one used by this call; keep as is
    percent=0.0,
)

# --- Solver -----------------------------------------------------------------
# The same solver type is used for the main solve and for the final solve;
# the two configurations differ in iteration budget and threshold.
solv_type = sg.SLESolverType_FISTA
solver_config = model.SolverConfig(
    type=solv_type,
    max_iterations=1000,
    epsilon=1e-3,  # identical value to the literal 10e-4
    threshold=0.0,
)
final_solver_config = model.SolverConfig(
    type=solv_type,
    max_iterations=30,
    epsilon=1e-3,  # identical value to the literal 10e-4
    threshold=0.1,  # identical value to the literal 10e-2
)

# --- Regularization ---------------------------------------------------------
reg_type = sg.RegularizationType_ElasticNet
regularization_config = model.RegularizationConfig(
    type=reg_type,
    exponent_base=1,
    lambda_reg=0.1,
    l1_ratio=0.4,
)

# --- Estimator and cross-validation splitter --------------------------------
estimator = SGRegressionLearner(
    grid_config,
    regularization_config,
    solver_config,
    final_solver_config,
    adaptivity_config,
)

# Ten shuffled splits over the training rows, seeded for repeatability.
n_train_rows = X_train.shape[0]
cv = ShuffleSplit(n_train_rows, n_iter=10, random_state=42)


In [4]:
# Cross-validate the estimator exactly as configured: the empty dict means no
# parameters are passed for this run (the recorded Grid_score shows
# parameters={}).
# `evaluate` is already imported in the notebook's import cell, so it is not
# re-imported here; keeping imports in one place makes dependencies visible.
evaluate(estimator, {}, cv, X_train, y_train)

Grid_score(parameters={}, mean_validation_score=-0.028250816601906703, cv_validation_scores=[-0.02769839355425189, -0.024714032397784608, -0.02904671461548936, -0.027226343939057936, -0.02893862206614577, -0.02947172435568328, -0.027531560576885467, -0.03034285594644619, -0.029677370511880283, -0.027860548055442243], cv_grid_sizes=[9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L])

In [7]:
# Search space for the Bayesian optimisation: each entry names an estimator
# parameter (double-underscore path) followed by its lower and upper bound.
params = [
    Hyp_param('regularization_config__lambda_reg', 0.0, 5.0),
    Hyp_param('regularization_config__l1_ratio', 0.0, 1.0),
]

# NOTE(review): the positional 10 is presumably the optimiser's iteration
# budget -- confirm against BayesOptReg's signature.
opt = BayesOptReg(
    estimator, cv, X_train, y_train, params, 10,
    n_init_samples=2,
)

In [8]:
# Display the optimiser's lower and upper bound vectors as a (lower, upper)
# pair, to check them against the Hyp_param ranges given when `opt` was built.
opt.lower_bound, opt.upper_bound

(array([ 0.,  0.]), array([ 5.,  1.]))

In [9]:
# Run the optimisation and unpack its three results.
# NOTE(review): `params` was the list of Hyp_param search ranges handed to
# BayesOptReg; this unpacking re-binds it to the optimiser's returned
# parameters, so the name changes meaning from here on.
optimum = opt.optimize()
mval, params, best_grid = optimum

# Show the three results together.
mval, params, best_grid

{'regularization_config__l1_ratio': 0.59006271080579609, 'regularization_config__lambda_reg': 2.6130466302856803}
Grid_score(parameters={'regularization_config__l1_ratio': 0.59006271080579609, 'regularization_config__lambda_reg': 2.6130466302856803}, mean_validation_score=-0.028271373797753418, cv_validation_scores=[-0.027730750879504492, -0.024724642482352933, -0.02905416532339644, -0.027203023381947782, -0.028959738442652875, -0.029527286837431273, -0.027532385038519597, -0.030343863359215205, -0.029720720203388114, -0.027917162029125496], cv_grid_sizes=[9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L])
{'regularization_config__l1_ratio': 0.24530289939139038, 'regularization_config__lambda_reg': 0.29283551091793925}
Grid_score(parameters={'regularization_config__l1_ratio': 0.24530289939139038, 'regularization_config__lambda_reg': 0.29283551091793925}, mean_validation_score=-0.028254032420649222, cv_validation_scores=[-0.02770148429028384, -0.024716477358507762, -0.029051076480487124, -0.02722

(0.028248165024454687,
 {'regularization_config__l1_ratio': 0.99998120309867777,
  'regularization_config__lambda_reg': 2.4518773711379431},
 [9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L])

In [26]:
# Scratch check of RegularizationConfig.set_params / get_params: build an
# empty config, set two fields by keyword, then read everything back.
# NOTE(review): the output recorded for this cell shows 'l1_ratio' still None
# after this call, although lambda_reg was stored -- verify set_params.
t = model.RegularizationConfig()
t.set_params(lambda_reg=0.29513056948781013, l1_ratio=0.2)
t.get_params()

{'exponent_base': None,
 'l1_ratio': None,
 'lambda_reg': 0.29513056948781013,
 'type': None}

In [35]:
# Fit the SGRegressionLearner on the training split. Later cells read the
# fitted state through estimator.score(...) and estimator.get_weights().
estimator.fit(X_train, y_train)

In [36]:
# Negate the estimator's score on each split and show (train, test).
# NOTE(review): score() presumably returns a negated error (the CV scores
# recorded earlier are negative), so the negation yields a positive error --
# confirm against SGRegressionLearner.score.
train_error = -estimator.score(X_train, y_train)
test_error = -estimator.score(X_test, y_test)
train_error, test_error

(12.829890872186757, 16.689242068617364)

In [37]:
# Absolute values of the fitted weights, sorted in ascending order.
# np.abs already yields a fresh array, so sorting it leaves the estimator's
# own weights untouched.
w = np.sort(np.abs(estimator.get_weights()))

In [22]:
# Display the raw (signed, unsorted) weight vector of the estimator.
# NOTE(review): this cell's execution count (22) is lower than that of the
# cells around it (37, 38), so the output recorded below comes from an earlier
# run and may not match the current fit -- re-run in order before relying on it.
estimator.get_weights()

array([  4.55592052e+02,   1.48563216e+01,  -8.07785255e+00,
         2.29530719e+00,  -3.62313467e+00,  -2.88185953e+00,
         0.00000000e+00,  -1.26395879e+00,  -1.78130204e-02,
        -7.84770010e-02,  -8.23390808e-02,   3.41954563e-01,
        -7.72220425e-01,   0.00000000e+00,   0.00000000e+00,
        -2.63800293e-01,  -8.57804077e-02,   2.66373585e-01,
         0.00000000e+00,  -1.87151992e-01,   1.31775989e-01,
        -9.04490195e-02,  -6.78340298e-01,  -2.37619989e-01,
        -3.67529271e-01,  -1.04440802e-02,  -6.33533316e-03,
         9.77554116e-02,  -1.27973034e-01,   0.00000000e+00,
         0.00000000e+00,  -4.02727699e-01,  -4.20997737e+00,
         4.00886803e-01,   5.30257714e+00,  -1.33435130e+00,
        -9.73640686e-01,   1.64553998e-02,   0.00000000e+00,
        -4.78039137e-01,  -5.11404516e-01,  -1.33950689e+00,
        -7.51677607e-01,   6.42060382e-01,   3.53510109e+00,
         1.54777579e-01,   0.00000000e+00,   0.00000000e+00,
         2.73068315e-02,

In [38]:
# Number of weights that are exactly zero.
np.count_nonzero(w == 0.0)

33

In [39]:
# Total number of weights, for comparison with the zero-weight count.
# NOTE(review): the recorded value (769) differs from the grid sizes of 9
# reported by the cross-validation run earlier in this notebook; the estimator
# was presumably reconfigured in a cell that is not part of this notebook --
# confirm.
len(w)

769

In [4]:
# Show the cross-validation splitter's settings as text; the recorded output
# reports 8000 samples, n_iter=10, test_size=0.1 and random_state=42.
# NOTE(review): this cell carries execution count 4, the same as the evaluate
# cell further up, and sits at the end of the notebook -- it was run out of
# order.
str(cv)

'ShuffleSplit(8000, n_iter=10, test_size=0.1, random_state=42)'