In [1]:
%matplotlib inline
from pysgpp import Grid, SurplusRefinementFunctor, DataVector

import sys, os
sys.path.append(os.path.abspath(os.path.join('..', 'src/')))
from sgpi.util import get_dataset, get_xy, get_r_squared, split, to_data_matrix
from sgpi.plot import *

from sgpi import model
from sgpi.learner import SGRegressionLearner

import numpy as np
import pandas as pd
import pysgpp as sg

In [2]:
# Load the 'friedman1' dataset and derive train/test feature/target pairs.
df = get_dataset('friedman1')
df_train, df_test = split(df)
X_train, y_train = get_xy(df_train)
X_test, y_test = get_xy(df_test)

# Grid and refinement settings.
# NOTE(review): `type=6` is an opaque grid-type code here -- confirm which grid it selects.
grid_config = model.GridConfig(
    type=6,
    level=2,
    T=0,
)
adaptivity_config = model.AdaptivityConfig(
    num_refinements=1,
    no_points=1,
    treshold=0.0,  # keyword spelling is part of the call and kept as-is
    percent=0.0,
)

# Solver settings; the same config object is used for the final solve.
solv_type = sg.SLESolverType_FISTA
solver_config = model.SolverConfig(
    type=solv_type,
    max_iterations=400,
    epsilon=0,
    threshold=10e-6,  # note: 10e-6 == 1e-5
)
final_solver_config = solver_config

# Regularization settings.
reg_type = sg.RegularizationType_GroupLasso
max_lambda = 0.05
regularization_config = model.RegularizationConfig(
    type=reg_type,
    exponent_base=1,
    lambda_reg=max_lambda,
    l1_ratio=1.0,
)

# Assemble the learner from the four configurations.
estimator = SGRegressionLearner(
    grid_config,
    regularization_config,
    solver_config,
    final_solver_config,
    adaptivity_config,
)

In [3]:
# Fit the learner on the training split.
estimator.fit(X_train, y_train)

In [4]:
# Negated score of the fitted learner on the training split.
# Previously recorded value: 1.0911194496858498 (before "fast grad").
-estimator.score(X_train, y_train)

2.3726483788791497

In [5]:
# Absolute values of the learner's weights, in ascending order.
sorted(np.abs(estimator.get_weights()))

[0.0020317198410307368,
 0.0033451756416796987,
 0.0048293301426353078,
 0.0084890804411788777,
 0.0098367109539943352,
 0.011635461353768901,
 0.01190649253887276,
 0.014820649854315153,
 0.015889488908372999,
 0.021118449428737005,
 0.021167136456950807,
 0.021717245681194583,
 0.026243449837783529,
 0.027355794145845234,
 0.027558154038606571,
 0.027669184058857647,
 0.030171475227671232,
 0.031279066136862842,
 0.036527796611760123,
 0.044282188871957534,
 0.045989096340115471,
 0.051844932127228974,
 0.053740742006123665,
 0.055086621321497158,
 0.05924046733509121,
 0.086661920103260667,
 0.10498733889259343,
 0.15088869012836692,
 0.28860429357857392,
 0.70350336586901452,
 1.1871622753211886,
 1.3220638269240721,
 1.3473104520822745,
 2.4071604169316099,
 2.425658407222798,
 2.444985971126941,
 2.4844268868117934,
 2.5041166113424742,
 4.5680622718768928,
 5.1812218054333474,
 16.451355111565828]

In [3]:
# Build a 4-dimensional linear grid, refine it once, and tabulate the
# coordinates and levels of every grid point.
# NOTE: this rebinds `df`, which the dataset cell also assigns.
dim = 4
grid = Grid.createLinearGrid(dim)
storage = grid.getStorage()
gen = grid.getGenerator()
gen.regular(5, 0.0)

# Refine the grid once, driven by random surplus values.
# A locally seeded generator keeps the refined grid (and every count derived
# from it below) reproducible across kernel restarts without touching numpy's
# global random state.
rng = np.random.RandomState(0)
alpha = DataVector(rng.random_sample(grid.getSize()))
ref = SurplusRefinementFunctor(alpha, 10)
gen.refine(ref)

# One row per grid point, one column per dimension.
coords = []
levels = []
for idx in range(grid.getSize()):
    point = storage.get(idx)
    coords.append(np.array([point.getCoord(d) for d in range(dim)]))
    levels.append(np.array([point.getLevel(d) for d in range(dim)]))
df = pd.DataFrame(coords)
df_level = pd.DataFrame(levels)

In [4]:
# Per-dimension summary statistics of the grid-point coordinates held in `df`.
df.describe()

Unnamed: 0,0,1,2,3
count,848.0,848.0,848.0,848.0
mean,0.506191,0.489976,0.500369,0.496167
std,0.199132,0.195963,0.196356,0.202976
min,0.03125,0.03125,0.03125,0.03125
25%,0.375,0.375,0.375,0.375
50%,0.5,0.5,0.5,0.5
75%,0.625,0.5625,0.625,0.625
max,0.96875,0.96875,0.96875,0.96875


In [5]:
# For each distinct coordinate in dimension 0 (in order of first appearance),
# report how many grid points carry it.
first_dim = df[0]
for coord in first_dim.unique():
    n_points = (first_dim == coord).sum()
    print("Coord: {},\tcount: {}".format(coord, n_points))

Coord: 0.5,	count: 369
Coord: 0.25,	count: 126
Coord: 0.75,	count: 131
Coord: 0.125,	count: 33
Coord: 0.375,	count: 33
Coord: 0.625,	count: 34
Coord: 0.875,	count: 40
Coord: 0.0625,	count: 7
Coord: 0.1875,	count: 7
Coord: 0.3125,	count: 7
Coord: 0.4375,	count: 7
Coord: 0.5625,	count: 13
Coord: 0.6875,	count: 7
Coord: 0.8125,	count: 8
Coord: 0.9375,	count: 8
Coord: 0.03125,	count: 1
Coord: 0.09375,	count: 1
Coord: 0.15625,	count: 1
Coord: 0.21875,	count: 1
Coord: 0.28125,	count: 1
Coord: 0.34375,	count: 1
Coord: 0.40625,	count: 1
Coord: 0.46875,	count: 1
Coord: 0.53125,	count: 2
Coord: 0.59375,	count: 2
Coord: 0.65625,	count: 1
Coord: 0.71875,	count: 1
Coord: 0.78125,	count: 1
Coord: 0.84375,	count: 1
Coord: 0.90625,	count: 1
Coord: 0.96875,	count: 1


In [6]:
def decode(num):
    """Return a boolean array marking which entries of ``num`` equal 0.5.

    ``num`` is a sequence of coordinates; the result has one boolean per
    entry, True exactly where the entry is 0.5.
    """
    halves = np.full(len(num), 0.5)
    return np.equal(halves, num)

In [31]:
# Group grid points by their decoded boolean pattern.
#   terms      : pattern -> list of point indices having that pattern
#   groups     : pattern -> integer id, assigned in order of first appearance
#   terms_nums : group id of every point, in `coords` order
terms, groups, terms_nums = {}, {}, []
for point_idx, point in enumerate(coords):
    pattern = tuple(decode(point))
    # A new pattern receives the next free id; a known one keeps its id.
    group_id = groups.setdefault(pattern, len(groups))
    terms.setdefault(pattern, []).append(point_idx)
    terms_nums.append(group_id)

In [32]:
# Print each boolean pattern followed by the number of grid points in it.
# The function-call form of print matches the other cells and runs on both
# Python 2 and Python 3 with the same output as the old print statement.
for key in terms:
    print("{} {}".format(key, len(terms[key])))

(True, False, True, True) 30
(True, True, False, True) 32
(True, True, True, False) 30
(False, False, False, False) 24
(True, False, True, False) 68
(False, True, True, True) 30
(False, False, True, True) 72
(True, True, True, True) 1
(False, False, True, False) 66
(True, False, False, True) 70
(False, True, True, False) 72
(False, True, False, False) 69
(False, True, False, True) 78
(True, False, False, False) 64
(False, False, False, True) 68
(True, True, False, False) 74


In [33]:
# Integer id assigned to each distinct boolean pattern, in order of first appearance.
groups

{(False, False, False, False): 15,
 (False, False, False, True): 7,
 (False, False, True, False): 11,
 (False, False, True, True): 3,
 (False, True, False, False): 13,
 (False, True, False, True): 5,
 (False, True, True, False): 9,
 (False, True, True, True): 1,
 (True, False, False, False): 14,
 (True, False, False, True): 6,
 (True, False, True, False): 10,
 (True, False, True, True): 2,
 (True, True, False, False): 12,
 (True, True, False, True): 4,
 (True, True, True, False): 8,
 (True, True, True, True): 0}

In [34]:
# Group id of every grid point, one entry per point in `coords` order.
terms_nums

[0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
