## Sparse Grid Regression with the Combination Technique


### Utility functions

In [None]:
from sparseSpACE.Function import *
from sparseSpACE.Utils import *
import numpy as np
import math

def construct_dataset(dim, function, num_points):
    """Build a synthetic regression dataset from uniformly random samples.

    Args:
        dim: Number of coordinates of every sample point.
        function: Callable evaluated once per sample point; it receives a
            1-D array of length ``dim``.
        num_points: Number of sample points to draw.

    Returns:
        A tuple ``(grid_points, y_vals)`` where ``grid_points`` has shape
        ``(num_points, dim)`` with entries drawn by ``np.random.rand`` and
        ``y_vals`` is the flattened array of function values.
    """
    # The sample dimension follows `dim`; it used to be hard-coded to 2,
    # which silently ignored the argument for any other dimension.
    grid_points = np.random.rand(num_points, dim)
    y_vals = np.array([function(x) for x in grid_points])
    return grid_points, y_vals.flatten()

    
def split_dataset(data, targets):
    """Split samples into training and test sets with a fixed 4:1 pattern.

    Every sample whose index is a multiple of 5 (0, 5, 10, ...) goes to the
    test set; all other samples go to the training set. The original order
    is preserved within each set.

    Args:
        data: Sequence or array of samples, one row per sample.
        targets: Sequence or array with one target value per sample.

    Returns:
        A tuple ``(training_data, training_targets, test_data,
        test_targets)`` of new float arrays.

    Raises:
        IndexError: If ``targets`` has fewer entries than ``data``.
    """
    num_samples = len(data)
    # Results are always float arrays, independent of the input dtype.
    data = np.asarray(data, dtype=float)
    # Surplus targets beyond the number of samples are ignored.
    targets = np.asarray(targets, dtype=float)[:num_samples]

    # A boolean mask selects every fifth sample for testing; this also
    # works for an empty dataset, which then yields four empty arrays.
    is_test = np.arange(num_samples) % 5 == 0
    is_training = ~is_test

    return data[is_training], targets[is_training], data[is_test], targets[is_test]


def train_regression(training_data, training_targets, regularization, matrix, minimum_level, maximum_level):
    """Set up a Regression operation and run it through a StandardCombi.

    The dimension is taken from the length of the first training sample and
    the combination object is created on the box from all-zeros to all-ones
    in that dimension.

    Args:
        training_data: Training samples, one row per sample.
        training_targets: Target values of the training samples.
        regularization: Regularization value handed to ``Regression``.
        matrix: Passed to ``Regression`` as ``regularization_matrix``.
        minimum_level: First argument of ``perform_operation``.
        maximum_level: Second argument of ``perform_operation``.

    Returns:
        A tuple ``(operation, combiObject)`` with the ``Regression``
        operation and the ``StandardCombi`` it was performed on.
    """
    num_features = len(training_data[0])
    lower_bounds, upper_bounds = np.zeros(num_features), np.ones(num_features)

    operation = Regression(
        training_data,
        training_targets,
        regularization,
        num_features,
        regularization_matrix=matrix,
    )

    combiObject = StandardCombi(lower_bounds, upper_bounds, operation=operation)
    combiObject.perform_operation(minimum_level, maximum_level)

    return operation, combiObject


def test_regression(test_data, test_targets, combiObject):
    learned_targets = combiObject(test_data)
    
    difference = 0
    for i in range(len(learned_targets)):
        difference += (test_targets[i] - learned_targets[i]) ** 2
        
    return math.sqrt(difference/len(test_targets))

def scale_data(data, target, rangee=None):
    """Scale a dataset through ``DataSetRegression.scale_range``.

    Args:
        data: Sample array of the dataset.
        target: Target values belonging to ``data``.
        rangee: Range handed to ``scale_range``. Defaults to
            ``[0.05, 0.95]`` when omitted.

    Returns:
        A tuple ``(data, target)`` holding the first two entries of the
        data set's ``get_data()`` result after scaling.
    """
    # A fresh list per call: a mutable default in the signature would be
    # shared between calls and is handed to an external method here.
    if rangee is None:
        rangee = [0.05, 0.95]

    dataSet = DataSetRegression((data, target))
    dataSet.scale_range(rangee)
    scaled = dataSet.get_data()
    return scaled[0], scaled[1]

### Version without spatial adaptivity:

In [None]:
# import sparseSpACE
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sparseSpACE.DEMachineLearning import *
from sklearn import datasets

# Experiment without spatial adaptivity: load one dataset, scale it, split it
# and sweep the regularization value for two regularization matrices.
# Exactly one of the dataset sections below should be active.

# ---- Dataset 0 (Gaussian) ----
#func = GenzGaussian((0.5,0.5), (15,15))
#data, target = construct_dataset(2, func, 400)

# ---- Dataset 0 (Stern) ----
# func = GenzC0((10,10), (0.5,0.5))
# data, target = construct_dataset(2, func, 400)
#func.plot((0,0),(1,1))

# ---- Dataset 1 (Boston) ----
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this line needs an older scikit-learn or a replacement dataset.
data, target = datasets.load_boston(return_X_y=True)

# ---- Dataset 2 (Diabetes) ----
# data, target = datasets.load_diabetes(return_X_y=True)

# scale with the default range of scale_data
data, target = scale_data(data, target)
# split the dataset into training and test data
training_data, training_targets, test_data, test_targets = split_dataset(data, target)

# One block of LaTeX table rows ("regularization & test error \\") per
# regularization matrix; matrix 0 is swept first, then matrix 1.
table_sections = []
for reg_matrix in (0, 1):
    table_rows = []
    for i in range(10):
        reg_strength = 10**-i
        operation, combiObject = train_regression(
            training_data,
            training_targets,
            regularization=reg_strength,
            matrix=reg_matrix,
            minimum_level=1,
            maximum_level=4,
        )
        test_rmse = test_regression(test_data, test_targets, combiObject)
        table_rows.append(str(reg_strength) + " & " + str(test_rmse) + "\\\\\n")
    table_sections.append("".join(table_rows))
stringBuilder1, stringBuilder2 = table_sections

print("Tabelle:")
print(stringBuilder1)
print(stringBuilder2)

# From here on `operation` and `combiObject` are those of the last run of the
# sweep (matrix=1, regularization=10**-9).
print("Combination Scheme:")
# when you pass the operation the function also plots the contour plot of each component grid
combiObject.print_resulting_combi_scheme(operation=operation)
print("Sparse Grid:")
#combiObject.print_resulting_sparsegrid(markersize=20)
print("Plot of Regression:")
# when contour = True, the contour plot is shown next to the 3D plot
combiObject.plot(contour=True)

### Version with spatial adaptivity:

In [None]:
def test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, grid):
    learned_targets = adaptiveCombiInstanceSingleDim(test_data)
    
    difference = 0
    for i in range(len(learned_targets)):
        difference += (test_targets[i] - learned_targets[i]) ** 2
        
    return difference/len(test_targets)

In [None]:
%matplotlib inline
import sparseSpACE
import numpy as np
from sparseSpACE.spatiallyAdaptiveSingleDimension2 import *
from sparseSpACE.Function import *
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *

# Experiment with spatial adaptivity: train a Regression operation with the
# single-dimension refinement strategy and print the error on the test split.

# dimension of the problem; 13 matches the Boston dataset selected below and
# has to be changed together with the dataset
dim = 13

# boundaries of the domain: the box from all-zeros to all-ones
a = np.zeros(dim)
b = np.ones(dim)

# leftover, unused setup (commented out)
#midpoint = np.ones(dim) * 0.5
#coefficients = np.array([ 10**0 * (d+1) for d in range(dim)])

# ---------------------------------- Dataset 0 (Gaussian) -------------------
#func = GenzGaussian((0.5,0.5), (10,10))
#data, target = construct_dataset(dim, func, 500)
# ---------------------------------- Dataset 0 (Gaussian) -------------------

# ---------------------------------- Dataset 1 (Linear) ---------------------
#func = FunctionLinear((1,1))
#data, target = construct_dataset(dim, func, 500)
# ---------------------------------- Dataset 1 (Linear) ---------------------

# ---------------------------------- Dataset 2 (Boston) dimension = 13 ------
# `datasets` is not imported in this cell; it comes from the
# `from sklearn import datasets` of the non-adaptive cell, which must run first.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this line needs an older scikit-learn or a replacement dataset.
data, target = datasets.load_boston(return_X_y=True)
# debug output of the raw dataset
print(data)
print(target)
# -------------------------------------- Dataset 2 (Boston) -----------------

# ---------------------------------- Dataset 3 (Diabetes) dimension = 10 ----
# data, target = datasets.load_diabetes(return_X_y=True)
# ---------------------------------- Dataset 3 (Diabetes) -------------------


# NOTE(review): unlike the non-adaptive cell, the data is not passed through
# scale_data here although the domain is [0, 1]^dim - confirm this is intended.
# split the dataset into training and test data
training_data, training_targets, test_data, test_targets = split_dataset(data, target)

# define error estimator for refinement
errorOperator = ErrorCalculatorSingleDimVolumeGuided()

# define the grid on [a, b] (no modified basis, no boundary points)
grid=GlobalTrapezoidalGrid(a=a, b=b, modified_basis=False, boundary=False)

# define operation which shall be performed in the combination technique
# (regularization value 1e-7, regularization_matrix 0)
operation = Regression(training_data, training_targets, 0.0000001, dim, regularization_matrix=0, grid=grid)

# define SingleDim refinement strategy for Spatially Adaptive Combination Technique
adaptiveCombiInstanceSingleDim = SpatiallyAdaptiveSingleDimensions2(np.ones(dim) * a, np.ones(dim) * b, margin=0.1, operation=operation, rebalancing=False)

# performing the spatially adaptive refinement with the SingleDim method
# NOTE(review): 1 and 2 are presumably the minimum and maximum level and
# 10**-3 the error tolerance - confirm against performSpatiallyAdaptiv
adaptiveCombiInstanceSingleDim.performSpatiallyAdaptiv(1, 2, errorOperator, 10**-3, do_plot=True, max_evaluations=200)

# mean squared error on the test split (the helper takes no square root)
difference = test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, operation.grid)

print("Testfehler:")
print(difference)