# Sparse Grid Regression with the Combination Technique: 


### Utility functions :

In [None]:
from sparseSpACE.Function import *
from sparseSpACE.Utils import *
from sparseSpACE.DEMachineLearning import *
import numpy as np
import math

def construct_dataset(dim, function, num_points):
    """Sample a random regression data set from ``function``.

    Args:
        dim: Number of coordinates of every sample point.
        function: Callable evaluated once per sample point; it receives a
            1-D array of length ``dim``.
        num_points: Number of sample points to draw.

    Returns:
        A tuple ``(points, values)``. ``points`` has shape
        ``(num_points, dim)`` and is drawn with ``np.random.rand``;
        ``values`` is the flattened array of ``function`` evaluated at
        each point.
    """
    # Honour ``dim``: the column count used to be hard-coded to 2, so any
    # other dimension silently produced 2-D points.
    grid_points = np.random.rand(num_points, dim)
    y_vals = np.array([function(x) for x in grid_points])
    return grid_points, y_vals.flatten()

    
def split_dataset(data, targets, parts):
    """Split samples into a training and a test set by position.

    Every sample whose index ``i`` satisfies ``i % parts == 0`` goes to the
    test set; all remaining samples go to the training set. The relative
    order of the samples is kept.

    Args:
        data: Indexable collection of samples; each sample is a sequence
            whose length is taken from ``data[0]``.
        targets: Indexable collection with one target value per sample.
        parts: Stride of the hold-out pattern (every ``parts``-th sample,
            starting with index 0, is a test sample).

    Returns:
        ``(training_data, training_targets, test_data, test_targets)`` as
        newly allocated float arrays.
    """
    sample_count = len(data)
    held_out = [idx for idx in range(sample_count) if idx % parts == 0]
    kept = [idx for idx in range(sample_count) if idx % parts != 0]

    width = len(data[0])

    training_data = np.zeros((len(kept), width))
    training_targets = np.zeros(len(kept))
    for row, source in enumerate(kept):
        training_data[row] = data[source]
        training_targets[row] = targets[source]

    test_data = np.zeros((len(held_out), width))
    test_targets = np.zeros(len(held_out))
    for row, source in enumerate(held_out):
        test_data[row] = data[source]
        test_targets[row] = targets[source]

    return training_data, training_targets, test_data, test_targets


def train_regression(training_data, training_targets, regularization, matrix, minimum_level, maximum_level):
    """Set up a ``Regression`` operation and run it through ``StandardCombi``.

    The problem dimension is the length of the first training sample, and
    the domain handed to ``StandardCombi`` is the box from the all-zeros
    vector to the all-ones vector.

    Args:
        training_data: Training samples; ``training_data[0]`` fixes the
            dimension.
        training_targets: Target value per training sample.
        regularization: Regularization parameter passed to ``Regression``.
        matrix: Value forwarded as ``regularization_matrix``.
        minimum_level: First argument of ``perform_operation``.
        maximum_level: Second argument of ``perform_operation``.

    Returns:
        ``(operation, combiObject)``: the ``Regression`` instance and the
        ``StandardCombi`` instance after ``perform_operation`` has run.
    """
    num_features = len(training_data[0])

    operation = Regression(
        training_data,
        training_targets,
        regularization,
        num_features,
        regularization_matrix=matrix,
    )

    combiObject = StandardCombi(np.zeros(num_features), np.ones(num_features), operation=operation)
    combiObject.perform_operation(minimum_level, maximum_level)

    return operation, combiObject


def test_regression(test_data, test_targets, combiObject):
    """Return the root-mean-square error of ``combiObject`` on a test set.

    Args:
        test_data: Sample points; passed unchanged to ``combiObject``.
        test_targets: Expected value for each sample point.
        combiObject: Callable mapping ``test_data`` to a sequence of
            predicted values, one per sample.

    Returns:
        ``sqrt(sum((target - prediction) ** 2) / len(test_targets))``.

    Raises:
        ValueError: If ``test_targets`` is empty or the number of
            predictions differs from the number of targets.
    """
    learned_targets = combiObject(test_data)

    num_targets = len(test_targets)
    if num_targets == 0:
        raise ValueError("test_targets must not be empty")
    if len(learned_targets) != num_targets:
        # A shorter prediction vector used to be summed over fewer terms
        # while still being divided by the full target count, which
        # silently under-reported the error.
        raise ValueError(
            "number of predictions (%d) does not match number of targets (%d)"
            % (len(learned_targets), num_targets)
        )

    squared_error = sum(
        (expected - predicted) ** 2
        for expected, predicted in zip(test_targets, learned_targets)
    )
    return math.sqrt(squared_error / num_targets)


# The ``test_`` prefix matches pytest's default function pattern. This is an
# evaluation helper, not a test case, so opt out of collection in case it is
# defined in or imported into a test module.
test_regression.__test__ = False

def scale_data(data, target, rangee=None):
    """Rescale a data set with ``DataSetRegression.scale_range``.

    Args:
        data: Sample points.
        target: Target value per sample.
        rangee: Range handed to ``scale_range``. Defaults to
            ``[0.05, 0.95]``.

    Returns:
        ``(data, target)``: the first two entries of
        ``DataSetRegression.get_data()`` after scaling.
    """
    if rangee is None:
        # Build the default per call instead of sharing one list object
        # across calls (mutable default argument), since the list is passed
        # on to external code.
        rangee = [0.05, 0.95]

    dataSet = DataSetRegression((data, target))
    dataSet.scale_range(rangee)

    # One lookup is enough; the result was previously fetched twice.
    scaled = dataSet.get_data()
    return scaled[0], scaled[1]


def test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, grid):
    """Return the root-mean-square error of an adaptive scheme on a test set.

    Performs the same computation as ``test_regression``; only the name of
    the evaluated object differs.

    Args:
        test_data: Sample points; passed unchanged to
            ``adaptiveCombiInstanceSingleDim``.
        test_targets: Expected value for each sample point.
        adaptiveCombiInstanceSingleDim: Callable mapping ``test_data`` to a
            sequence of predicted values, one per sample.
        grid: Unused; kept so existing call sites keep working.

    Returns:
        ``sqrt(sum((target - prediction) ** 2) / len(test_targets))``.

    Raises:
        ValueError: If ``test_targets`` is empty or the number of
            predictions differs from the number of targets.
    """
    learned_targets = adaptiveCombiInstanceSingleDim(test_data)

    num_targets = len(test_targets)
    if num_targets == 0:
        raise ValueError("test_targets must not be empty")
    if len(learned_targets) != num_targets:
        # A shorter prediction vector used to be summed over fewer terms
        # while still being divided by the full target count, which
        # silently under-reported the error.
        raise ValueError(
            "number of predictions (%d) does not match number of targets (%d)"
            % (len(learned_targets), num_targets)
        )

    squared_error = sum(
        (expected - predicted) ** 2
        for expected, predicted in zip(test_targets, learned_targets)
    )
    return math.sqrt(squared_error / num_targets)


# The ``test_`` prefix matches pytest's default function pattern. This is an
# evaluation helper, not a test case, so opt out of collection in case it is
# defined in or imported into a test module.
test_regression_spatially_adaptive.__test__ = False

def find_best_lambda_C(data, targets, level_min=1, level_max=5):
    """Pick the regularization parameter with the lowest hold-out error.

    Trains with regularization matrix flag ``0`` for every lambda in
    ``10**-3 ... 10**-9`` and evaluates each model with
    ``test_regression`` on the hold-out part of a ``split_dataset(..., 4)``
    split.

    Args:
        data: Sample points.
        targets: Target value per sample.
        level_min: Minimum level forwarded to ``train_regression``.
        level_max: Maximum level forwarded to ``train_regression``.

    Returns:
        The candidate ``10.0 ** -k`` (``k`` in 3..9) with the smallest test
        error; the first one wins on ties.
    """
    exponents = range(3, 10)

    # Use the ``targets`` argument. The code used to read a global named
    # ``target``, which is either undefined or an unrelated data set.
    training_data, training_targets, test_data, test_targets = split_dataset(data, targets, 4)

    errors = np.zeros(len(exponents))
    for slot, exponent in enumerate(exponents):
        _, combiObject = train_regression(training_data, training_targets, 10**-exponent, 0, level_min, level_max)
        errors[slot] = test_regression(test_data, test_targets, combiObject)

    # Map the winning slot back to its exponent. The old code returned
    # 10**-slot (1 ... 10**-6), i.e. a lambda that was never evaluated.
    best_exponent = exponents[int(np.argmin(errors))]
    return 10.**-best_exponent
    
def find_best_lambda_I(data, targets, level_min=1, level_max=5):
    """Pick the regularization parameter with the lowest hold-out error.

    Trains with regularization matrix flag ``1`` for every lambda in
    ``10**-3 ... 10**-9`` and evaluates each model with
    ``test_regression`` on the hold-out part of a ``split_dataset(..., 4)``
    split.

    Args:
        data: Sample points.
        targets: Target value per sample.
        level_min: Minimum level forwarded to ``train_regression``.
        level_max: Maximum level forwarded to ``train_regression``.

    Returns:
        The candidate ``10.0 ** -k`` (``k`` in 3..9) with the smallest test
        error; the first one wins on ties.
    """
    exponents = range(3, 10)

    # Use the ``targets`` argument. The code used to read a global named
    # ``target``, which is either undefined or an unrelated data set.
    training_data, training_targets, test_data, test_targets = split_dataset(data, targets, 4)

    errors = np.zeros(len(exponents))
    for slot, exponent in enumerate(exponents):
        _, combiObject = train_regression(training_data, training_targets, 10**-exponent, 1, level_min, level_max)
        errors[slot] = test_regression(test_data, test_targets, combiObject)

    # Map the winning slot back to its exponent. The old code returned
    # 10**-slot (1 ... 10**-6), i.e. a lambda that was never evaluated.
    best_exponent = exponents[int(np.argmin(errors))]
    return 10.**-best_exponent

## Test: Increasing size of dataset -> Error
Information: 
- Data set: Gaussian function ((i*10)^2 points with i in [1,10])
- levels: 1,5
- training / test ratio: 1/5
- lambda: chosen per data set with `find_best_lambda_C` / `find_best_lambda_I`
- matrix: C and I

In [None]:
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sklearn import datasets

# Test function that is sampled to build the data sets.
func = GenzGaussian((0.5,0.5), (10,10))

# Matrix flag 0 (the "C" variant): one LaTeX table row per data set size.
stringBuilderC = ""
for i in range(1, 11):
    num_points = (i * 10) ** 2
    data, target = construct_dataset(2, func, num_points)
    # every 5th sample is held out for testing
    training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)
    lambdaC = find_best_lambda_C(data, target, 1, 5)
    operation, combiObject = train_regression(training_data, training_targets, lambdaC, 0, 1, 5)
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderC += str(num_points) + " & " + str(rmse) + "\\\\\n"

# Matrix flag 1 (the "I" variant), on freshly sampled data sets.
stringBuilderI = ""
for i in range(1, 11):
    num_points = (i * 10) ** 2
    data, target = construct_dataset(2, func, num_points)
    training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)
    lambdaI = find_best_lambda_I(data, target, 1, 5)
    operation, combiObject = train_regression(training_data, training_targets, lambdaI, 1, 1, 5)
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderI += str(num_points) + " & " + str(rmse) + "\\\\\n"

# "Testfehler" is German for "test error".
print("Testfehler:")
print(stringBuilderC)
print(stringBuilderI)

## Test: Increasing number of grid points -> Error
Information: 
- Data set: Gaussian function (400 points)
- levels: 1,x with x in [2,8]
- training / test ratio: 1/5
- lambda: 0.01
- matrix: C and I

In [None]:
# import sparseSpACE
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sklearn import datasets

# One fixed 400-point sample of the test function is used for all levels.
func = GenzGaussian((0.5,0.5), (10,10))
data, target = construct_dataset(2, func, 400)

# every 5th sample is held out for testing
training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)

# Fixed regularization parameters. The search calls they replace were
# find_best_lambda_C(data, target, 1, 4) and find_best_lambda_I(data, target, 1, 4).
lambdaC = 0.01
lambdaI = 0.01
print(lambdaC, lambdaI)

# Matrix flag 0 (the "C" variant): grid point count vs. test error per maximum level.
stringBuilderC = ""
for i in range(2, 9):
    operation, combiObject = train_regression(training_data, training_targets, lambdaC, 0, 1, i)
    num_grid_points = operation.get_grid().get_num_points()
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderC += str(num_grid_points) + " & " + str(rmse) + "\\\\\n"

# Matrix flag 1 (the "I" variant).
stringBuilderI = ""
for i in range(2, 9):
    operation, combiObject = train_regression(training_data, training_targets, lambdaI, 1, 1, i)
    num_grid_points = operation.get_grid().get_num_points()
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderI += str(num_grid_points) + " & " + str(rmse) + "\\\\\n"

# "Testfehler" is German for "test error".
print("Testfehler:")
print(stringBuilderC)
print(stringBuilderI)

## Test: Increasing regularization parameter lambda -> Error
Information: 
- Data set: diabetes
- levels: 1,4
- training / test ratio: 1/5
- lambda: 10^-i with i in [0,9] (plus one reference run with lambda = 0)
- matrix: C and I

In [None]:
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sparseSpACE.DEMachineLearning import *
from sklearn import datasets

# Load the diabetes data set and rescale it with scale_data's default range.
data, target = datasets.load_diabetes(return_X_y=True)
data, target = scale_data(data, target)

# every 5th sample is held out for testing
training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)

# Reference run without regularization (lambda = 0).
operation, combiObject = train_regression(training_data, training_targets, regularization=0, matrix=1, minimum_level=1, maximum_level=4)
print(test_regression(test_data, test_targets, combiObject))

# Matrix flag 0: one LaTeX table row per lambda = 10^-i.
stringBuilder1 = ""
for i in range(10):
    regularization_value = 10**-i
    operation, combiObject = train_regression(training_data, training_targets, regularization=regularization_value, matrix=0, minimum_level=1, maximum_level=4)
    stringBuilder1 += str(regularization_value) + " & " + str(test_regression(test_data, test_targets, combiObject)) + "\\\\\n"

# Matrix flag 1: same sweep.
stringBuilder2 = ""
for i in range(10):
    regularization_value = 10**-i
    operation, combiObject = train_regression(training_data, training_targets, regularization=regularization_value, matrix=1, minimum_level=1, maximum_level=4)
    stringBuilder2 += str(regularization_value) + " & " + str(test_regression(test_data, test_targets, combiObject)) + "\\\\\n"

# "Tabelle" is German for "table".
print("Tabelle:")
print(stringBuilder1)
print(stringBuilder2)


## Test: Increasing regularization parameter lambda -> Error

Information:

- Data set: boston
- levels: 1,4
- training / test ratio: 1/5
- lambda: 10^-i with i in [0,9] (plus one reference run with lambda = 0)
- matrix: C and I


In [None]:
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sparseSpACE.DEMachineLearning import *
from sklearn import datasets

# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases (deprecated in 1.0) -- confirm against the pinned
# version before re-running this cell.
data, target = datasets.load_boston(return_X_y=True)
data, target = scale_data(data, target)

# every 5th sample is held out for testing
training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)

# Reference run without regularization (lambda = 0).
operation, combiObject = train_regression(training_data, training_targets, regularization=0, matrix=1, minimum_level=1, maximum_level=4)
print(test_regression(test_data, test_targets, combiObject))

# Matrix flag 0: one LaTeX table row per lambda = 10^-i.
stringBuilder1 = ""
for i in range(10):
    regularization_value = 10**-i
    operation, combiObject = train_regression(training_data, training_targets, regularization=regularization_value, matrix=0, minimum_level=1, maximum_level=4)
    stringBuilder1 += str(regularization_value) + " & " + str(test_regression(test_data, test_targets, combiObject)) + "\\\\\n"

# Matrix flag 1: same sweep.
stringBuilder2 = ""
for i in range(10):
    regularization_value = 10**-i
    operation, combiObject = train_regression(training_data, training_targets, regularization=regularization_value, matrix=1, minimum_level=1, maximum_level=4)
    stringBuilder2 += str(regularization_value) + " & " + str(test_regression(test_data, test_targets, combiObject)) + "\\\\\n"

# "Tabelle" is German for "table".
print("Tabelle:")
print(stringBuilder1)
print(stringBuilder2)

## Test: Decreasing training/test ratio -> Error

Information:

- Data set: Gaussian Function
- levels: 1,5
- training / test split: every i-th sample is held out for testing, with i in [2,24]
- lambda: 10^-6
- matrix: C and I



In [None]:
# import sparseSpACE
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sklearn import datasets

# One fixed 100-point sample of the test function is reused for every split.
func = GenzGaussian((0.5,0.5), (10,10))
data, target = construct_dataset(2, func, 100)

# The split happens inside the loops: every i-th sample is held out for testing.

# Matrix flag 0 (the "C" variant).
stringBuilderC = ""
for i in range(2, 25):
    training_data, training_targets, test_data, test_targets = split_dataset(data, target, i)
    # fixed value; the search call it replaces was find_best_lambda_C(data, target, 1, 5)
    lambdaC = 10**-6
    operation, combiObject = train_regression(training_data, training_targets, lambdaC, 0, 1, 5)
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderC += str(i) + " & " + str(rmse) + "\\\\\n"

# Matrix flag 1 (the "I" variant).
stringBuilderI = ""
for i in range(2, 25):
    training_data, training_targets, test_data, test_targets = split_dataset(data, target, i)
    # fixed value; the search call it replaces was find_best_lambda_I(data, target, 1, 5)
    lambdaI = 10**-6
    operation, combiObject = train_regression(training_data, training_targets, lambdaI, 1, 1, 5)
    rmse = test_regression(test_data, test_targets, combiObject)
    stringBuilderI += str(i) + " & " + str(rmse) + "\\\\\n"

# "Testfehler" is German for "test error".
print("Testfehler:")
print(stringBuilderC)
print(stringBuilderI)

## Test: Increasing size of data set -> Time

Information:

- Data set: Gaussian Function
- levels: 1,5
- training / test ratio: 1/5
- lambda: 10^-6
- matrix: C and I

In [None]:
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sklearn import datasets

# Test function that is sampled to build the data sets.
func = GenzGaussian((0.5,0.5), (10,10))

# Each row is "<number of points> & <time>". The time column used to be left
# empty; it is now filled from combiObject.get_time_used(), the same accessor
# the grid-point timing experiment in this notebook uses.

# Matrix flag 0 (the "C" variant), lambda = 10^-6.
stringBuilderC = ""
for i in range(1, 11):
    num_points = (i * 10) ** 2
    data, target = construct_dataset(2, func, num_points)
    operation, combiObject = train_regression(data, target, 10**-6, 0, 1, 5)
    stringBuilderC += str(num_points) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# Matrix flag 1 (the "I" variant), lambda = 10^-6.
stringBuilderI = ""
for i in range(1, 11):
    num_points = (i * 10) ** 2
    data, target = construct_dataset(2, func, num_points)
    operation, combiObject = train_regression(data, target, 10**-6, 1, 1, 5)
    stringBuilderI += str(num_points) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# Blank line so the lambda = 0 table below does not run into the previous one.
stringBuilderI += "\n"

# Matrix flag 1 without regularization (lambda = 0).
for i in range(1, 11):
    num_points = (i * 10) ** 2
    data, target = construct_dataset(2, func, num_points)
    operation, combiObject = train_regression(data, target, 0, 1, 1, 5)
    stringBuilderI += str(num_points) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# "Testfehler" is German for "test error"; the label is kept although this
# cell reports timings.
print("Testfehler:")
print(stringBuilderC)
print(stringBuilderI)

## Test: Increasing number of grid points -> Time
Information: 
- Data set: Gaussian function (400 points)
- levels: 1,x with x in [2,8]
- training / test ratio: 1/5
- lambda: 10^-6
- matrix: C and I

In [None]:
%matplotlib inline
import numpy as np
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *
from sparseSpACE.StandardCombi import *
from sklearn import datasets

# One fixed 400-point sample of the test function is used for all levels.
func = GenzGaussian((0.5,0.5), (10,10))
data, target = construct_dataset(2, func, 400)

# Each row is "<number of grid points> & <time used>".

# Matrix flag 0 (the "C" variant), lambda = 10^-6.
stringBuilderC = ""
for i in range(2, 9):
    operation, combiObject = train_regression(data, target, 10**-6, 0, 1, i)
    stringBuilderC += str(operation.get_grid().get_num_points()) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# Matrix flag 1 (the "I" variant), lambda = 10^-6.
stringBuilderI = ""
for i in range(2, 9):
    operation, combiObject = train_regression(data, target, 10**-6, 1, 1, i)
    stringBuilderI += str(operation.get_grid().get_num_points()) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# Blank line between the regularized table and the lambda = 0 table.
# This line was spelled `sstringBuilderI`, which raised a NameError.
stringBuilderI += "\n"

# Without regularization (lambda = 0), matrix flag 0.
for i in range(2, 9):
    operation, combiObject = train_regression(data, target, 0, 0, 1, i)
    stringBuilderI += str(operation.get_grid().get_num_points()) + " & " + str(combiObject.get_time_used()) + "\\\\\n"

# "Testfehler" is German for "test error"; the label is kept although this
# cell reports timings.
print("Testfehler:")
print(stringBuilderC)
print(stringBuilderI)

# Spatially adaptive

## Test: Increasing lambda -> Error


############################################################################################################################################
Information:

- Data set: Gaussian function (400 points)
- training / test ratio: 1/5
- lambda: 10^-i with i in [0,9] (plus one reference run with lambda = 0)
- margin = 0.3
- max_evaluations = 200

In [None]:
%matplotlib inline
import sparseSpACE
import numpy as np
from sparseSpACE.spatiallyAdaptiveSingleDimension2 import *
from sparseSpACE.Function import *
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *

# dimension of the problem
dim = 2

# settings handed to the spatially adaptive scheme
margin = 0.3
max_evaluations_var = 200
stop_error = 10**-5

# domain boundaries: all-zeros to all-ones
a = np.zeros(dim)
b = np.ones(dim)

# sample the test function, then rescale with scale_data's default range
func = GenzGaussian((0.5,0.5), (10,10))
data, target = construct_dataset(dim, func, 400)
data, target = scale_data(data, target)

# every 5th sample is held out for testing
training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)

# error estimator used for refinement
errorOperator = ErrorCalculatorSingleDimVolumeGuided()

# grid object shared by every Regression operation below
grid = GlobalTrapezoidalGrid(a=a, b=b, modified_basis=False, boundary=False)

# Reference run without regularization ("Ohne" is German for "without").
operation = Regression(training_data, training_targets, 0, dim, regularization_matrix=1, grid=grid)
adaptiveCombiInstanceSingleDim = SpatiallyAdaptiveSingleDimensions2(np.ones(dim) * a, np.ones(dim) * b, margin, operation=operation, rebalancing=False)
adaptiveCombiInstanceSingleDim.performSpatiallyAdaptiv(1, 2, errorOperator, stop_error, do_plot=False, max_evaluations=max_evaluations_var)
differenceOhne = test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, operation.grid)

# Sweep lambda = 10^-i, first for matrix flag 0 (table C), then for matrix
# flag 1 (table I). One LaTeX table row is collected per run.
table_rows = {0: "", 1: ""}
for matrix_flag in (0, 1):
    for i in range(10):
        operation = Regression(training_data, training_targets, 10**-i, dim, regularization_matrix=matrix_flag, grid=grid)
        adaptiveCombiInstanceSingleDim = SpatiallyAdaptiveSingleDimensions2(np.ones(dim) * a, np.ones(dim) * b, margin, operation=operation, rebalancing=False)
        adaptiveCombiInstanceSingleDim.performSpatiallyAdaptiv(1, 2, errorOperator, stop_error, do_plot=False, max_evaluations=max_evaluations_var)
        difference = test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, operation.grid)
        table_rows[matrix_flag] += str(10**-i) + " & " + str(difference) + "\\\\\n"

stringBuilderC = table_rows[0]
stringBuilderI = table_rows[1]

print(stringBuilderC)
print(stringBuilderI)
print("Ohne: " + str(differenceOhne))

## Test: Increasing max evaluations -> Error
###########################################

Information:

- Data set: Gaussian function
- training / test ratio: 1/5
- lambda: 0
- margin = 0.3
- max_evaluations = 10^i with i in [0,3]

In [None]:
%matplotlib inline
import sparseSpACE
import numpy as np
from sparseSpACE.spatiallyAdaptiveSingleDimension2 import *
from sparseSpACE.Function import *
from sparseSpACE.ErrorCalculator import *
from sparseSpACE.GridOperation import *

# dimension of the problem
dim = 2

# settings handed to the spatially adaptive scheme
# (max_evaluations_var is defined here but the loop below uses 10**i instead)
margin = 0.3
max_evaluations_var = 200
stop_error = 10**-5

# domain boundaries: all-zeros to all-ones
a = np.zeros(dim)
b = np.ones(dim)

# sample the test function, then rescale with scale_data's default range
func = GenzGaussian((0.5,0.5), (10,10))
data, target = construct_dataset(dim, func, 400)
data, target = scale_data(data, target)

# every 5th sample is held out for testing
training_data, training_targets, test_data, test_targets = split_dataset(data, target, 5)

# error estimator used for refinement
errorOperator = ErrorCalculatorSingleDimVolumeGuided()

# grid object shared by every Regression operation below
grid = GlobalTrapezoidalGrid(a=a, b=b, modified_basis=False, boundary=False)

# No regularization ("Ohne" is German for "without"); the evaluation budget
# grows as 10^i. One LaTeX table row is collected per run.
stringBuilderOhne = ""
for i in range(4):
    evaluation_budget = 10**i
    operation = Regression(training_data, training_targets, 0, dim, regularization_matrix=0, grid=grid)
    adaptiveCombiInstanceSingleDim = SpatiallyAdaptiveSingleDimensions2(np.ones(dim) * a, np.ones(dim) * b, margin, operation=operation, rebalancing=False)
    adaptiveCombiInstanceSingleDim.performSpatiallyAdaptiv(1, 2, errorOperator, stop_error, do_plot=False, max_evaluations=evaluation_budget)
    difference = test_regression_spatially_adaptive(test_data, test_targets, adaptiveCombiInstanceSingleDim, operation.grid)
    stringBuilderOhne += str(evaluation_budget) + " & " + str(difference) + "\\\\\n"

print(stringBuilderOhne)