<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Preparation" data-toc-modified-id="Preparation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Preparation</a></span></li><li><span><a href="#Specification-of-Experiment-Settings" data-toc-modified-id="Specification-of-Experiment-Settings-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Specification of Experiment Settings</a></span></li><li><span><a href="#Generation-of-all-possible-Monomials" data-toc-modified-id="Generation-of-all-possible-Monomials-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Generation of all possible Monomials</a></span></li><li><span><a href="#Generation-of-randomly-chosen-Polynomials" data-toc-modified-id="Generation-of-randomly-chosen-Polynomials-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Generation of randomly chosen Polynomials</a></span><ul class="toc-item"><li><span><a href="#Polynomials-as-'sympy.polys.polytools.Poly'-objects" data-toc-modified-id="Polynomials-as-'sympy.polys.polytools.Poly'-objects-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Polynomials as 'sympy.polys.polytools.Poly' objects</a></span></li><li><span><a href="#Polynomials-as-coefficient-rows-and-monomials-in-dataframe" data-toc-modified-id="Polynomials-as-coefficient-rows-and-monomials-in-dataframe-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Polynomials as coefficient rows and monomials in dataframe</a></span></li></ul></li><li><span><a href="#Precompute-Polynomials-with-Input-Values" data-toc-modified-id="Precompute-Polynomials-with-Input-Values-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Precompute Polynomials with Input Values</a></span></li><li><span><a href="#Data-Export" data-toc-modified-id="Data-Export-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Data Export</a></span></li></ul></div>

# Function Generation for the Training of λ-Nets

## Preparation

In [1]:
##############DO NOT CHANGE###################

# Importing own helper library
import sys
sys.path.insert(0,'../_baselib')
import general_helper as gh
import polynom_helper as ph

# Third-party imports
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from joblib import Parallel, delayed 
# sympy for Polynomial support
from sympy.polys import monomials
from sympy.polys.orderings import monomial_key
from sympy import symbols

# Reproducibility: seed NumPy's global generator, which the np.random.choice
# calls in the later cells draw from.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Output layout: the './data' root first, then one subfolder per artifact type.
gh.create_dir('./data')
subdirectories = ['parameters', 'plotting', 'saved_polynomial_lists', 'results', 'saved_models', 'weights', 'weights_training']
for dir_ in subdirectories:
    gh.create_dir(f'./data/{dir_}')

## Specification of Experiment Settings

In [2]:
# Read the experiment settings from the configuration file ('config.ini').
# The parsed result (presumably a mapping of setting name -> value; see
# gh.parse_config) is merged into the notebook namespace so that each setting
# becomes a plain variable. Any failure is reported and the manual
# specification cell below is expected to provide the values instead.

config_path = 'config.ini'

try:
    config = gh.parse_config(config_path)
    locals().update(config)
except Exception as parse_error:
    # Deliberate best-effort: report the problem and carry on.
    print('Parsing not possible due to Exception:',
          parse_error,
          '\nContinue with manual specification.',
          sep='\n')

In [3]:
# manual override of config and/or additional variable specification
# Uncomment a line to override the value loaded from 'config.ini' (or to
# define it when the config could not be parsed).

#d = 3  # polynomial degree (passed as max_degrees when enumerating monomials)
#n = 4  # number of variables (the first n lowercase letters are used as symbols)
#sparsity = 0.0  # fraction of the monomials whose coefficient is set to 0 per polynomial
#a_max = 10  # upper end of the coefficient grid
#a_min = -10  # lower end of the coefficient grid
#a_step = 0.1  # step of the coefficient grid (also determines the rounding precision)
#x_max = 1  # upper end of the input-value grid
#x_min = -1  # lower end of the input-value grid
#x_step = 0.01  # step of the input-value grid (also determines the rounding precision)
#lambda_dataset_size = 100  # number of input points sampled for evaluating the polynomials
#interpretation_dataset_size = 5000  # number of random polynomials to generate
#same_training_all_polynomials = True  # True: one shared set of input points for all polynomials

#n_jobs = 10  # n_jobs argument handed to joblib.Parallel for the evaluation step

# ------------------------------------------------------------------

In [4]:
##############DO NOT CHANGE###################

# Derived attributes
#   variables       : the first n lowercase letters, one character per polynomial variable
#   training_string : tag appended to the exported pickle file names, telling
#                     whether all polynomials share the same input points
variables = 'abcdefghijklmnopqrstuvwxyz'[:n]
training_string = '_same' if same_training_all_polynomials else '_diverse'

In [5]:
# Summary of the active experiment configuration.
# print(..., sep='') applies str() to every argument, so the text is the same
# as manual string concatenation.
print('Variables: ', n, ' (', variables, ')', sep='')
print('Degree: ', d, sep='')
print('Sparsity: ', sparsity*100, '%', sep='')
print('Lambda-Net Dataset Size: ', lambda_dataset_size, sep='')
print('I-Net Dataset Size: ', interpretation_dataset_size, sep='')

print('Coefficient Range: [', a_min, ', ', a_max, ']', sep='')
print('Variable Range: [', x_min, ', ', x_max, ']', sep='')

Variables: 4 (abcd)
Degree: 3
Sparsity: 0.0%
Lambda-Net Dataset Size: 1000
I-Net Dataset Size: 1000
Coefficient Range: [-10.0, 10.0]
Variable Range: [-1.0, 1.0]


## Generation of all possible Monomials

In [6]:
# One sympy symbol per character of `variables`
polys_symbols = symbols(list(variables))

# Enumerate all monomials up to degree d, then order them by the
# lexicographic monomial key, largest first (so the constant term 1 ends up last).
polys_monomials = list(monomials.itermonomials(variables=polys_symbols, max_degrees=d, min_degrees=None))
polys_monomials.sort(key=monomial_key('lex', polys_symbols), reverse=True)

# Closed-form count of those monomials; printed next to the list length as a cross-check
monomials_count = monomials.monomial_count(n, d)

# Same monomials expressed as exponent tuples, one entry per symbol
tuples_monomials = [ph.monomial_to_power_tuple(monomial, polys_symbols) for monomial in polys_monomials]

print('List length: ', len(polys_monomials), sep='')
print('Number of monomials in a polynomial with ', n, ' variables and degree ', d, ': ', monomials_count, sep='')
print(polys_monomials)
print(tuples_monomials)

List length: 35
Number of monomials in a polynomial with 4 variables and degree 3: 35
[a**3, a**2*b, a**2*c, a**2*d, a**2, a*b**2, a*b*c, a*b*d, a*b, a*c**2, a*c*d, a*c, a*d**2, a*d, a, b**3, b**2*c, b**2*d, b**2, b*c**2, b*c*d, b*c, b*d**2, b*d, b, c**3, c**2*d, c**2, c*d**2, c*d, c, d**3, d**2, d, 1]
[(3, 0, 0, 0), (2, 1, 0, 0), (2, 0, 1, 0), (2, 0, 0, 1), (2, 0, 0, 0), (1, 2, 0, 0), (1, 1, 1, 0), (1, 1, 0, 1), (1, 1, 0, 0), (1, 0, 2, 0), (1, 0, 1, 1), (1, 0, 1, 0), (1, 0, 0, 2), (1, 0, 0, 1), (1, 0, 0, 0), (0, 3, 0, 0), (0, 2, 1, 0), (0, 2, 0, 1), (0, 2, 0, 0), (0, 1, 2, 0), (0, 1, 1, 1), (0, 1, 1, 0), (0, 1, 0, 2), (0, 1, 0, 1), (0, 1, 0, 0), (0, 0, 3, 0), (0, 0, 2, 1), (0, 0, 2, 0), (0, 0, 1, 2), (0, 0, 1, 1), (0, 0, 1, 0), (0, 0, 0, 3), (0, 0, 0, 2), (0, 0, 0, 1), (0, 0, 0, 0)]


## Generation of randomly chosen Polynomials

In [7]:
# Draw the coefficients: one row per polynomial, one column per monomial.
# Every entry is sampled (with replacement) from the grid
# np.arange(a_min, a_max + a_step, a_step).
all_coefficients = np.random.choice(np.arange(a_min, a_max + a_step, a_step), 
                                       size=(interpretation_dataset_size, monomials_count), replace=True)
# Round away the floating-point noise introduced by arange.
# NOTE(review): int(-log10(a_step)) only gives the right number of decimals when
# a_step is a power of ten (0.1, 0.01, ...); e.g. a_step = 0.5 would round to 0 decimals.
all_coefficients = np.round(all_coefficients, int(-np.log10(a_step)))

# Sparsity
if sparsity > 0:
    # Zero out floor(#monomials * sparsity) randomly chosen coefficients in every polynomial.
    # int() truncates, which equals floor here because both factors are non-negative
    # (the original called an un-imported `floor`, raising NameError).
    zeros_per_polynomial = int(monomials_count * sparsity)
    for coeffs in tqdm(all_coefficients):
        # Valid column indices are 0 .. monomials_count - 1. Sampling from
        # `monomials_count + 1` values could yield an out-of-bounds index.
        sparsity_indices = np.random.choice(int(monomials_count), zeros_per_polynomial, replace=False)
        # `coeffs` is a row view, so this writes straight into all_coefficients.
        coeffs[sparsity_indices] = 0

### Polynomials as 'sympy.polys.polytools.Poly' objects

In [8]:
# Build one polynomial object per coefficient row, pairing the coefficients
# with the monomial exponent tuples and the symbols.
polynomials_poly = [
    ph.build_Poly(coefficient_row, tuples_monomials, polys_symbols)
    for coefficient_row in all_coefficients
]

# sanity check: display the first polynomial
polynomials_poly[0]

Poly(0.2*a**3 + 7.9*a**2*b - 0.8*a**2*c - 8.6*a**2*d + 0.6*a**2 - 2.9*a*b**2 + 8.8*a*b*c - 8.0*a*b*d + 0.2*a*b + 2.1*a*c**2 - 2.6*a*c*d - 1.3*a*c + 1.6*a*d**2 - 0.1*a*d + 0.3*a + 5.1*b**3 + 3.0*b**2*c + 4.9*b**2*d - 4.8*b**2 - 9.9*b*c**2 - 1.3*b*c*d + 5.7*b*c - 6.3*b*d**2 + 2.9*b*d + 9.1*b + 8.7*c**3 - 8.0*c**2*d + 6.0*c**2 - 4.3*c*d**2 - 7.9*c*d - 1.2*c - 5.2*d**3 - 4.2*d**2 + 6.9*d + 8.7, a, b, c, d, domain='RR')

### Polynomials as coefficient rows and monomials in dataframe

In [9]:
# Tabular view: one row per polynomial, one column per monomial
polynomials_df = pd.DataFrame(all_coefficients, columns=polys_monomials)

# data inspection: monomial count, frame shape, then the first rows
print(len(polys_monomials), polynomials_df.shape, sep='\n')
polynomials_df.head()

35
(1000, 35)


Unnamed: 0,a**3,a**2*b,a**2*c,a**2*d,a**2,a*b**2,a*b*c,a*b*d,a*b,a*c**2,...,c**3,c**2*d,c**2,c*d**2,c*d,c,d**3,d**2,d,1
0,0.2,7.9,-0.8,-8.6,0.6,-2.9,8.8,-8.0,0.2,2.1,...,8.7,-8.0,6.0,-4.3,-7.9,-1.2,-5.2,-4.2,6.9,8.7
1,-8.6,8.9,8.9,7.4,8.9,-5.0,0.7,-4.6,-3.7,3.0,...,-0.9,1.0,8.7,9.8,7.1,-9.3,7.4,-6.6,-2.0,6.3
2,-5.1,0.3,3.1,-9.9,3.3,-4.7,0.5,-9.7,-4.7,9.0,...,5.3,8.7,2.3,-6.0,5.6,-8.6,-5.6,-3.6,-1.2,-3.0
3,-9.2,-1.3,2.8,3.5,-3.8,3.8,-2.0,3.5,6.2,6.2,...,-0.2,7.1,0.3,-6.6,9.2,-0.0,7.4,3.0,-10.0,-9.6
4,4.1,0.2,-7.4,3.6,-8.6,-1.1,-5.9,2.3,7.8,-3.8,...,-3.5,6.9,-5.6,-3.9,8.4,3.3,-7.3,-7.3,0.7,-5.7


## Precompute Polynomials with Input Values

In [10]:
# create input values

if same_training_all_polynomials:
    # One shared set of input points for all polynomials: lambda_dataset_size
    # points with n coordinates each, every coordinate drawn (with replacement)
    # from the grid np.arange(x_min, x_max + x_step, x_step).
    x_values = np.random.choice(np.arange(x_min, x_max + x_step, x_step), 
                                size=(lambda_dataset_size, n), replace=True)
    # Round away the floating-point noise introduced by arange.
    # NOTE(review): the decimals formula presumably expects x_step to be a power of ten - confirm.
    x_values = np.round(x_values, int(-np.log10(x_step)))
else:
    # This notebook defines no sampling scheme for per-polynomial inputs.
    # Fail loudly here instead of hitting a NameError below or, in a reused
    # kernel, silently evaluating on a stale `x_values` while the exported
    # files are tagged '_diverse'.
    raise NotImplementedError(
        'same_training_all_polynomials=False is not supported: '
        'no x_values are generated for per-polynomial training data.'
    )

# inspection
print('Number of points: ' + str(len(x_values)))
print('Number of values per point: ' + str(len(x_values[0])))
x_values[0:10]

Number of points: 1000
Number of values per point: 4


array([[-0.83, -0.43,  0.25, -0.28],
       [ 0.09, -0.49, -0.55,  0.06],
       [-0.88, -0.05, -0.84, -0.34],
       [-0.4 , -0.36,  0.8 ,  0.32],
       [ 0.59, -0.12,  0.72, -0.45],
       [-0.96,  0.  , -0.14,  0.61],
       [-0.4 ,  0.89, -0.32, -0.38],
       [-0.57, -0.73,  0.97, -0.88],
       [ 0.7 , -0.71,  0.53, -0.38],
       [-0.28, -0.44, -0.37,  0.76]])

In [11]:
# compute function values in parallel

def compute_poly(poly, x_values):
    """Evaluate one polynomial at every given input point.

    Parameters
    ----------
    poly : object exposing ``eval(point)``
        The polynomial to evaluate; it is called once per point with the
        point's coordinates as a tuple.
    x_values : iterable of coordinate sequences
        The input points; each element is converted with ``tuple(...)``.

    Returns
    -------
    tuple
        ``(x_values, y_values)`` where ``x_values`` is the argument passed in
        (unchanged) and ``y_values`` is a NumPy array with one float per point.
    """
    evaluations = []
    for point in x_values:
        # float(...) converts whatever numeric type eval returns into a plain Python float
        evaluations.append(float(poly.eval(tuple(point))))
    return x_values, np.array(evaluations)
    
# Evaluate every polynomial on x_values, spread over n_jobs loky workers.
# Result layout: one (x_values, y_values) pair per polynomial.
# dimension: [Polynomial: [x:[4], y]]
x_y_data = Parallel(n_jobs=n_jobs, backend='loky')(
    delayed(compute_poly)(polynomial, x_values) for polynomial in tqdm(polynomials_poly)
)

# Split the pairs into separate per-polynomial lists of inputs and outputs
x_data = [xy_pair[0] for xy_pair in x_y_data]
y_data = [xy_pair[1] for xy_pair in x_y_data]

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




In [12]:
# First polynomial of x_y_data: entries in the pair, number of input points,
# number of function values, then the pair itself.
print(len(x_y_data[0]), len(x_y_data[0][0]), len(x_y_data[0][1]), sep='\n')
x_y_data[0]

2
1000
1000


(array([[-0.83, -0.43,  0.25, -0.28],
        [ 0.09, -0.49, -0.55,  0.06],
        [-0.88, -0.05, -0.84, -0.34],
        ...,
        [ 0.81,  0.12, -0.06, -0.23],
        [ 0.92,  0.41,  0.04, -0.96],
        [-0.06,  0.89,  0.28, -0.38]]),
 array([ 3.44159060e+00,  6.91303640e+00,  7.00199390e+00,  1.05460288e+01,
         1.50289468e+01,  5.64676300e+00,  9.40607450e+00,  2.62604900e+01,
        -4.80214550e+00,  7.07255010e+00,  5.41226160e+00,  4.74831470e+00,
         7.03660170e+00,  8.67052000e-02, -8.06608260e+00,  9.28291330e+00,
         5.73990210e+00,  1.07941850e+01,  1.09609590e+00, -1.34509060e+00,
         6.86137140e+00,  1.00378557e+01,  4.19627610e+00, -5.65660080e+00,
         7.90678590e+00,  3.54383440e+00,  8.41002910e+00,  1.29146392e+01,
         1.15057987e+01,  7.94593200e+00,  2.03225657e+01,  9.91064440e+00,
         1.76447399e+01, -5.95921270e+00,  2.22351440e+00,  1.40435376e+01,
         1.02109272e+01, -7.61088260e+00,  2.29500600e-01,  6.86223490e+0

In [13]:
# Input points stored for the first polynomial: count first, then the values
first_x_data = x_data[0]
print(len(first_x_data))
first_x_data

1000


array([[-0.83, -0.43,  0.25, -0.28],
       [ 0.09, -0.49, -0.55,  0.06],
       [-0.88, -0.05, -0.84, -0.34],
       ...,
       [ 0.81,  0.12, -0.06, -0.23],
       [ 0.92,  0.41,  0.04, -0.96],
       [-0.06,  0.89,  0.28, -0.38]])

In [14]:
# Function values stored for the first polynomial: count first, then the values
first_y_data = y_data[0]
print(len(first_y_data))
first_y_data

1000


array([ 3.44159060e+00,  6.91303640e+00,  7.00199390e+00,  1.05460288e+01,
        1.50289468e+01,  5.64676300e+00,  9.40607450e+00,  2.62604900e+01,
       -4.80214550e+00,  7.07255010e+00,  5.41226160e+00,  4.74831470e+00,
        7.03660170e+00,  8.67052000e-02, -8.06608260e+00,  9.28291330e+00,
        5.73990210e+00,  1.07941850e+01,  1.09609590e+00, -1.34509060e+00,
        6.86137140e+00,  1.00378557e+01,  4.19627610e+00, -5.65660080e+00,
        7.90678590e+00,  3.54383440e+00,  8.41002910e+00,  1.29146392e+01,
        1.15057987e+01,  7.94593200e+00,  2.03225657e+01,  9.91064440e+00,
        1.76447399e+01, -5.95921270e+00,  2.22351440e+00,  1.40435376e+01,
        1.02109272e+01, -7.61088260e+00,  2.29500600e-01,  6.86223490e+00,
        1.27483371e+01,  3.93557350e+00,  5.60358460e+00,  3.75572990e+00,
        5.90018400e+00,  2.38544613e+01,  4.11002420e+00, -2.81727100e+00,
        1.22000539e+01,  1.23121740e+00,  1.44026970e+01,  1.34367460e+01,
        2.16348467e+01,  

## Data Export

In [15]:
# All artifacts go to the same folder and share the same settings-derived
# file-name suffix, so that suffix is built once.
export_dir = './data/saved_polynomial_lists/'

# '_variables_<n>_degree_<d>_sparsity_<..>_astep_<..>_amin_<..>_amax_<..>'
structure_suffix = ''.join(
    '_' + label + '_' + str(value)
    for label, value in [('variables', n), ('degree', d), ('sparsity', sparsity),
                         ('astep', a_step), ('amin', a_min), ('amax', a_max)]
)
# '_xstep_<..>_xmin_<..>_xmax_<..>'
sampling_suffix = ''.join(
    '_' + label + '_' + str(value)
    for label, value in [('xstep', x_step), ('xmin', x_min), ('xmax', x_max)]
)
# The pickle files also encode the number of input points, the input grid
# and the training mode.
pickle_suffix = (str(interpretation_dataset_size) + '_train_' + str(lambda_dataset_size)
                 + structure_suffix + sampling_suffix + training_string + '.pkl')

# Csv exports
path_polynomials_df = export_dir + 'polynomials_df' + str(interpretation_dataset_size) + structure_suffix + '.csv'
gh.export_csv(polynomials_df, path_polynomials_df)

# Pickle exports
path_polynomials_poly = export_dir + 'polynomials_poly' + pickle_suffix
gh.export_pickle(polynomials_poly, path_polynomials_poly)

path_x_data = export_dir + 'x_data' + pickle_suffix
gh.export_pickle(x_data, path_x_data)

path_y_data = export_dir + 'y_data' + pickle_suffix
gh.export_pickle(y_data, path_y_data)
