In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
import multifidelityfunctions as mff
import multiLevelCoSurrogates as mlcs
import more_itertools
import pyDOE

# Output directory for saved figures. It is prepended verbatim to the file
# names (f'{plot_dir}...') in the savefig calls, so the trailing slash is required.
plot_dir = 'plots/'

In [None]:
@mff.row_vectorize
def sphere(x):
    """Sphere function: the sum of the squared entries of ``x``.

    :param x: iterable of coordinates
    :return:  sum of ``xi**2`` over all entries ``xi`` of ``x``
    """
    total = 0
    for xi in x:
        total = total + xi**2
    return total

In [None]:
# Plot the sphere function on [-1, 1] together with histograms of points drawn
# by mlcs.sample_by_function, once with minimize=True and once with minimize=False.
xrange = np.linspace(-1, 1, 201)
plt.plot(xrange, [sphere([x]) for x in xrange], label='function')

for minimize, label in ((True, 'minimize'), (False, 'maximize')):
    sample = mlcs.sample_by_function(
        sphere,
        ndim=1,
        n_samples=1000,
        minimize=minimize,
        range_in=(-1, 1),
        range_out=(0, 1),
    )
    plt.hist(sample, bins=80, density=True, alpha=.5, label=label)

plt.legend(loc=0)
plt.show()

# Error spread visualization

In [None]:
@mff.row_vectorize
def forrester(xx):
    """Mirrored and shifted Forrester-style test function.

    Evaluates ``22 - ((6*xx - 2)**2 * sin(12*xx - 4) + 6.03)``.

    :param xx: input location; ``math.sin`` is used, so ``12*xx - 4`` must be
               convertible to a single float
    :return:   function value at ``xx``
    """
    quadratic = (6*xx - 2)**2
    oscillation = math.sin(12*xx - 4)
    return 22 - (quadratic * oscillation + 6.03)

In [None]:
# Plot forrester on a 101-point grid over [0, 1]; `xrange` is reused by later cells.
xrange = np.linspace(0, 1, num=101)
plt.plot(xrange, forrester(xrange))
plt.ylim(0, 22.5)
plt.show()

In [None]:
# Evaluate forrester on a dense 10001-point grid over [0, 1] and print the
# smallest and largest value found.
dense_grid = np.linspace(0, 1, num=10001)
y = forrester(dense_grid)
print(np.min(y), np.max(y))

In [None]:
# Train a Kriging surrogate on a few random evaluations of forrester and plot
# it against the true function. `surr` is reused by the cells below.
archive = mlcs.CandidateArchive(ndim=1)#, fidelities=['high', 'low', 'high-low'])

n_samples = 5

np.random.seed(20160501)
# Use n_samples instead of a second hard-coded 5 so both stay in sync.
x = np.random.rand(n_samples, 1)
# x = np.linspace(0,1,n_samples).reshape(-1,1)

# Evaluate once and reuse the result when filling the archive.
y = forrester(x)
archive.addcandidates(x, y)#, fidelity='high')

surr = mlcs.Surrogate.fromname('Kriging', archive)#, kernel='Matern')
surr.retrain()

plt.plot(xrange, forrester(xrange))
plt.plot(xrange, surr.predict(xrange.reshape(-1,1)))
plt.scatter(*archive.getcandidates())
plt.ylim([0,22.5])
plt.show()

In [None]:
def test_spacing_of_sample(test_sample, test_func):
    sample_size = len(test_sample)
    sample = sorted(test_sample)
    
    x = surr.predict(sample).reshape(-1, 1)
    y = test_func(sample).reshape(-1, 1)
    square_errors = (x - y)**2
    print(f'MSE: {np.mean(square_errors)}')

    edges = (test_sample[:-1] + test_sample[1:]) / 2
    edges = [0] + edges.flatten().tolist() + [1]
    
    lengths = np.diff(edges).reshape(-1,1)

    return lengths, square_errors / lengths

In [None]:
# Length-normalised squared errors for a test sample drawn with
# mlcs.sample_by_function (minimize=False), shown as bars on a twin axis.
np.random.seed(20160501)
np.set_printoptions(precision=5, linewidth=180)

n_samples = 200
test_func = forrester

test_sample = np.sort(
    mlcs.sample_by_function(test_func, ndim=1, n_samples=n_samples, minimize=False,
                            range_in=(0, 1), range_out=(0, 22)),
    axis=0,
)

lengths, lne = test_spacing_of_sample(test_sample, test_func=test_func)

# Cell boundaries: 0, the midpoints between neighbouring test points, and 1.
inner_edges = (test_sample[:-1] + test_sample[1:]) / 2
edges = np.concatenate(([0], inner_edges.ravel(), [1]))
bar_midpoints = (edges[:-1] + edges[1:]) / 2

ax1 = plt.subplot(111)
ax1.plot(xrange, test_func(xrange))
ax1.plot(xrange, surr.predict(xrange.reshape(-1, 1)))
ax1.scatter(test_sample, test_func(test_sample))

# One bar per test point, as wide as the cell that point represents.
ax2 = ax1.twinx()
ax2.bar(bar_midpoints, lne.flatten(), width=lengths.flatten(), bottom=-7, color='C3', alpha=.3)

ax1.set_title('function-based probability distribution')
ax1.set_ylim([0, 22.5])
ax1.set_xlim([0, 1])
plt.show()

In [None]:
# Uniform random test sample, with errors aggregated into sections of
# `section_size` consecutive test points (one bar per section).
np.random.seed(20160501)
section_size = 10
# np.sort keeps an (n, 1) ndarray; sorted() would return a list of arrays.
test_sample = np.sort(np.random.rand(100, 1), axis=0)

lengths, lne = test_spacing_of_sample(test_sample, test_func=forrester)

# Section boundaries: 0, the midpoint after every `section_size`-th point, and 1.
edges = [0] + [((test_sample[i-1]+test_sample[i])/2)[0] for i in range(section_size, len(test_sample), section_size)] + [1]
section_lengths = np.diff(edges)

# lne * lengths recovers the squared error per point; sum these per section and
# normalise by the section length so there is one value per bar.
section_sse = (lne * lengths).reshape(-1, section_size).sum(axis=1)
lne = section_sse / section_lengths

# Rescale so the tallest bar has height 20.
lne = lne / lne.max() * 20

plt.plot(xrange, forrester(xrange))
plt.plot(xrange, surr.predict(xrange.reshape(-1,1)))
plt.scatter(test_sample, forrester(test_sample))
for e in edges:
    plt.axvline(e, c='black', alpha=.5)

bar_midpoints = [(a+b)/2 for a, b in more_itertools.pairwise(edges)]

plt.bar(bar_midpoints, lne, width=section_lengths, bottom=-7, color='C3', alpha=.3)
plt.title('uniform random probability distribution')
plt.show()

In [None]:
# Latin hypercube test sample, with errors aggregated into sections of
# `section_size` consecutive test points (one bar per section).
np.random.seed(20160501)
section_size = 10
# np.sort keeps an (n, 1) ndarray; sorted() would return a list of arrays.
test_sample = np.sort(pyDOE.lhs(1, 100), axis=0)

lengths, lne = test_spacing_of_sample(test_sample, test_func=forrester)

# Section boundaries: 0, the midpoint after every `section_size`-th point, and 1.
edges = [0] + [((test_sample[i-1]+test_sample[i])/2)[0] for i in range(section_size, len(test_sample), section_size)] + [1]
section_lengths = np.diff(edges)

# lne * lengths recovers the squared error per point; sum these per section and
# normalise by the section length so there is one value per bar.
section_sse = (lne * lengths).reshape(-1, section_size).sum(axis=1)
lne = section_sse / section_lengths

# Rescale so the tallest bar has height 20.
lne = lne / lne.max() * 20

plt.plot(xrange, forrester(xrange))
plt.plot(xrange, surr.predict(xrange.reshape(-1,1)))
plt.scatter(test_sample, forrester(test_sample))
for e in edges:
    plt.axvline(e, c='black', alpha=.5)

bar_midpoints = [(a+b)/2 for a, b in more_itertools.pairwise(edges)]

plt.bar(bar_midpoints, lne, width=section_lengths, bottom=-7, color='C3', alpha=.3)
plt.title('LHS distribution')
plt.show()

In [None]:
sample_size = 1000

# One (title, sampler) pair per panel; each sampler draws `n` 1D test points.
# NOTE(review): range_out=(-6,16) differs from the (0,22) range used for
# forrester in an earlier cell - confirm which output range is intended.
sampling_schemes = [
    ('uniform random probability distribution',
     lambda n: np.random.rand(n, 1)),
    ('LHS distribution',
     lambda n: pyDOE.lhs(1, n)),
    ('function-based probability distribution',
     lambda n: mlcs.sample_by_function(forrester, ndim=1, n_samples=n, minimize=True,
                                       range_in=(0,1), range_out=(-6,16))),
]

plt.figure(figsize=(18,5))

for panel, (title, draw_sample) in enumerate(sampling_schemes, start=1):
    # Re-seed before every draw so each panel is reproducible on its own.
    np.random.seed(20160501)
    # np.sort keeps an (n, 1) ndarray; with the list returned by sorted(),
    # `a[:-1] + a[1:]` would concatenate instead of adding element-wise.
    test_sample = np.sort(draw_sample(sample_size), axis=0)

    lengths, lne = test_spacing_of_sample(test_sample, test_func=forrester)

    # Cell boundaries: 0, the midpoints between neighbouring test points, and 1.
    edges = np.concatenate(([0], ((test_sample[:-1] + test_sample[1:]) / 2).flatten(), [1]))
    bar_midpoints = (edges[:-1] + edges[1:]) / 2

    ax1 = plt.subplot(1, len(sampling_schemes), panel)
    ax1.plot(xrange, forrester(xrange), label='True function')
    ax1.plot(xrange, surr.predict(xrange.reshape(-1,1)), label='GP model')
    ax1.scatter(test_sample, forrester(test_sample), color='C2', label=f'test sample (n={sample_size})', s=12)

    # One bar per test point, as wide as the cell that point represents.
    ax2 = ax1.twinx()
    ax2.bar(bar_midpoints, lne.flatten(), width=lengths.flatten(), bottom=-7, color='C3', alpha=.3, label='normalized SSE per section')
    ax1.set_title(title)
    if panel == 1:
        # The legend is only drawn on the first panel.
        ax1.legend()

plt.tight_layout()
plt.savefig(f'{plot_dir}normed_errors_per_section.png')
plt.savefig(f'{plot_dir}normed_errors_per_section.pdf')
plt.show()

Cumulative version

In [None]:
sample_size = 1000

# One (title, sampler) pair per panel; each sampler draws `n` 1D test points.
# NOTE(review): range_out=(-6,16) differs from the (0,22) range used for
# forrester in an earlier cell - confirm which output range is intended.
sampling_schemes = [
    ('uniform random probability distribution',
     lambda n: np.random.rand(n, 1)),
    ('LHS distribution',
     lambda n: pyDOE.lhs(1, n)),
    ('function-based probability distribution',
     lambda n: mlcs.sample_by_function(forrester, ndim=1, n_samples=n, minimize=True,
                                       range_in=(0,1), range_out=(-6,16))),
]

plt.figure(figsize=(18,5))

for panel, (title, draw_sample) in enumerate(sampling_schemes, start=1):
    # Re-seed before every draw so each panel is reproducible on its own.
    np.random.seed(20160501)
    # np.sort keeps an (n, 1) ndarray; with the list returned by sorted(),
    # `a[:-1] + a[1:]` would concatenate instead of adding element-wise.
    test_sample = np.sort(draw_sample(sample_size), axis=0)

    lengths, lne = test_spacing_of_sample(test_sample, test_func=forrester)

    # Cell boundaries: 0, the midpoints between neighbouring test points, and 1.
    edges = np.concatenate(([0], ((test_sample[:-1] + test_sample[1:]) / 2).flatten(), [1]))
    bar_midpoints = (edges[:-1] + edges[1:]) / 2

    ax1 = plt.subplot(1, len(sampling_schemes), panel)
    ax1.plot(xrange, forrester(xrange), label='True function')
    ax1.plot(xrange, surr.predict(xrange.reshape(-1,1)), label='GP model')
    ax1.scatter(test_sample, forrester(test_sample), color='C2', label=f'test sample (n={sample_size})', s=12)

    # lne * lengths recovers the squared error per test point; the bars show
    # its running sum from left to right.
    ax2 = ax1.twinx()
    ax2.bar(bar_midpoints, np.cumsum(lne*lengths), width=lengths.flatten(), bottom=-7, color='C3', alpha=.3, label='cumulative SSE')
    # ax1.plot(np.linspace(0, 1, len(lne)-1), np.sqrt(np.diff(np.cumsum(lne*lengths))))
    ax1.set_title(title)
    if panel == 1:
        # The legend is only drawn on the first panel.
        ax1.legend()

plt.tight_layout()
# Separate file names so this figure does not overwrite the non-cumulative one.
plt.savefig(f'{plot_dir}cumulative_errors_per_section.png')
plt.savefig(f'{plot_dir}cumulative_errors_per_section.pdf')
plt.show()

In [None]:
# Running sum over `xrange` of the high-fidelity 1D function shifted up by 6.02.
y = mff.OD.oneDimensional.high(xrange)
shifted = y + 6.02
plt.plot(xrange, np.cumsum(shifted))

# Some simple examples

A number of cases in which a model is trained on just 4 points of the 1D function.
They are intended to illustrate which models we want to consider better or worse, and how the FSS method achieves this.

In [None]:
# Fit a Kriging surrogate to each 4-point design below and plot it against
# forrester, one subplot per design.
xrange = np.linspace(0,1,101)
# 3 x 3 grid of designs; each innermost list holds the 4 training locations
# used for one subplot.
samples = np.array([
    [[0.0, 0.4, 0.6, 1.0],
     [0.0, 0.33, 0.66, 1.0],
     [0.0, 0.6, 0.8, 1.0],],

    [[0.1, 0.3, 0.5, 0.7],
     [0.2, 0.4, 0.6, 0.8],
     [0.0, 0.1, 0.3, 0.9],],

    [[0.1, 0.2, 0.3, 0.4],
     [0.3, 0.4, 0.6, 0.7],
     [0.6, 0.7, 0.8, 0.9],],
])


shape = samples.shape
# Subplot grid mirrors the first two dimensions of `samples`.
fig, axes = plt.subplots(nrows=shape[0], ncols=shape[1], figsize=(4*shape[1], 4*shape[0]))

# Flatten designs and axes in the same order so they pair up one-to-one.
for sample, ax in zip(samples.reshape(-1,4), axes.flatten()):
    x = sample.reshape(-1, 1)
    # A fresh archive per design; this rebinds the notebook-level `archive`.
    archive = mlcs.CandidateArchive(ndim=1)#, fidelities=['high', 'low', 'high-low'])

    # NOTE(review): `y` is not used in the lines visible here; the archive is
    # filled with forrester(x) instead - confirm which function is intended.
    y = mff.OD.oneDimensional.high(x)
    archive.addcandidates(x, forrester(x))#, fidelity='high')

    # This rebinds the notebook-level `surr` on every iteration.
    surr = mlcs.Surrogate.fromname('Kriging', archive)#, kernel='Matern')
    surr.retrain()

    # True function, surrogate prediction, and the training points on top.
    ax.plot(xrange, forrester(xrange))
    ax.plot(xrange, surr.predict(xrange.reshape(-1,1)))
    ax.scatter(*archive.getcandidates(), zorder=3, color='C2')