In [None]:
%matplotlib inline
from matplotlib import colors
from mpl_toolkits.axes_grid1 import make_axes_locatable   
from pyDOE import lhs
import matplotlib.pyplot as plt
import numpy as np
import time
import os
import sys
import warnings

# Put the directory two levels above the working directory on the import path
# (once only), so the project-local modules imported below can be found.
module_path = os.path.abspath('../..')
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)
    
from itertools import product
from more_itertools import chunked
from functools import partial
import multifidelityfunctions as mff
import multiLevelCoSurrogates as mlcs
from sklearn.gaussian_process import GaussianProcessRegressor, kernels
from sklearn.ensemble import RandomForestRegressor

# Fixed seed for NumPy's global random state, so repeated runs are reproducible.
SEED = 20160501
np.random.seed(SEED)

# Short alias for the `oneDimensional` attribute of multifidelityfunctions.
OD = mff.oneDimensional

from IPython.core.display import clear_output
from pprint import pprint
# Locations (relative to the working directory) that the cells below read
# cached arrays from and write figures to.
data_dir = '../../files/'
plot_dir = '../../plots/'

# Wide, compact array printing: 4 decimals, no scientific notation for small values.
np.set_printoptions(
    precision=4,
    suppress=True,
    linewidth=200,
    edgeitems=10,
)

from function_defs import *

In [None]:
# Load the stored 2D test sample and show how its points are distributed.
# `sample` is reused by later cells as the scatter coordinates.
sample = np.load(f'{data_dir}2d_test_sample.npy')

fig, ax = plt.subplots()
ax.scatter(sample[:, 0], sample[:, 1])  # column 0 on x, column 1 on y
fig.tight_layout()
fig.savefig(f'{plot_dir}2d_sample_distribution.pdf')
plt.show()

# Error distribution

In [None]:
# Available reducers, keyed by the name that also appears in plot titles and file names.
aggregate = {'mean': np.mean, 'median': np.median}

# Toggles: change the trailing index to switch variant.
aggregation = ('mean', 'median')[1]   # which reducer to apply
lin = ('', 'lin_')[1]                 # file-name infix selecting which error file is loaded

errors = np.load(f'{data_dir}2d_{lin}error_tracking.npy')

# Collapse the three leading axes of the error array with the selected reducer.
# NOTE: the result keeps the name `mean_errors` even when the median is selected,
# because later cells refer to it by that name.
reduce_errors = aggregate[aggregation]
mean_errors = reduce_errors(errors, axis=(0, 1, 2))

In [None]:
def plot_error_scatter(point_errors, model_label, file_suffix):
    """Scatter the test sample coloured by aggregated error, then save and show it.

    Reads the notebook-level names `sample`, `aggregation`, `lin` and `plot_dir`.

    Parameters
    ----------
    point_errors : array-like
        One aggregated error value per row of `sample`, used as the colour.
        The colour scale is logarithmic and spans exactly min..max of these
        values (LogNorm expects positive values).
    model_label : str
        Model description inserted into the plot title.
    file_suffix : str
        Last component of the output file name, before the `.pdf` extension.
    """
    norm = colors.LogNorm(vmin=np.min(point_errors), vmax=np.max(point_errors))
    img = plt.scatter(sample[:,0], sample[:,1], c=point_errors, cmap='viridis_r', norm=norm)
    plt.title(f'{aggregation.title()} error - {model_label} model')
    plt.colorbar(img, orientation='vertical')
    plt.tight_layout()
    plt.savefig(f'{plot_dir}2d_{lin}sample_{aggregation}_error_{file_suffix}.pdf')
    plt.show()


# Row index in `mean_errors` -> (title label, output file suffix).
# One figure per model; titles and file names match the former one-cell-per-model version.
error_plots = [
    ('high fidelity (hierarchical)', 'hierarchical'),
    ('high fidelity (direct)', 'high'),
    ('low fidelity (direct)', 'low'),
]
for model_idx, (model_label, file_suffix) in enumerate(error_plots):
    plot_error_scatter(mean_errors[model_idx], model_label, file_suffix)

In [None]:
# Log-spaced bin edges 1e-2, 2e-2, ..., 9e13, closed off with 1e14.
# The mantissa runs 1..9 only: letting it reach 10 would emit every decade
# boundary twice (10*10**e == 1*10**(e+1)) and create zero-width bins.
bins = [b*10**e for e in range(-2, 14) for b in range(1, 10)] + [10**14]

# Index along axis 3 of `errors` -> model, in the same order used for the
# per-model scatter plots of `mean_errors`.
model_labels = [
    'high fidelity (hierarchical)',
    'high fidelity (direct)',
    'low fidelity (direct)',
]
for i, model_label in enumerate(model_labels):
    # cumulative=-1: each bar counts the errors in that bin and all higher bins.
    # log=True already puts the count axis on a log scale.
    plt.hist(errors[:,:,:,i,:].flatten(), label=model_label, bins=bins, cumulative=-1, log=True, alpha=.5)
plt.xscale('log')
plt.xlabel('Error')
plt.ylabel('Count (reverse cumulative)')
plt.legend(loc=0)
plt.tight_layout()
plt.savefig(f'{plot_dir}2d_{lin}sample_error_hist.pdf')
plt.show()

# MSE errors per sample size combination

## Random sampling

In [None]:
# Reuse the stored MSE array when it is present in data_dir; otherwise compute
# it with the random low-fidelity sampler and store it for the next run.
mse_file = '2d_mse_tracking.npy'
if mse_file not in os.listdir(data_dir):
    mse_tracking = create_mse_tracking(TD_inv, low_random_sample)
    np.save(f'{data_dir}{mse_file}', mse_tracking)
else:
    mse_tracking = np.load(f'{data_dir}{mse_file}')

In [None]:
print('median')
# NaN-ignoring median over axis 2, flattened once up front instead of being
# recomputed for each of the six percentiles.
median_mse = np.nanmedian(mse_tracking, axis=2).flatten()
# Upper tail of those medians: 95th through 100th percentile.
pprint([(f'{pct}%-ile', np.nanpercentile(median_mse, pct)) for pct in range(95, 101)])

In [None]:
# The same identifier is passed to the plot helper and used as the PDF file stem.
name = '2d-high-low-samples-random'
save_path = f'{plot_dir}{name}.pdf'
plot_high_vs_low_num_samples(mse_tracking, name, vmax=1000, save_as=save_path)

In [None]:
# Same identifier as the plain random-sampling plot; the file gets a `_diff` suffix.
name = '2d-high-low-samples-random'
save_path = f'{plot_dir}{name}_diff.pdf'
plot_high_vs_low_num_samples_diff(mse_tracking, name, vmax=10000, save_as=save_path)

## Latin hypercube sampling (LHS)

In [None]:
# Reuse the stored MSE array when it is present in data_dir; otherwise compute
# it with the LHS low-fidelity sampler and store it for the next run.
lin_mse_file = '2d_lin_mse_tracking.npy'
if lin_mse_file not in os.listdir(data_dir):
    lin_mse_tracking = create_mse_tracking(TD_inv, low_lhs_sample)
    np.save(f'{data_dir}{lin_mse_file}', lin_mse_tracking)
else:
    lin_mse_tracking = np.load(f'{data_dir}{lin_mse_file}')

In [None]:
print('median')
# NaN-ignoring median over axis 2, flattened once up front instead of being
# recomputed for each of the six percentiles.
lin_median_mse = np.nanmedian(lin_mse_tracking, axis=2).flatten()
# Upper tail of those medians: 95th through 100th percentile.
pprint([(f'{pct}%-ile', np.nanpercentile(lin_median_mse, pct)) for pct in range(95, 101)])

In [None]:
# The same identifier is passed to the plot helper and used as the PDF file stem.
name = '2d-high-low-samples-linear'
save_path = f'{plot_dir}{name}.pdf'
plot_high_vs_low_num_samples(lin_mse_tracking, name, vmax=1000, save_as=save_path)

In [None]:
# Same identifier as the plain LHS plot; the file gets a `_diff` suffix.
name = '2d-high-low-samples-linear'
save_path = f'{plot_dir}{name}_diff.pdf'
plot_high_vs_low_num_samples_diff(lin_mse_tracking, name, vmax=3000, save_as=save_path)

## Difference in error between random and LHS sampling

In [None]:
# Compare the two tracking arrays: random sampling first, LHS second.
# NOTE(review): `name` is also the file stem, so the PDF file name contains
# spaces and a comma - confirm that is intended.
name = "2D, random - LHS"
save_path = f'{plot_dir}{name}.pdf'
plot_inter_method_diff(mse_tracking, lin_mse_tracking, name, save_as=save_path)