## Create test datasets for 1, 2, 3, 4 and 5 sigma widths

In [50]:
%pylab inline
from Chempy.parameter import ModelParameters
from Chempy.cem_function import posterior_function_returning_predictions

def test_dataset(width,size):
    """
    Create a test dataset of fixed width around the default parameters.

    Each of the ``size`` parameter vectors is drawn from a uniform
    distribution between ``a.p0 - width*sigma`` and ``a.p0 + width*sigma``.
    ``sigma`` is read from index 1 of ``a.priors[name]`` for every name in
    ``a.to_optimize`` (presumably the prior standard deviation - confirm
    against the parameter file). The model predictions are computed for every
    sample and both grids are saved to disk.

    Input:
        width - half-width of the sampling interval, in units of sigma
        size - number of parameter samples to draw

    Outputs (saved as .npy files in the SingleElement/ folder):
        <width>_sigma_param_grid.npy - sampled parameter vectors
        <width>_sigma_abundances.npy - model predictions for each sample
    """

    import warnings
    warnings.filterwarnings("ignore")

    a = ModelParameters()

    # Half-width of the sampling interval for each optimized parameter.
    # It does not depend on the sample index, so it is built once here.
    half_width = width*np.array([a.priors.get(param_name)[1] for param_name in a.to_optimize])

    # NOTE(review): samples are not restricted to the bounds in a.constraints;
    # confirm that drawing outside the allowed range is intended for large widths.
    param_grid = []
    abundance_grid = []
    for i in range(size):
        if i%10==0:
            print('Calculating sample %d of %d' %(i+1,size))
        param = np.random.uniform(a.p0-half_width,a.p0+half_width)
        pred,_ = posterior_function_returning_predictions((param,a))
        param_grid.append(list(param))
        abundance_grid.append(list(pred))

    np.save('SingleElement/'+str(width)+'_sigma_param_grid.npy',param_grid)
    np.save('SingleElement/'+str(width)+'_sigma_abundances.npy',abundance_grid)
    return None

Populating the interactive namespace from numpy and matplotlib


In [51]:
# Quick run: 3 samples at a width of 2 sigma
test_dataset(width=2, size=3)

Calculating sample 1 of 3


In [None]:
def verification_and_testing():
    """Build the verification and test data-sets used with the neural network.

    For each data-set, every parameter is drawn from a normal distribution
    centred on ``a.p0[i]`` with scale ``a.test_widths[i]``. A draw is repeated
    until it lies inside the bounds read from ``a.constraints``. The model
    predictions are then computed for each accepted parameter vector.

    Outputs (saved as .npy files in the Neural/ folder):
        verif_param_grid - Verification parameter data
        verif_abundances - Verification dataset abundances
        test_param_grid - Test parameter data
        test_abundances - Test dataset abundances
    """
    # FOR TESTING: silence every warning from here on
    import warnings
    warnings.filterwarnings("ignore")

    a = ModelParameters()
    n_params = len(a.p0)

    # Allowed range of each parameter, in the order given by a.to_optimize
    lower = np.zeros(n_params)
    upper = np.zeros(n_params)
    for idx, param_name in enumerate(a.to_optimize):
        lower[idx], upper[idx] = a.constraints.get(param_name)

    # Two datasets: verification first, then test
    for set_index, name in enumerate(['verif', 'test']):
        n_samples = a.verif_test_sizes[set_index]
        param_grid = []
        model_abundances = []
        for k in range(n_samples):
            # Every entry starts at +inf so that the first range check fails
            param = np.full(n_params, np.inf)
            for i in range(n_params):
                draw = param[i]
                # Redraw until the value falls inside [lower, upper]
                while draw < lower[i] or draw > upper[i]:
                    draw = np.random.normal(loc=a.p0[i], scale=a.test_widths[i])
                param[i] = draw
            param_grid.append(param)
            abundances, _ = posterior_function_returning_predictions((param, a))
            model_abundances.append(abundances)
            if k % 100 == 0:
                print("Calculating %s abundance set %d of %d" % (name, k, n_samples))
        np.save("Neural/" + name + "_param_grid.npy", param_grid)
        np.save("Neural/" + name + "_abundances.npy", model_abundances)

    return None
