In [1]:
from pathlib import Path

import h5py
from archnemesis.helpers import h5py_helper

from archnemesis.database.filetypes.ans_line_data_file import AnsLineDataFile
from archnemesis.database.filetypes.ans_partition_fn_data_file import AnsPartitionFunctionDataFile



# When True, test files that already exist are deleted so they can be rebuilt.
RECREATE_TEST_FILES = False

test_data_dir = Path("./test_data")

# File the test subsets are copied from
original_source_file = test_data_dir / "hitran24.h5"

# All test files sit next to the original file and share its stem plus a descriptive suffix
combined_external_source_file = original_source_file.with_stem(f'{original_source_file.stem}_with_external_sources')
test_file_subset_root = original_source_file.with_stem(f'{original_source_file.stem}_external_subset_root')
test_file_subset_source = original_source_file.with_stem(f'{original_source_file.stem}_external_subset_source')
test_file_subset_badname = original_source_file.with_stem(f'{original_source_file.stem}_external_subset_badname')

# Look for test files left over from a previous run: delete them when recreating,
# otherwise report each one and remember that at least one was found.
any_test_file_exists = False
for test_file in (
	combined_external_source_file,
	test_file_subset_root,
	test_file_subset_source,
	test_file_subset_badname,
):
	if not test_file.exists():
		continue
	if RECREATE_TEST_FILES:
		test_file.unlink()
		continue
	print(f'Test file "{test_file}" exists, so will not create any test files')
	any_test_file_exists = True


if any_test_file_exists:
	print('Will only create test files if no test files exist. Either delete them or set `RECREATE_TEST_FILES` to True')

else:

	# Molecules that go into each part of the test dataset
	mols_1 = ['CH4', 'H2O'] # top-level group of `test_file_subset_root`
	mols_2 = ['H2S', 'CO', 'CO2'] # 'HITRAN24' source of `test_file_subset_source`
	mols_3 = ['H2', 'GeH4'] # 'TEST_SOURCE' source of `test_file_subset_source`
	mols_4 = ['O2', 'O3'] # 'bad_name_*' group of `test_file_subset_badname`

	target_group_names = ('partition_function', 'line_data')

	# External files are referenced relative to the directory holding the combined file
	link_base_dir = combined_external_source_file.parent

	for target_group_name in target_group_names:

		# Every molecule is copied out of this group of the original file
		origin_group_path = f'/sources/HITRAN24/{target_group_name}'

		with h5py.File(original_source_file, 'r') as origin:

			# Layout 1: data group sits directly at the root of the file
			with h5py.File(test_file_subset_root, 'a') as subset:
				root_grp = h5py_helper.ensure_grp(subset, target_group_name)
				for mol in mols_1:
					origin.copy(f'{origin_group_path}/{mol}', root_grp)

			# Layout 2: two named sources in one file, each with its own data group
			with h5py.File(test_file_subset_source, 'a') as subset:
				sources_grp = h5py_helper.ensure_grp(subset, 'sources')
				hitran_data_grp = h5py_helper.ensure_grp(
					h5py_helper.ensure_grp(sources_grp, 'HITRAN24'),
					target_group_name,
				)
				test_data_grp = h5py_helper.ensure_grp(
					h5py_helper.ensure_grp(sources_grp, 'TEST_SOURCE'),
					target_group_name,
				)

				for mols, destination_grp in ((mols_2, hitran_data_grp), (mols_3, test_data_grp)):
					for mol in mols:
						origin.copy(f'{origin_group_path}/{mol}', destination_grp)

			# Layout 3: data group stored under a non-standard name at the root of the file
			with h5py.File(test_file_subset_badname, 'a') as subset:
				badname_grp = h5py_helper.ensure_grp(subset, f'bad_name_{target_group_name}')
				for mol in mols_4:
					origin.copy(f'{origin_group_path}/{mol}', badname_grp)


		# Each entry is either a bare file name or a (file name, group path) pair
		root_file_rel = str(test_file_subset_root.relative_to(link_base_dir))
		source_file_rel = str(test_file_subset_source.relative_to(link_base_dir))
		badname_file_rel = str(test_file_subset_badname.relative_to(link_base_dir))

		source_map = {
			'external_filename_only' : root_file_rel,
			'external_source_hitran24' : (source_file_rel, f'/sources/HITRAN24/{target_group_name}'),
			'external_source_test' : (source_file_rel, f'/sources/TEST_SOURCE/{target_group_name}'),
			'external_file_with_group' : (badname_file_rel, f'/bad_name_{target_group_name}'),
		}

		# Record every reference as a string dataset named after the target group
		with h5py.File(combined_external_source_file, 'a') as combined:
			combined_sources_grp = h5py_helper.ensure_grp(combined, 'sources')
			for source_name, source_info in source_map.items():
				sx_grp = h5py_helper.ensure_grp(combined_sources_grp, source_name)

				# Scalar dataset for a bare file name, length-2 dataset for a (file, group) pair
				if isinstance(source_info, str):
					dataset_shape = tuple()
				elif isinstance(source_info, tuple) and (len(source_info)==2):
					dataset_shape = (2,)
				else:
					raise TypeError(f'{source_name=} {source_info=}. `source_info` should be a string or a tuple of two strings')

				h5py_helper.ensure_dataset(sx_grp, target_group_name, shape=dataset_shape, data=source_info, dtype='T')



# After the test data is created, refresh the combined file from the external sources it
# references, once per data kind.
# NOTE(review): the original note says this links the sources with virtual datasets; that
# happens inside `update_from_sources()` and is not visible here - confirm against the class.

# Open the combined file as a partition function data file and update it from its sources
pf_file_with_external_sources = AnsPartitionFunctionDataFile(combined_external_source_file)
pf_file_with_external_sources.update_from_sources()

# Open the same combined file as a line data file and update it from its sources
ld_file_with_external_sources = AnsLineDataFile(combined_external_source_file)
ld_file_with_external_sources.update_from_sources()



INFO :: validate_partition_function_group :: ans_partition_fn_data_file.py-148 :: Validation for "/partition_function" in "test_data/hitran24_with_external_sources.h5" succeeded
INFO :: validate_line_data_group :: ans_line_data_file.py-121 :: Validation for "/bad_name_line_data" in "test_data/hitran24_external_subset_badname.h5" succeeded
INFO :: validate_line_data_group :: ans_line_data_file.py-121 :: Validation for "/line_data" in "test_data/hitran24_external_subset_root.h5" succeeded
INFO :: validate_line_data_group :: ans_line_data_file.py-121 :: Validation for "/sources/HITRAN24/line_data" in "test_data/hitran24_external_subset_source.h5" succeeded
INFO :: validate_line_data_group :: ans_line_data_file.py-121 :: Validation for "/sources/TEST_SOURCE/line_data" in "test_data/hitran24_external_subset_source.h5" succeeded
INFO :: validate_line_data_group :: ans_line_data_file.py-121 :: Validation for "/line_data" in "test_data/hitran24_with_external_sources.h5" succeeded
