# Example: how to add a dataset catalog to OpenVisus

This example reuses the idea of `visus.config`, but with the datasets centrally managed in a catalog file.
To maintain backward compatibility, the catalog is stored in XML format, but it can be migrated to JSON and/or YAML in the future.


## Import the necessary libraries



In [1]:
import os,sys,logging,time
import numpy as np

# needed only in debug mode:
# when debug_mode is on, local development checkouts of OpenVisus and openvisuspy
# are added to the module search path BEFORE the imports below run.
# The paths are machine-specific; change them (or set debug_mode=False) as needed.
debug_mode=True
if debug_mode:
  # NOTE(review): append() puts these at the END of sys.path, so an installed copy
  # found earlier in the path would presumably still take precedence - confirm.
  sys.path.append("c:/projects/openvisus/build/RelWithDebInfo")
  sys.path.append("c:/projects/openvisuspy/src")

# `ov` is the alias used by the catalog code below (ov.StringTree, ov.LoadDatasetEx)
import OpenVisus as ov
import openvisuspy
# timestamped message, printed once both imports have completed
print(time.time(),"OpenVisus imported")

1751003265.7636766 OpenVisus imported


In [2]:
## Catalog source code, to be integrated into openvisuspy

In [None]:
import os,requests

# Default catalog, returned by LoadCatalog when no url is given.
# Valerio mentioned the necessity to have a default catalog, embedded in the Python package,
# in case the remote catalog is not available. Change as needed.
# Format: an XML <datasets> root with one <dataset name=... url=...> child per dataset.
DEFAULT_CATALOG = """
    <datasets>
        <dataset name='2kbit1' url='http://atlantis.sci.utah.edu/mod_visus?dataset=2kbit1' >
            <access type='network' chmod='r' compression='zip'  />
        </dataset>
    </datasets>
    """

# ///////////////////////////////////////////////////////////////////////////
def LoadCatalog(url:str=None, timeout:float=30.0)-> str:

  """
  Return the body of a dataset catalog as a string.

  Args:
    url: where to get the catalog from.
      - None: use the default catalog (`DEFAULT_CATALOG`).
      - str: URL to the catalog, can be a local file path (i.e. ~/visus.config, `~` is expanded)
        or a remote URL (`http://nationalsciencedatafabric.org/catalog.config`).
      - str: body of the catalog in XML format (to be extended to JSON and YAML too?).
    timeout: seconds to wait for the remote server before giving up
      (only used for http:// and https:// URLs).

  Returns:
    The catalog body.

  Raises:
    ValueError: `url` is neither None, an existing file, an http(s) URL nor a valid XML document.
    requests.RequestException: the remote catalog could not be downloaded
      (connection problem, timeout or HTTP error status).
  """

  if url is None:
    return DEFAULT_CATALOG

  # local file. isfile (not exists): a directory must not be opened as if it were a catalog file
  local_path=os.path.expanduser(url)
  if os.path.isfile(local_path):
    with open(local_path, 'r') as f:
      return f.read()

  # remote file
  if url.startswith(('http://', 'https://')):
    # without an explicit timeout requests can wait forever on an unresponsive server
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text

  # otherwise assume url is a string containing the catalog itself
  body=url
  try:
    is_valid=ov.StringTree.fromString(body).valid()
  except Exception as err:
    # a parsing failure means "not a catalog"; KeyboardInterrupt/SystemExit are left to propagate
    raise ValueError("Invalid url. Please provide None | valid url | XML document.") from err

  if not is_valid:
    raise ValueError("Invalid url. Please provide None | valid url | XML document.")

  return body


# ///////////////////////////////////////////////////////////////////////////
def LoadDataset(name:str, catalog:str=None):

  """
  Extend the current openvisuspy to support loading datasets from a catalog.

  Args:
    name: without a catalog, the argument forwarded as-is to `openvisuspy.LoadDataset`;
      with a catalog, the value of the `name` attribute of the `<dataset>` element to open.
    catalog: body of the catalog in XML format (for example the string returned by `LoadCatalog`),
      or None to skip the catalog lookup.

  Returns:
    Whatever `openvisuspy.LoadDataset` (no catalog) or `ov.LoadDatasetEx` (catalog) returns.

  Raises:
    ValueError: no `<dataset>` element directly under the catalog root has the requested name.
  """

  if catalog is None:
    # no catalog provided, load dataset directly (standard behavior).
    # The package is imported in this file as `openvisuspy` (there is no `ovpy` alias).
    # NOTE(review): confirm openvisuspy.LoadDataset is the intended entry point.
    return openvisuspy.LoadDataset(name)

  # need to load dataset from the provided catalog:
  # only the direct children of the root element are searched
  stree = ov.StringTree.fromString(catalog)
  for I in range(stree.getNumberOfChilds()):
    child = stree.getChild(I)
    if child.name=="dataset" and child.getAttribute("name") == name:
      return ov.LoadDatasetEx(child)

  raise ValueError(f"Dataset '{name}' not found in the catalog.")



## Example of usage of default catalog

In [44]:
# no argument: LoadCatalog returns the embedded DEFAULT_CATALOG
catalog=LoadCatalog()
# find the <dataset> named "2kbit1" in the catalog and open it
db=LoadDataset("2kbit1", catalog=catalog)
# print the dataset body as a string (see the output below)
body=db.getDatasetBody().toString()
print(body)

<dataset name="2kbit1" url="http://atlantis.sci.utah.edu/mod_visus?dataset=2kbit1" typename="IdxDataset">
	<access type="network" chmod="r" compression="zip" />
	<idxfile>
		<version value="6" />
		<bitmask value="V012012012012012012012012012012012" />
		<box value="0 2048 0 2048 0 2048" />
		<bitsperblock value="16" />
		<blocksperfile value="256" />
		<block_interleaving value="0" />
		<filename_template value="./visus/%02x/%04x.bin" />
		<missing_blocks value="False" />
		<arco value="0" />
		<time_template value="" />
		<axis value="" />
		<field name="DATA" description="" index="" default_compression="zip" default_layout="hzorder" default_value="0" filter="" dtype="uint8" />
		<timestep when="0" />
	</idxfile>
</dataset>


## Example of usage using a local catalog file

In [43]:
# the argument is a path to a catalog file on the local disk (machine-specific, change as needed)
catalog=LoadCatalog(r"D:\visus-datasets\datasets.config")
# find the <dataset> named "2kbit1" in the catalog and open it
db=LoadDataset("2kbit1", catalog=catalog)
# print the dataset body as a string (see the output below)
body=db.getDatasetBody().toString()
print(body)

<dataset name="2kbit1" url="file://D:/visus-datasets/2kbit1/zip/hzorder/visus.idx" permissions="public" typename="IdxDataset">
	<idxfile>
		<version value="6" />
		<bitmask value="V012012012012012012012012012012012" />
		<box value="0 2048 0 2048 0 2048" />
		<bitsperblock value="16" />
		<blocksperfile value="256" />
		<block_interleaving value="0" />
		<filename_template value="./visus/%02x/%04x.bin" />
		<missing_blocks value="False" />
		<arco value="0" />
		<time_template value="time_%02d/" />
		<axis value="" />
		<field name="DATA" description="" index="" default_compression="zip" default_layout="0" default_value="0" filter="" dtype="uint8" />
		<timestep from="0" to="1" step="1" />
	</idxfile>
</dataset>


## Example of usage using a remote catalog file

In [45]:
# the argument is an https URL: the catalog is downloaded from the remote server
catalog=LoadCatalog("https://raw.githubusercontent.com/sci-visus/openvisuspy/refs/heads/main/catalogs/default.config")
# find the <dataset> named "2kbit1" in the catalog and open it
db=LoadDataset("2kbit1", catalog=catalog)
# print the dataset body as a string (see the output below)
body=db.getDatasetBody().toString()
print(body)

<dataset name="2kbit1" url="http://atlantis.sci.utah.edu/mod_visus?dataset=2kbit1" typename="IdxDataset">
	<access type="network" chmod="r" compression="zip" />
	<idxfile>
		<version value="6" />
		<bitmask value="V012012012012012012012012012012012" />
		<box value="0 2048 0 2048 0 2048" />
		<bitsperblock value="16" />
		<blocksperfile value="256" />
		<block_interleaving value="0" />
		<filename_template value="./visus/%02x/%04x.bin" />
		<missing_blocks value="False" />
		<arco value="0" />
		<time_template value="" />
		<axis value="" />
		<field name="DATA" description="" index="" default_compression="zip" default_layout="hzorder" default_value="0" filter="" dtype="uint8" />
		<timestep when="0" />
	</idxfile>
</dataset>
