In [None]:
#default_exp io

# Input/Output

> This module manages where downloaded data is stored via a config
file. It also provides a PathManager class to retrieve the paths to files in
the archive.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import toml
import logging
from collections import OrderedDict
from pathlib import Path
import os
import pandas as pd

# Module-named logger: records still propagate to the root logger's handlers,
# but applications can now filter or configure this module separately.
logger = logging.getLogger(__name__)

## Configfile

The config file for now just contains one path to the folder where all data for `pyciss` will be stored.

In [None]:
#export
class Config:
    """Manage config stuff.

    Attributes
    ----------
    path: pathlib.Path
        Location of the TOML config file. Defaults to `~/.pyciss.toml`, can be
        overridden with the `PYCISS_CONFIG` environment variable or the
        `other_config` init argument.
    d: dict
        The key, value pairs of the config file, always set after init.

    The key, value pairs found in the config file become attributes of the
    class instance after initialization.
    At minimum, there should be the `archive_path` attribute for storing data
    for this package.
    """
    # This enables a config path location override using env PYCISS_CONFIG
    path = Path(os.getenv("PYCISS_CONFIG", Path.home() / '.pyciss.toml'))

    def __init__(self, other_config=None):
        "Switch to other config file location with `other_config`."
        if other_config is not None:
            self.path = Path(other_config)
        self.get_config()

    def get_config(self):
        """Read the configfile and store config dict in `self.d`.

        If the file is missing or contains no keys, `not_found` is called to
        ask the user for the archive path and to write the config file.
        """
        self.d = {}
        if self.path.exists():
            with open(self.path) as f:
                self.d = toml.load(f)
        # The read handle is closed at this point, so `not_found` can safely
        # rewrite the very same file.
        if not self.d:
            self.not_found()
        # all found key.value pairs become attributes
        for k, v in self.d.items():
            setattr(self, k, v)

    def not_found(self):
        """Use input to ask user for the archive_path.

        The path will be stored in the config file `Config.path` (either default or as given
        during init.) and in `self.d`, so the in-memory config dict matches the file.
        """
        path = input("Provide path where all package data will be stored:")
        self.d = {"archive_path": path}
        self.archive_path = path
        with self.path.open("w") as f:
            toml.dump(self.d, f)

In [None]:
#exports
# Module-level configuration instance, created when this module is imported.
# `Config()` reads the config file and, if it is missing or empty, prompts via
# `input()` for the archive path. `get_db_root` reads `config.archive_path`.
config = Config()

In [None]:
config.archive_path

'/home/maye/big_drive/planetary_data/pyciss'

The `Config` object init will ask for an input path if the config file is not found or empty:

In [None]:
show_doc(Config.not_found)

<h4 id="Config.not_found" class="doc_header"><code>Config.not_found</code><a href="__main__.py#L37" class="source_link" style="float:right">[source]</a></h4>

> <code>Config.not_found</code>()

```
Use input to ask user for the archive_path. 

The path will be stored in the config file `Class.path` (either default or as given
during init.)
```

To test this, we need to catch the UI `input()` call and provide a default output that would be entered as the `archive_path` into the `Config` object.

First, we create a random temporary file for a new config:

In [None]:
# Create an empty temporary file that stands in for a fresh config file.
# It exists but holds no keys, so `Config` will treat it as an empty config.
import tempfile
f = tempfile.NamedTemporaryFile()
# Display the generated file name.
f.name

'/tmp/tmp349bqoil'

Now we store the pointer to the `input` function as a backup, and patch the `input` function to return a test archive_path:

In [None]:
# Keep a reference to the built-in `input` function so it can be restored later.
old_input = input
# Patch `input` for testing: rebinding the name in this notebook's namespace
# makes `Config.not_found` (defined in this notebook) receive the fixed answer
# instead of waiting for user input.
input = lambda x: "/some/path"

Now we create a new `Config` object. As the temporary config file has no archive path in it, the patched `input` call in `Config.not_found` will return the above test path.
In reality, the user will provide the path at the `input` prompt:

In [None]:
# Create a new Config object that uses the temporary file as its config file.
c = Config(f.name)

# The path returned by the patched `input` must have been written to the file.
assert toml.load(f.name)['archive_path'] == "/some/path"

# Restore the original `input` function:
input = old_input

In [None]:
#export
def get_db_root():
    """Return the archive root folder, creating it if it does not exist yet.

    Returns
    -------
    pathlib.Path
        The path stored as `archive_path` in the module-level `config`.
    """
    dbroot = Path(config.archive_path)
    # parents=True: a freshly configured archive path may point into folders
    # that do not exist yet; without it mkdir raises FileNotFoundError.
    dbroot.mkdir(parents=True, exist_ok=True)
    return dbroot

In [None]:
get_db_root()

Path('/home/maye/big_drive/planetary_data/pyciss')

## PathManager

> The `PathManager` provides access to the paths in the archive, based on class attributes that reflect the processing level of the file:

In [None]:
#export
from collections import OrderedDict

class PathManager:

    """Manage paths to data in database.

    The `.pyciss.toml` config file determines the path to the database for ISS images.
    With this class you can access the different kind of files conveniently.

    Using the stored extensions dictionary, the attributes of the object listed here are created
    dynamically at object initialization and when the image_id is being set.

    NOTE
    ----
    This class will read the .pyciss.toml to define the dbroot path, but
    one can also call it with the savedir argument to override that.

    Parameters
    ----------
    img_id : str or pathlib.Path
        The N... or W... image identifier string of CISS images or the absolute
        path to an existing image
    savedir : str or pathlib.Path
        Path to the pyciss image database. By default defined by what's found in
        the .pyciss.toml config, but can be overridden using this parameter.

    Attributes
    ----------
    dbroot
    basepath
    img_id
    version
    cal_cub
    calib_img
    calib_label
    cubepath
    dst_cub
    raw_cub
    raw_image
    raw_label
    tif
    undestriped
    """

    # attribute name -> file name suffix that follows "<img_id>_<version>"
    d = {
        'cubepath': '.cal.dst.map.cub',
        'cal_cub': '.cal.cub',
        'dst_cub': '.cal.dst.cub',
        'raw_cub': '.cub',
        'raw_label': '.LBL',
        'raw_image': '.IMG',
        'calib_img': '_CALIB.IMG',
        'calib_label': '_CALIB.LBL',
        'tif': '.cal.dst.map.tif',
        'undestriped': '.cal.map.cub'
    }
    # ordered, sorted by key:
    extensions = OrderedDict(sorted(d.items(), key=lambda t: t[0]))

    def __init__(self, img_id, savedir=None):
        # str() so that a pathlib.Path (allowed per the docstring) works too;
        # Path objects have no .upper().
        img_id = str(img_id).upper()
        self.input_img_id = img_id
        if Path(img_id).is_absolute():
            # the splits remove the _1.IMG or _2.IMG (or a bare extension)
            # from the file name to get the image id.
            self._id = Path(img_id).name.split('_')[0].split('.')[0]
        else:
            # I'm using only filename until _ for storage
            # TODO: Could this create a problem?
            self._id = img_id[:11]
        if savedir is None:
            self.dbroot = get_db_root()
        else:
            self.dbroot = Path(savedir)

        self.set_version()
        self.set_attributes()

    def set_version(self):
        """Set `self.version` from the given id, or from the database.

        If the id given at init carries a version (`<id>_<version>...`), that
        one is used. Otherwise the database folder is searched for a raw image
        `<id>_?.IMG`; if none is found, the version is set to '0'.
        """
        name = Path(self.input_img_id).name
        tokens = name.split('_')
        # Only trust the name if it really has a "_<version>" part; a long name
        # without underscore (e.g. "N1875229393.IMG") carries no version.
        if len(name) > 11 and len(tokens) > 1:
            self.version = tokens[1].split('.')[0]
            return
        # if the given id was without version, check if a raw file is in database:
        try:
            rawname = next(self.basepath.glob(self.img_id + "_?.IMG")).name
        except StopIteration:
            self.version = '0'
        else:
            # the version is the single character right after "<img_id>_"
            self.version = rawname[len(self.img_id) + 1]

    @property
    def basepath(self):
        "Folder inside the database that holds all files of this image id."
        return self.dbroot / self._id

    @property
    def img_id(self):
        "The image identifier without version and extension."
        return self._id

    @img_id.setter
    def img_id(self, value):
        self._id = value
        self.set_attributes()

    def set_attributes(self):
        "Create one path attribute per entry of `extensions`."
        for k, v in self.extensions.items():
            path = self.basepath / f"{self.img_id}_{self.version}{v}"
            setattr(self, k, path)

    def __str__(self):
        self.set_version()
        self.set_attributes()  # in case there were changes
        lines = []
        for k in self.extensions:
            path = getattr(self, k)
            # only paths that exist on disk are printed
            status = str(path) if path.exists() else "not found."
            lines.append("{}: {}\n".format(k, status))
        return ''.join(lines)

    def __repr__(self):
        return self.__str__()

In [None]:
#export
def db_mapped_cubes():
    "Return a generator over all `*cal.dst.map.cub` files anywhere below the db root."
    archive_root = get_db_root()
    pattern = "**/*cal.dst.map.cub"
    return archive_root.glob(pattern)


def db_label_paths():
    """Return a generator over all `.LBL` label files in the database.

    The search is recursive: `PathManager` stores the files of each image in
    its own subfolder of the db root, so a non-recursive `*.LBL` would not
    find them. Files directly in the db root are still matched.
    """
    return get_db_root().glob("**/*.LBL")

In [None]:
list(db_mapped_cubes())

[Path('/home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.dst.map.cub')]

In [None]:
pm = PathManager('N1875229393')

In [None]:
pm

cal_cub: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.cub
calib_img: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1_CALIB.IMG
calib_label: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1_CALIB.LBL
cubepath: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.dst.map.cub
dst_cub: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.dst.cub
raw_cub: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cub
raw_image: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.IMG
raw_label: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.LBL
tif: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.dst.map.tif
undestriped: /home/maye/big_drive/planetary_data/pyciss/N1875229393/N1875229393_1.cal.map.cub

In [None]:
pm.extensions

OrderedDict([('cal_cub', '.cal.cub'),
             ('calib_img', '_CALIB.IMG'),
             ('calib_label', '_CALIB.LBL'),
             ('cubepath', '.cal.dst.map.cub'),
             ('dst_cub', '.cal.dst.cub'),
             ('raw_cub', '.cub'),
             ('raw_image', '.IMG'),
             ('raw_label', '.LBL'),
             ('tif', '.cal.dst.map.tif'),
             ('undestriped', '.cal.map.cub')])

In [None]:
#export
def print_db_stats():
    """Print database stats.

    Prints the database location and the number of image folders, then counts
    the files found for each entry of `PathManager.extensions`.

    Returns
    -------
    pd.DataFrame
        Table with the found data items per type.
    """
    dbroot = get_db_root()
    print(f"Database location: {dbroot}")
    # "[NW]*" matches entries starting with N or W. A comma inside the
    # brackets would be part of the character class and also match ",...".
    n_ids = sum(1 for _ in dbroot.glob("[NW]*"))
    print("Number of WACs and NACs in database: {}".format(n_ids))
    print("These kind of data are in the database: (returning pd.DataFrame)")
    # one column per file type, holding the number of "<id>_<version><suffix>" files
    d = {
        key: [sum(1 for _ in dbroot.glob("**/*_?" + val))]
        for key, val in PathManager.extensions.items()
    }
    return pd.DataFrame(d)

In [None]:
print_db_stats()

Database location: /home/maye/big_drive/planetary_data/pyciss
Number of WACs and NACs in database: 1
These kind of data are in the database: (returning pd.DataFrame)


Unnamed: 0,cal_cub,calib_img,calib_label,cubepath,dst_cub,raw_cub,raw_image,raw_label,tif,undestriped
0,1,1,1,1,1,1,1,1,1,1


In [None]:
#export
class DBManager():
    """Helper class for the whole archive."""
    def __init__(self):
        self.dbroot = get_db_root()

    def print_stats(self):
        """Print the database stats and return them.

        Returns
        -------
        The value returned by `print_db_stats` (a table with the found data
        items per type), so that callers can work with it instead of losing it.
        """
        return print_db_stats()

In [None]:
#export
def filename_from_obsid(obsid):
    "Join the last and the second-to-last '_'-separated token of `obsid`, in that order."
    parts = obsid.split('_')
    last = parts[-1]
    second_to_last = parts[-2]
    return "{}{}".format(last, second_to_last)

In [None]:
from nbdev.export import notebook2script; notebook2script()

Converted 00_io.ipynb.
Converted 01_opusapi.ipynb.
Converted 02_pipeline.ipynb.
Converted 03_ringutils.ipynb.
Converted 04_plotting.ipynb.
Converted 05_index.ipynb.
Converted 06_ringcube.ipynb.
Converted index.ipynb.
Converted to_be_implemented.ipynb.
