In [None]:
# default_exp io

# Input/Output basics for the pyciss2 package

> This module manages where downloaded data is stored via a config
file. It also has a PathManager to support finding the paths to files
of interest.


In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import toml
import logging
from collections import OrderedDict
from pathlib import Path

import pandas as pd

logger = logging.getLogger(__name__)

configpath = Path.home() / '.pyciss.toml'

In [None]:
#export
def get_config(p=None):
    """Read the config file and return its content as a dict.

    Parameters
    ----------
    p : str or pathlib.Path, optional
        Path to the TOML config file. When omitted, the module-level
        `configpath` is used.

    Returns
    -------
    dict
        Dictionary with the content of the config file.

    Raises
    ------
    IOError
        If the config file does not exist.
    """
    # Coerce explicitly given paths so plain strings work as well as Path objects.
    p = configpath if p is None else Path(p)
    if not p.exists():
        raise IOError("Config file {} not found.".format(str(p)))
    with p.open() as f:
        return toml.load(f)

## Configfile

The config file just contains one path to the folder where all data for `pyciss` will be stored.

If it doesn't exist yet, the following code helps you to create it.

In [None]:
from fastcore.test import test_fail

# `get_config` has to raise an IOError for a config file that does not exist.
# An explicitly missing path is used so that the check does not depend on
# whether this machine already has a real config file. `contains` checks the
# text of the raised exception (`msg` would only be the message shown when no
# exception is raised at all).
missing_config = configpath.with_name(".pyciss_missing_for_test.toml")
test_fail(lambda: get_config(missing_config), contains="not found")

AssertionError: Expected exception but none raised. Config file /home/maye/.pyciss.toml not found.

In [None]:
#export
# Executed at import time: load the stored configuration when the config file
# is present, otherwise tell the user how to create it.
if configpath.exists():
    config = get_config()
else:
    print("No configuration file {} found.\n".format(configpath))
    print(
        "Please run `pyciss.io.set_database_path(path)` and provide the path where\n"
        "you want to keep your automatically downloaded images."
    )
    print(
        "`pyciss` will store this path in {}, where you can easily change it later."
        .format(configpath)
    )

In [None]:
#export
def set_database_path(dbfolder):
    """Use to write the database path into the config.

    Entries already present in the config file are kept; only the ``path``
    entry is added or overwritten.

    Parameters
    ----------
    dbfolder : str or pathlib.Path
        Path to where pyciss will store the ISS images it downloads and receives.
    """
    try:
        d = get_config()
    except IOError:  # no config file yet, so start from an empty config
        d = {}
    # Store a plain string: pathlib.Path is not among the types the default
    # TOML encoder handles, and `get_db_root` rebuilds a Path from the stored
    # string anyway.
    d['path'] = str(dbfolder)
    with configpath.open('w') as f:
        toml.dump(d, f)
    print("Saved database path into {}.".format(configpath))

In [None]:
# import tempfile

# tmpdir = tempfile.TemporaryDirectory()

# set_database_path(tmpdir)

In [None]:
#export
def get_db_root():
    """Read the database root folder from the config and create it if required.

    Returns
    -------
    pathlib.Path
        The folder stored under the ``path`` key of the config file.

    Raises
    ------
    IOError
        Propagated from `get_config` when the config file does not exist.
    """
    dbroot = Path(get_config()['path'])
    # parents=True also creates missing intermediate folders; without it a
    # configured path whose parent does not exist yet raises FileNotFoundError.
    dbroot.mkdir(parents=True, exist_ok=True)
    return dbroot

In [None]:
#export
def db_mapped_cubes():
    """Return a generator over all ``*cal.dst.map.cub`` files below the database root.

    The search is recursive, i.e. it covers the root folder and all subfolders.
    """
    dbroot = get_db_root()
    pattern = "**/*cal.dst.map.cub"
    return dbroot.glob(pattern)


def db_label_paths():
    """Return a generator over all ``*.LBL`` label files below the database root.

    The search is recursive, like in `db_mapped_cubes`: `PathManager` locates
    the label files of an image in a per-image subfolder of the database root
    (``dbroot / img_id``), which a non-recursive glob on the root itself
    cannot reach.
    """
    return get_db_root().glob("**/*.LBL")

In [None]:
#export
def print_db_stats():
    """Print database stats.

    Prints the database location and the number of entries in the database
    root whose name starts with ``N`` or ``W``, then counts the files found
    recursively for each extension in `PathManager.extensions`.

    Returns
    -------
    pd.DataFrame
        Table with the found data items per type (one row, one column per
        key of `PathManager.extensions`).
    """
    dbroot = get_db_root()
    print(f"Database location: {dbroot}")
    # A glob character class lists its members without separators: "[NW]".
    # The former "[N,W]" additionally matched names starting with a comma.
    n_ids = sum(1 for _ in dbroot.glob("[NW]*"))
    print("Number of WACs and NACs in database: {}".format(n_ids))
    print("These kind of data are in the database: (returning pd.DataFrame)")
    # Single-element lists so that the DataFrame consists of exactly one row.
    counts = {
        key: [sum(1 for _ in dbroot.glob("**/*" + ext))]
        for key, ext in PathManager.extensions.items()
    }
    return pd.DataFrame(counts)

In [None]:
#export
from collections import OrderedDict

class PathManager:

    """Manage paths to data in database.

    The `.pyciss.toml` config file determines the path to the database for ISS images.
    With this class you can access the different kind of files conveniently.

    Using the stored extensions dictionary, the attributes of the object listed here are created
    dynamically at object initialization and when the image_id is being set.

    NOTE
    ----
    This class will read the .pyciss.toml to define the dbroot path, but
    one can also call it with the savedir argument to override that.

    Parameters
    ----------
    img_id : str or pathlib.Path
        The N... or W... image identifier string of CISS images or the absolute
        path to an existing image
    savedir : str or pathlib.Path
        Path to the pyciss image database. By default defined by what's found in
        the .pyciss.toml config, but can be overridden using this parameter.

    Attributes
    ----------
    dbroot
    basepath
    img_id
    version
    cal_cub
    calib_img
    calib_label
    cubepath
    dst_cub
    raw_cub
    raw_image
    raw_label
    tif
    undestriped
    """

    # Attribute name -> file name suffix that follows "<img_id>_<version>".
    d = {
        'cubepath': '.cal.dst.map.cub',
        'cal_cub': '.cal.cub',
        'dst_cub': '.cal.dst.cub',
        'raw_cub': '.cub',
        'raw_label': '.LBL',
        'raw_image': '.IMG',
        'calib_img': '_CALIB.IMG',
        'calib_label': '_CALIB.LBL',
        'tif': '.cal.dst.map.tif',
        'undestriped': '.cal.map.cub'
    }
    # ordered, sorted by key:
    extensions = OrderedDict(sorted(d.items(), key=lambda t: t[0]))

    # Number of leading characters of an identifier string used as image id.
    _ID_LENGTH = 11

    def __init__(self, img_id, savedir=None):
        self._store_img_id(img_id)
        if savedir is None:
            self.dbroot = get_db_root()
        else:
            self.dbroot = Path(savedir)

        self.set_version()
        self.set_attributes()

    def _store_img_id(self, img_id):
        """Normalize `img_id` and store it as `input_img_id` and `_id`."""
        # str() first, so that pathlib.Path input (which has no .upper()) works.
        img_id = str(img_id).upper()
        self.input_img_id = img_id
        if Path(img_id).is_absolute():
            # the split is to remove the _1.IMG or _2.IMG from the path
            # for the image id.
            self._id = Path(img_id).name.split('_')[0]
        else:
            # I'm using only filename until _ for storage
            # TODO: Could this create a problem?
            self._id = img_id[:self._ID_LENGTH]

    def set_version(self):
        """Set `version` from the given identifier or from the database.

        If the identifier given by the user is longer than the bare image id,
        the version is taken from the part between the first underscore and
        the following dot. Otherwise the database folder of the image is
        searched for a raw ``<img_id>_?.IMG`` file; if none is found, the
        version is set to '0'.
        """
        id_ = Path(self.input_img_id).name
        if len(id_) > self._ID_LENGTH:
            self.version = id_.split('_')[1].split('.')[0]
        else:
            # if the given id was without version, check if a raw file is in database:
            try:
                rawpath = next(self.basepath.glob(self.img_id + "_?.IMG")).name
            except StopIteration:
                self.version = '0'
            else:
                # The version is the single character behind "<img_id>_".
                self.version = rawpath[len(self.img_id) + 1]

    @property
    def basepath(self):
        """Folder of this image inside the database: ``dbroot / img_id``."""
        return self.dbroot / self._id

    @property
    def img_id(self):
        """The image identifier currently managed by this object."""
        return self._id

    @img_id.setter
    def img_id(self, value):
        # Same normalization as in __init__, and the version is refreshed as
        # well: otherwise the paths of the new image would be built with the
        # version of the previous one.
        self._store_img_id(value)
        self.set_version()
        self.set_attributes()

    def set_attributes(self):
        """(Re)create one path attribute per entry of `extensions`."""
        for k, v in self.extensions.items():
            path = self.basepath / ("{}_{}{}".format(self.img_id,
                                                     self.version,
                                                     v))
            setattr(self, k, path)

    def __str__(self):
        """List every known file kind with its path, or 'not found.' if missing."""
        self.set_version()
        self.set_attributes()  # in case there were changes
        lines = []
        for k in self.extensions:
            path = getattr(self, k)
            lines.append("{}: {}\n".format(k, path if path.exists() else "not found."))
        return ''.join(lines)

    def __repr__(self):
        return self.__str__()

In [None]:
#export
class DBManager():
    """Convenience handle on the pyciss image database.

    Attributes
    ----------
    dbroot : pathlib.Path
        Database root folder as returned by `get_db_root`.
    """

    def __init__(self):
        self.dbroot = get_db_root()

    def print_stats(self):
        """Print the database stats and return them.

        Returns
        -------
        pd.DataFrame
            The table returned by `print_db_stats`.
        """
        # Hand the table through to the caller instead of discarding it.
        return print_db_stats()