In [None]:
# default_exp io

# Input/Output

> This module manages where downloaded data is stored via a config
file. It also provides a PathManager class to retrieve the paths to files in
the archive.

In [None]:
# hide
from nbdev.showdoc import show_doc

In [None]:
# export
import logging
import os
from collections import OrderedDict
from pathlib import Path
import pandas as pd
import toml

logger = logging.getLogger()

In [None]:
# NOTE(review): this cell carries no `# export` flag, but exported code further
# down (`PathManager`, `db_mapped_cubes`, `db_label_paths`) reads `storage_root`.
# Confirm the generated module gets this import some other way.
from planetarypy.ciss import storage_root
storage_root

Path('/home/maye/big_drive/planetary_data/missions/cassini/iss')

## PathManager

> The `PathManager` provides access to the paths in the archive, based on class attributes that reflect the processing level of the file:

In [None]:
# export
from collections import OrderedDict


class PathManager:

    """Manage paths to data in database.

    `planetarypy.ciss.storage_root` determines the path to the database for ISS images.
    With this class you can access the different kinds of files in that folder conveniently.

    Using the stored extensions dictionary, the attributes of the object listed here are created
    dynamically at object initialization and when the image_id is being set.

    NOTE
    ----
    This class will use the archive path as designated by `planetarypy.ciss`, but
    one can also call it with the savedir argument to override that.

    Parameters
    ----------
    img_id : str or pathlib.Path
        The N... or W... image identifier string of CISS images, optionally
        followed by ``_<version>`` and a file extension, or a path to an image
        file. Only the file name part is used; strings are upper-cased.
    savedir : str or pathlib.Path
        Path to the pyciss image database. By default defined by what's given by
        `planetarypy.ciss.storage_root`, but can be overridden using this parameter.

    Attributes
    ----------
    dbroot
    basepath
    img_id
    version
    cal_cub
    calib_img
    calib_label
    cubepath
    dst_cub
    raw_image
    raw_cub
    raw_label
    tif
    undestriped
    """

    d = {
        "cubepath": ".cal.dst.map.cub",
        "cal_cub": ".cal.cub",
        "dst_cub": ".cal.dst.cub",
        "raw_cub": ".cub",
        "raw_label": ".LBL",
        "raw_image": ".IMG",
        "calib_img": "_CALIB.IMG",
        "calib_label": "_CALIB.LBL",
        "tif": ".cal.dst.map.tif",
        "undestriped": ".cal.map.cub",
    }
    # ordered, sorted by key:
    extensions = OrderedDict(sorted(d.items(), key=lambda t: t[0]))

    def __init__(self, img_id, savedir=None):
        self._id, self._given_version = self._split_img_id(img_id)
        if savedir is None:
            self.dbroot = storage_root
        else:
            self.dbroot = Path(savedir)

        # dbroot must be set first: the version lookup globs inside basepath.
        self.set_version()
        self.set_attributes()

    @staticmethod
    def _split_img_id(img_id):
        """Split an id, file name or path into ``(bare_id, version_or_None)``.

        Only the final path component is inspected. The bare id is everything
        before the first underscore (and before any extension); the version is
        the token between the first underscore and the next ``_`` or ``.``.
        String input is upper-cased, `pathlib.Path` input is taken as is.
        """
        name = Path(img_id).name
        if isinstance(img_id, str):
            name = name.upper()
        bare, _, rest = name.partition("_")
        bare = bare.split(".")[0]
        version = rest.split("_")[0].split(".")[0]
        return bare, (version or None)

    def set_version(self):
        """Set `self.version` from the given id or, failing that, from the archive.

        A version that was part of the id/path handed to the object wins.
        Otherwise the image folder is searched for raw ``<img_id>_?.IMG`` files
        and the version of the last one in sorted order is used; if none is
        found the version is "0".
        """
        if self._given_version is not None:
            self.version = self._given_version
            return
        # Sorting makes the choice deterministic when several versions exist.
        names = sorted(p.name for p in self.basepath.glob(self.img_id + "_?.IMG"))
        if names:
            # The version is the single character right after "<img_id>_".
            self.version = names[-1][len(self.img_id) + 1]
        else:
            self.version = "0"

    @property
    def basepath(self):
        """Folder of this image inside the database: ``dbroot / img_id``."""
        return self.dbroot / self._id

    @property
    def img_id(self):
        """The bare image id, without version or extension."""
        return self._id

    @img_id.setter
    def img_id(self, value):
        self._id, self._given_version = self._split_img_id(value)
        # The version belongs to the image, so it must be resolved again
        # before the path attributes are rebuilt.
        self.set_version()
        self.set_attributes()

    def set_attributes(self):
        """(Re)create one path attribute per entry of `extensions`."""
        for k, v in self.extensions.items():
            path = self.basepath / f"{self.img_id}_{self.version}{v}"
            setattr(self, k, path)

    def __str__(self):
        self.set_version()
        self.set_attributes()  # in case there were changes
        lines = []
        for k in self.extensions:
            path = getattr(self, k)
            shown = path if path.exists() else "not found."
            lines.append(f"{k}: {shown}\n")
        return "".join(lines)

    def __repr__(self):
        return self.__str__()

In [None]:
# export
def db_mapped_cubes(dbroot=None):
    """Find all ``*cal.dst.map.cub`` files below the archive root.

    Parameters
    ----------
    dbroot : str or pathlib.Path, optional
        Folder to search recursively. Defaults to `storage_root`.

    Returns
    -------
    generator of pathlib.Path
        Lazily yields the matching paths, in no particular order.
    """
    root = storage_root if dbroot is None else Path(dbroot)
    return root.glob("**/*cal.dst.map.cub")


def db_label_paths(dbroot=None):
    """Find all ``*.LBL`` label files below the archive root.

    The search is recursive because `PathManager` places every file of an image
    in a per-image subfolder (``<root>/<img_id>/<img_id>_<version>.LBL``); a
    search of the top level only would miss them.

    Parameters
    ----------
    dbroot : str or pathlib.Path, optional
        Folder to search recursively. Defaults to `storage_root`.

    Returns
    -------
    generator of pathlib.Path
        Lazily yields the matching paths, in no particular order.
    """
    root = storage_root if dbroot is None else Path(dbroot)
    return root.glob("**/*.LBL")

In [None]:
# Peek at the first five mapped cubes found below the storage root.
list(db_mapped_cubes())[:5]

[Path('/home/maye/big_drive/planetary_data/missions/cassini/iss/N1595341067/N1595341067_1.cal.dst.map.cub'),
 Path('/home/maye/big_drive/planetary_data/missions/cassini/iss/N1625954542/N1625954542_1.cal.dst.map.cub'),
 Path('/home/maye/big_drive/planetary_data/missions/cassini/iss/N1625962190/N1625962190_1.cal.dst.map.cub'),
 Path('/home/maye/big_drive/planetary_data/missions/cassini/iss/N1591681327/N1591681327_1.cal.dst.map.cub'),
 Path('/home/maye/big_drive/planetary_data/missions/cassini/iss/N1585780709/N1585780709_1.cal.dst.map.cub')]

In [None]:
# Bare image id without a version: the version is looked up in the archive.
pm = PathManager("N1875229393")

In [None]:
# The repr lists every file type with its path, or "not found." if the file is missing.
pm

cal_cub: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.cal.cub
calib_img: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1_CALIB.IMG
calib_label: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1_CALIB.LBL
cubepath: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.cal.dst.map.cub
dst_cub: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.cal.dst.cub
raw_cub: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.cub
raw_image: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.IMG
raw_label: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.LBL
tif: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393/N1875229393_1.cal.dst.map.tif
undestriped: /home/maye/big_drive/planetary_data/missions/cassini/iss/N1875229393

In [None]:
# Attribute names mapped to their filename suffixes, sorted by attribute name.
pm.extensions

OrderedDict([('cal_cub', '.cal.cub'),
             ('calib_img', '_CALIB.IMG'),
             ('calib_label', '_CALIB.LBL'),
             ('cubepath', '.cal.dst.map.cub'),
             ('dst_cub', '.cal.dst.cub'),
             ('raw_cub', '.cub'),
             ('raw_image', '.IMG'),
             ('raw_label', '.LBL'),
             ('tif', '.cal.dst.map.tif'),
             ('undestriped', '.cal.map.cub')])

In [None]:
# export
def print_db_stats(dbroot=None):
    """Print database stats.

    Prints the archive location and the number of top-level entries whose name
    starts with ``N`` or ``W``, then counts the files of every type known to
    `PathManager.extensions`.

    Parameters
    ----------
    dbroot : str or pathlib.Path, optional
        Archive folder to inspect. Defaults to `storage_root`.

    Returns
    -------
    pd.DataFrame
        Table with the found data items per type.
    """
    dbroot = storage_root if dbroot is None else Path(dbroot)
    print(f"Database location: {dbroot}")
    # Inside a glob character class every character is literal, so "[N,W]"
    # would also match names starting with a comma; "[NW]" is what is meant.
    n_ids = len(list(dbroot.glob("[NW]*")))
    print("Number of WACs and NACs in database: {}".format(n_ids))
    print("These kind of data are in the database: (returning pd.DataFrame)")
    # "_?" stands for the one-character version that precedes each suffix.
    counts = {
        key: [len(list(dbroot.glob("**/*_?" + suffix)))]
        for key, suffix in PathManager.extensions.items()
    }
    return pd.DataFrame(counts)

In [None]:
# Prints the location and counts, then displays the returned one-row table.
print_db_stats()

Database location: /home/maye/big_drive/planetary_data/missions/cassini/iss
Number of WACs and NACs in database: 24
These kind of data are in the database: (returning pd.DataFrame)


Unnamed: 0,cal_cub,calib_img,calib_label,cubepath,dst_cub,raw_cub,raw_image,raw_label,tif,undestriped
0,23,3,2,23,23,23,24,24,23,23


In [None]:
# export
class DBManager:
    """Helper class for the whole archive.

    Attributes
    ----------
    dbroot : pathlib.Path
        Root folder of the archive, taken from `storage_root`.
    """

    def __init__(self):
        # `get_db_root` is not defined anywhere in this notebook as reviewed;
        # `storage_root` is the archive root that the rest of the module uses.
        self.dbroot = storage_root

    def print_stats(self):
        """Print the archive statistics.

        Returns
        -------
        pd.DataFrame
            The table produced by `print_db_stats`.
        """
        return print_db_stats()

In [None]:
# export
def filename_from_obsid(obsid):
    """Join the last two underscore-separated tokens of `obsid`, last one first.

    For example ``"X_1875229393_N"`` becomes ``"N1875229393"``.
    Presumably `obsid` is an observation id whose trailing tokens are the
    image number and the camera letter — confirm against callers.

    Parameters
    ----------
    obsid : str
        Identifier containing at least one underscore.

    Returns
    -------
    str
        The final token followed directly by the one before it.

    Raises
    ------
    IndexError
        If `obsid` contains no underscore.
    """
    # Only the two trailing tokens matter, so split from the right.
    pieces = obsid.rsplit("_", 2)
    trailing = pieces[-1]
    before_trailing = pieces[-2]
    return trailing + before_trailing