In [None]:
#| default_exp pds.ctx_index

# CTX Index

> Scraping the latest CTX index from the latest added volume.

The CTX Index is special because its latest URL is dynamic, so it needs to be web-scraped.

In [None]:
#| hide
from nbdev.showdoc import show_doc

In [None]:
#| export
from dataclasses import dataclass
from string import Template

import pandas as pd

In [None]:
#| export
@dataclass
class CTXIndex:
    """Class to determine the URL for the latest cumulative index.

    This is a 2-step process: first the MRO release page is scraped
    for the latest CTX release, and then that release's page is scraped
    for the latest volume.
    From that latest volume the latest index URL is constructed.

    The two boolean fields are cache flags: they record whether the
    respective page has already been scraped by this instance, so each
    page is fetched at most once.
    """
    # Page listing all MRO data releases.
    volumes_url: str = "https://pds-imaging.jpl.nasa.gov/volumes/mro.html"
    # Template for the page of a single release; needs `release`.
    release_url_template: Template = Template("https://pds-imaging.jpl.nasa.gov/volumes/mro/release${release}.html")
    # Template for the folder of a single CTX volume; needs `volume`.
    volume_url_template: Template = Template(
        "https://pds-imaging.jpl.nasa.gov/data/mro/mars_reconnaissance_orbiter/ctx/mrox_${volume}/")
    # True once `web_tables_list` has stored its scrape result.
    scraped_tables: bool = False
    # True once `latest_volume_url` has stored the volume number in `self.number`.
    release_scraped: bool = False

    @property
    def web_tables_list(self):
        """Use the pandas scraper to read in the MRO data release table.

        The scraper returns several tables in a list and the last one
        lists all the CTX volumes.

        The result is stored on the instance on first access and re-used
        afterwards. This could be replaced by cached properties.
        """
        if not self.scraped_tables:
            self._list_of_scraped_tables = pd.read_html(self.volumes_url)
            self.scraped_tables = True
        return self._list_of_scraped_tables

    @property
    def release_number(self):
        """Fishes out the release number (as a string).

        This is needed to construct the exact URL to the latest cumulative index file.
        """
        # Last row, first column of the last table; the number is its last word.
        release_cell = self.web_tables_list[-1].iloc[-1, 0]
        return release_cell.split()[-1]

    @property
    def release_url(self):
        "Constructs the release URL from the release number."
        return self.release_url_template.substitute(release=self.release_number)

    @property
    def latest_volume_url(self):
        """Scrape the release page for the URL of its latest volume.

        This is necessary because a release usually has more than one volume.
        The parsed volume number is stored as an int in `self.number` on
        first access, so the release page is scraped only once.

        Raises a `ValueError` if no volume number can be found in the last
        row of the release table, instead of returning a URL for a
        non-existing volume.
        """
        if not self.release_scraped:
            release_url = self.release_url
            tables = pd.read_html(release_url)
            # get last row of 4th table
            row = tables[3].iloc[-1]
            number = None
            # The first cell that is not a string (i.e. NaN) ends the scan,
            # leaving `number` at the last volume number seen before it.
            for elem in row.values:
                try:
                    number = int(elem.split()[-1])
                except AttributeError:
                    break
            if number is None:
                raise ValueError(
                    f"Could not find a volume number in the last table row of {release_url}"
                )
            self.number = number
            self.release_scraped = True
        # Volume folders carry 4-digit, zero-padded numbers (e.g. mrox_0033),
        # and int() has dropped any leading zeros.
        return self.volume_url_template.substitute(volume=str(self.number).zfill(4))

    @property
    def latest_index_label_url(self):
        "Construct the URL for the latest cumulative index."
        return self.latest_volume_url + "index/cumindex.lbl"

In [None]:
# Instantiate with the default URLs; pages are only scraped once a property is accessed.
ctx = CTXIndex()

In [None]:
show_doc(CTXIndex.web_tables_list)

---

[source](https://github.com/michaelaye/nbplanetary/blob/master/planetarypy/pds/ctx_index.py#L34){target="_blank" style="float:right; font-size:smaller"}

### CTXIndex.web_tables_list

>      CTXIndex.web_tables_list ()

Use the pandas scraper to read in the MRO data release table.

The scraper returns several tables in a list and the last one
lists all the CTX volumes.

This could be replaced by cached properties.

In [None]:
ctx.web_tables_list[-1].head()

Unnamed: 0,Mission to MarsContext Camera (CTX) and Mars Color Imager (MARCI) MRO LEVEL 0 EDRs V1.0,Mission to MarsContext Camera (CTX) and Mars Color Imager (MARCI) MRO LEVEL 0 EDRs V1.0.1,Mission to MarsContext Camera (CTX) and Mars Color Imager (MARCI) MRO LEVEL 0 EDRs V1.0.2
0,Release 1,,CTX and MARCI (includes data acquired during c...
1,Release 2,,CTX (31 volumes mrox_0033-0063) and MARCI (14 ...
2,Release 3,,CTX (64 volumes mrox_0064-0127) and MARCI (28 ...
3,Release 4,,CTX (60 volumes mrox_0128-0187) and MARCI (30 ...
4,Release 5,,CTX (134 volumes mrox_0188-0321) and MARCI (32...


In [None]:
# The scraper must hand back a list of tables (isinstance is the idiomatic type check).
assert isinstance(ctx.web_tables_list, list)

In [None]:
# The volumes page is expected to parse into exactly four tables.
assert len(ctx.web_tables_list) == 4

In [None]:
show_doc(CTXIndex.release_number)

---

[source](https://github.com/michaelaye/nbplanetary/blob/master/planetarypy/pds/ctx_index.py#L48){target="_blank" style="float:right; font-size:smaller"}

### CTXIndex.release_number

>      CTXIndex.release_number ()

Fishes out the release number.

This is needed to construct the exact URL to the latest cumulative index file.

In [None]:
# release_number is a string, hence the int() cast.
# NOTE(review): 64 is presumably the release that was current when this check was written — confirm.
assert int(ctx.release_number) >= 64

In [None]:
show_doc(CTXIndex.release_url)

---

[source](https://github.com/michaelaye/nbplanetary/blob/master/planetarypy/pds/ctx_index.py#L57){target="_blank" style="float:right; font-size:smaller"}

### CTXIndex.release_url

>      CTXIndex.release_url ()

Constructs the release URL from the release number.

In [None]:
# The release URL must be the release page template filled in with the scraped release number.
assert ctx.release_url == f"https://pds-imaging.jpl.nasa.gov/volumes/mro/release{ctx.release_number}.html"

In [None]:
show_doc(CTXIndex.latest_volume_url)

---

[source](https://github.com/michaelaye/nbplanetary/blob/master/planetarypy/pds/ctx_index.py#L62){target="_blank" style="float:right; font-size:smaller"}

### CTXIndex.latest_volume_url

>      CTXIndex.latest_volume_url ()

Scrape the Release URL for the latest volume URL in that.

This is necessary because a release usually has more that one volume.

In [None]:
ctx.latest_volume_url

'https://pds-imaging.jpl.nasa.gov/data/mro/mars_reconnaissance_orbiter/ctx/mrox_4348/'

In [None]:
# The volume URL must be a plain string (isinstance is the idiomatic type check).
assert isinstance(ctx.latest_volume_url, str)

In [None]:
show_doc(CTXIndex.latest_index_label_url)

---

[source](https://github.com/michaelaye/nbplanetary/blob/master/planetarypy/pds/ctx_index.py#L83){target="_blank" style="float:right; font-size:smaller"}

### CTXIndex.latest_index_label_url

>      CTXIndex.latest_index_label_url ()

Construct the URL for the latest cumulative index.

In [None]:
ctx.latest_index_label_url

'https://pds-imaging.jpl.nasa.gov/data/mro/mars_reconnaissance_orbiter/ctx/mrox_4348/index/cumindex.lbl'

In [None]:
#| hide
from nbdev import nbdev_export

nbdev_export()