In [None]:
#| default_exp pds.ctx_index

# CTX Index
> Scrape the CTX directory listing to locate the latest index file in the most recently added volume.

In [None]:
#| hide
from nbdev.showdoc import show_doc

In [None]:
#| export
from dataclasses import dataclass
from ssl import SSLError
from string import Template

from yarl import URL

import pandas as pd

In [None]:
#| export
class CTXIndex:
    """Find the newest CTX release folder in the directory listing at `url`.

    The listing is downloaded lazily the first time `volumes_table` is read
    and then cached on the instance.
    """

    # Directory listing with one `mrox_NNNN/` folder (and, usually, one
    # `mrox_NNNN_md5.txt` file) per release.
    url = 'https://planetarydata.jpl.nasa.gov/img/data/mro/mars_reconnaissance_orbiter/ctx/'

    def __init__(self):
        # Parsed listing; stays None until `volumes_table` is first accessed.
        self._volumes_table = None

    @property
    def volumes_table(self):
        """Directory listing as a DataFrame (fetched once, then cached)."""
        if self._volumes_table is None:
            listing = pd.read_html(self.url)[0]
            # Drop columns and rows that are completely empty.
            listing = listing.dropna(how='all', axis=1).dropna(how='all', axis=0)
            # Skip the first remaining row and the last remaining column
            # (presumably the parent-directory link and the size column --
            # TODO confirm against the live listing).
            self._volumes_table = listing.iloc[1:, :-1]
        return self._volumes_table

    @property
    def latest_release_folder(self):
        """Name of the newest `mrox_NNNN/` folder, trailing slash included.

        Raises
        ------
        IndexError
            If the listing contains no `mrox_NNNN/` folder entry.
        """
        names = self.volumes_table.iloc[:, 0].dropna().astype(str)
        # Select among folder entries only. The listing interleaves folders
        # with `*_md5.txt` files and the newest folder may not have its
        # checksum file yet, so a fixed row position (e.g. "second to last")
        # can land on a checksum file instead of a folder.
        folders = names[names.str.match(r"mrox_\d+/$")]
        if folders.empty:
            raise IndexError(f"no 'mrox_NNNN/' folder found in listing at {self.url}")
        # Compare numerically so ordering does not depend on zero padding.
        return max(folders, key=lambda name: int(name.rstrip('/').split('_')[1]))

    @property
    def latest_release_number(self):
        """Release number of the newest folder as a string, e.g. '4759'."""
        return self.latest_release_folder.rstrip('/').split("_")[1]

    @property
    def latest_index_label_url(self):
        """URL of `index/cumindex.lbl` inside the newest release folder."""
        # The folder name ends with '/'; strip it and join segment by segment
        # so the path never contains an empty ('//') segment.
        folder = self.latest_release_folder.rstrip('/')
        return URL(self.url) / folder / "index" / "cumindex.lbl"

In [None]:
# Create the helper; nothing is downloaded until a property is accessed.
ctx = CTXIndex()

In [None]:
# First access reads the listing from `CTXIndex.url` and caches it on the instance.
ctx.volumes_table

Unnamed: 0,Name,Last modified
2,mrox_0001/,05-Jun-2007 10:15
3,mrox_0001_md5.txt,10-Dec-2019 04:32
4,mrox_0002/,05-Jun-2007 10:15
5,mrox_0002_md5.txt,09-Dec-2019 19:14
6,mrox_0003/,05-Jun-2007 10:15
...,...,...
9515,mrox_4757_md5.txt,15-Nov-2023 23:23
9516,mrox_4758/,15-Nov-2023 23:08
9517,mrox_4758_md5.txt,15-Nov-2023 23:23
9518,mrox_4759/,15-Nov-2023 23:10


In [None]:
# Folder name of the latest release, with its trailing slash.
ctx.latest_release_folder

'mrox_4759/'

In [None]:
# Release number taken from the folder name, returned as a string.
ctx.latest_release_number

'4759'

In [None]:
# URL of `index/cumindex.lbl` inside the latest release folder.
ctx.latest_index_label_url

URL('https://planetarydata.jpl.nasa.gov/img/data/mro/mars_reconnaissance_orbiter/ctx/mrox_4759/index/cumindex.lbl')

In [None]:
#| hide
from nbdev import nbdev_export

# Write the cells marked `#| export` to the module named by `#| default_exp`.
nbdev_export()