---
---
# [0] Imports / Setup

In [None]:
from pathlib import Path
import re

import pandas as pd
import numpy as np

from redplanet.DatasetManager.hash import _calculate_hash_from_file, get_available_algorithms
from redplanet.DatasetManager.download import _download_file_from_url
from redplanet.helper_functions.coordinates import _plon2slon

In [None]:
def print_table(data):
    """Print `data` (a sequence of rows) as left-aligned, space-padded columns.

    Every cell is converted with `str`; rows shorter than the longest row are
    padded with empty cells. Each cell is followed by four spaces (including
    the last cell of a row). Nothing is printed when `data` is empty.
    """
    if not data:
        return

    num_columns = max(len(row) for row in data)

    ## stringify the whole grid once, filling in missing trailing cells
    grid = [
        [str(row[j]) if j < len(row) else '' for j in range(num_columns)]
        for row in data
    ]

    ## widest entry of each column
    widths = [
        max(len(line[j]) for line in grid)
        for j in range(num_columns)
    ]

    ## one printed line per row
    for line in grid:
        print(''.join(text.ljust(width) + '    ' for text, width in zip(line, widths)))

In [None]:
'''NOTE: set this according to your own system'''
## root working directory for this notebook:
##   - downloaded inputs are stored under `dirpath_cwd / 'intermediate'` (see `temp_download`)
##   - the final csv is written under `dirpath_cwd / 'output'`
dirpath_cwd = Path('/home/lain/root/100_work/110_projects/111_mars/code-repos/redplanet/datasets/Craters')

In [None]:
'''see readme'''

## registry of input files, keyed by a short dataset label. each entry is read by `temp_download`:
##   - 'fname'  : file name used for the local copy
##   - 'url'    : location the file is downloaded from when no local copy exists
##   - 'sha256' : expected sha256 digest; the local copy is checked against it before use
download_info = {
    'crater_ages': {
        # see Supplementary Table 3 of https://doi.org/10.1016/j.icarus.2013.03.019
        'fname' : 'Table 3.tex',
        'url'   : 'https://rutgers.box.com/shared/static/fdk83g5g5pn2kltrqodvwnvmvjbmggzh',
        'sha256': 'f81bf35ba76f0f2e9939d7a338084451145cdc8d9771124ac4e8ec71802ea236',
    },
    'crater_database': {
        # https://craters.sjrdesign.net/
        'fname' : 'Catalog_Mars_Release_2020_1kmPlus_FullMorphData.csv',
        'url'   : 'https://rutgers.box.com/shared/static/sry0fof5brqu9pz2tfk6xfix7c3w1xyu',
        'sha256': '348e5b88912e6e67b71fb4afffc8f76a170524e1308c171f98c805c045813c22',
    },
    'crater_names': {
        # https://planetarynames.wr.usgs.gov/SearchResults?Target=20_Mars&Feature%20Type=9_Crater,%20craters
        'fname' : 'IAU-crater-names_as-of_2024-11-26.csv',
        'url'   : 'https://rutgers.box.com/shared/static/xjljza4gw9743dutlpez8m8ccgmkzfnd',
        'sha256': '4c08fe5c2477d20ffdd088d45275fb1469fd2970900aa5b9aeff66160285a5ea',
    },
}





from contextlib import contextmanager

@contextmanager
def temp_download(key):
    """Context manager yielding the local path of a hash-verified input file.

    Looks up `key` in `download_info`, downloads the file into
    `dirpath_cwd / 'intermediate'` if no local copy exists yet, and verifies
    its sha256 digest before yielding the path.

    The local copy is intentionally kept after the `with` block exits, so it
    acts as a cache for later runs (despite the "temp" in the name).

    Parameters
    ----------
    key : str
        Key into `download_info`.

    Yields
    ------
    pathlib.Path
        Path of the verified local file.

    Raises
    ------
    KeyError
        If `key` is not in `download_info`.
    ValueError
        If the sha256 digest of the local file does not match the expected one.
        The offending file is removed first, so the next call downloads a
        fresh copy instead of failing on the same bad file forever.
    """
    metadata = download_info[key]

    dirpath_intermediate = dirpath_cwd / 'intermediate'
    dirpath_intermediate.mkdir(exist_ok=True, parents=True)
    fpath = dirpath_intermediate / metadata['fname']

    ## only hit the network when there is no cached copy
    if not fpath.exists():
        _download_file_from_url(metadata['url'], fpath)

    computed_hash = _calculate_hash_from_file(fpath, 'sha256')

    if computed_hash != metadata['sha256']:
        ## a bad cached file would otherwise be reused on every later call
        ## (the download above is skipped whenever the file exists)
        fpath.unlink()
        error_msg = [
            'hash mismatch:',
            f'- expected hash: {metadata["sha256"]}',
            f'- computed hash: {computed_hash}',
            f'- removed invalid file: {fpath}',
        ]
        raise ValueError('\n'.join(error_msg))

    yield fpath    ## yield the file path for use in the `with` block

---
---
# [1] Read: Robbins 2013 crater ages  _(latex -> pandas)_

In [None]:
with temp_download('crater_ages') as fpath:
    with fpath.open('r') as f:
        dat = list(f)    ## one entry per line, line endings kept



'''read only the table data'''
## keep the header line plus the data lines of the latex table
dat = dat[11:12] + dat[15:-6]
del dat[25]  # remove comment





'''format/parse'''

def _parse_cell(cell):
    """Clean one latex table cell; return a float when numeric, else a string."""
    text = cell.strip()

    ## unwrap latex inline math: `$...$`
    if text.startswith('$') and text.endswith('$'):
        text = text[1:-1].strip()

    ## drop a leading backslash (latex macro used as a "Name" value, e.g. a greek letter)
    if text.startswith('\\'):
        text = text[1:].strip()

    ## flatten an age like `3.93_{-0.08}^{+0.05 }` into `3.93;-0.08;0.05`
    if ('_{' in text) and ('^{' in text):
        text = ';'.join(
            re.sub(r'[^0-9.\-]', '', part)
            for part in text.split('{')
        )

    ## numeric cells become floats, anything else is kept as text
    try:
        return float(text)
    except ValueError:
        return text


## strip the latex row terminator, then split each line into cells on `&`
newdat = [
    [_parse_cell(cell) for cell in line.replace('\\\\', '').strip().split('&')]
    for line in dat
]





'''convert to dataframe and cleanup types'''

## maps the latex column headers of the table -> short column names.
## NOTE: the `N_{\rm{...}}` keys are raw strings on purpose. in a normal string
## literal, `\r` is the carriage-return escape, which would silently corrupt the label.
cols = {
    'Name'            : 'name',
    'Diameter (km)'   : 'diam',
    'Latitude'        : 'lat',
    'Longitude'       : 'lon',
    r'N_{\rm{H}}(10)' : 'N_H(10)',
    r'N_{\rm{N}}(10)' : 'N_N(10)',
    r'N_{\rm{H}}(25)' : 'N_H(25)',
    r'N_{\rm{N}}(25)' : 'N_N(25)',
    r'N_{\rm{H}}(50)' : 'N_H(50)',
    r'N_{\rm{N}}(50)' : 'N_N(50)',
    'Hartmann Isochron Age (Ga)'      : 'Hartmann Isochron Age',
    'Neukum Isochron Age (Ga)'        : 'Neukum Isochron Age',
    'Hartmann Turn-Off Diameter (km)' : 'Hartmann Turn-Off Diameter',
    'Neukum Turn-Off Diameter (km)'   : 'Neukum Turn-Off Diameter',
}

## `newdat[0]` is the header row of the table; the column labels come from `cols` instead
df_ages = pd.DataFrame(
    newdat[1:],
    columns = list(cols.keys())
).rename(columns=cols)


## strings
df_ages = df_ages.convert_dtypes()
df_ages = df_ages.replace('', pd.NA)    ## empty cells -> missing


## numerics
numeric_cols = [
    'diam',
    'lat',
    'lon',
    'Hartmann Turn-Off Diameter',
    'Neukum Turn-Off Diameter',

]
for col in numeric_cols:
    ## anything that cannot be parsed as a number becomes missing
    df_ages[col] = pd.to_numeric(df_ages[col], errors='coerce')

## presumably converts positive-east longitudes to signed longitudes -- confirm against `_plon2slon`
df_ages['lon'] = df_ages['lon'].apply(_plon2slon)

## the table spells this crater "Bacquerel"; use the spelling "Becquerel" instead
df_ages.loc[df_ages['name'] == 'Bacquerel', 'name'] = 'Becquerel'





'''print/display'''
# print(df_ages.dtypes)
df_ages

In [None]:
## spot-check: show the age-table row(s) for the crater named "Henry"
df_ages.query('name == "Henry"')

In [None]:
## rows of the age table that have a Hartmann isochron age
has_hartmann_age = df_ages['Hartmann Isochron Age'].notna()
x = len(df_ages[has_hartmann_age])

print(f'resolvable ages for {x} craters')

---
---
# [2] Read: Robbins 2020 crater database

In [None]:
## NOTE: for now only keep lon/lat for easy working purposes, but later on merge elliptical stuff based on crater id
## maps catalog column names -> short column names; commented-out entries are columns that are not loaded
cols = {
    'CRATER_ID'              : 'id',
    'LAT_CIRC_IMG'           : 'lat',
    'LON_CIRC_IMG'           : 'lon',
    # 'LAT_ELLI_IMG'           : 'lat_elli',
    # 'LON_ELLI_IMG'           : 'lon_elli',
    'DIAM_CIRC_IMG'          : 'diam',
    'DIAM_CIRC_SD_IMG'       : 'diam_sd',
    'DIAM_ELLI_MAJOR_IMG'    : 'diam_elli_major',
    'DIAM_ELLI_MINOR_IMG'    : 'diam_elli_minor',
    # 'DIAM_ELLI_ECCEN_IMG'    : 'diam_elli_eccen',
    # 'DIAM_ELLI_ELLIP_IMG'    : 'diam_elli_ellip',
    'DIAM_ELLI_ANGLE_IMG'    : 'diam_elli_angle',
    ## normally i ignore everything past this point...
    # 'LAT_ELLI_SD_IMG'        : ...,
    # 'LON_ELLI_SD_IMG'        : ...,
    'DIAM_ELLI_MAJOR_SD_IMG' : 'diam_elli_major_sd',
    'DIAM_ELLI_MINOR_SD_IMG' : 'diam_elli_minor_sd',
    # 'DIAM_ELLI_ANGLE_SD_IMG' : ...,
    # 'DIAM_ELLI_ECCEN_SD_IMG' : ...,
    # 'DIAM_ELLI_ELLIP_SD_IMG' : ...,
    # 'ARC_IMG'                : ...,
    # 'PTS_RIM_IMG'            : ...,
    # 'LAY_NUMBER'             : ...,
    # 'LAY_MORPH1'             : ...,
    # 'LAY_MORPH2'             : ...,
    # 'LAY_MORPH3'             : ...,
    # 'LAY_NOTES'              : ...,
    # 'INT_MORPH1'             : ...,
    # 'INT_MORPH2'             : ...,
    # 'INT_MORPH3'             : ...,
    # 'CONF'                   : ...,
    # 'NOTES'                  : ...,
    # 'DEG_RIM'                : ...,
    # 'DEG_EJC'                : ...,
    # 'DEG_FLR'                : ...,
}



with temp_download('crater_database') as fpath:
    df_20 = pd.read_csv(
        fpath,
        header  = 0,
        usecols = list(cols.keys())
    )

## minimum crater diameter to keep (same units as the catalog's `DIAM_CIRC_IMG` column)
min_diam = 50

## rename -> sort by diameter (largest first) -> apply diameter cutoff -> drop exact duplicate rows.
## the trailing `.copy()` makes `df_20` an independent frame, so the column assignment below
## is not a write to a filtered slice (which is what triggers pandas' SettingWithCopyWarning).
df_20 = (
    df_20
    .rename(columns=cols)
    .sort_values('diam', ascending=False, ignore_index=True)
    .loc[lambda d: d['diam'] >= min_diam]
    .drop_duplicates()
    .copy()
)

## presumably converts positive-east longitudes to signed longitudes -- confirm against `_plon2slon`
df_20['lon'] = df_20['lon'].apply(_plon2slon)

## empty (all-missing) string column, to be filled in when crater names are merged in
df_20.insert(loc=1, column='name', value=pd.Series(dtype='string'))



# print(df_20.dtypes)
df_20

In [None]:
## sanity check: smallest and largest longitude present in `df_20`
lons = sorted(df_20['lon'].to_numpy())
lon_min, lon_max = lons[0], lons[-1]
print(f'lons: {lon_min} -> {lon_max}')

lons: -179.9231989 -> 179.80030220000003

---
---
# [3] Read: IAU crater names

In [None]:
## maps csv column names -> short column names; commented-out entries are columns that are not loaded
cols = {
    'Feature Name'              : 'name',
    # 'Target'                    : '',    # all 'Mars'
    'Diameter'                  : 'diam',
    'Center Latitude'           : 'lat',
    'Center          Longitude' : 'lon',
    # 'Feature Type'              : '',    # all 'Crater, craters
    # 'Approval Date'             : '',    # idc
    # 'Origin'                    : '',    # idc
}

with temp_download('crater_names') as fpath:
    df_names = pd.read_csv(
        fpath,
        header  = 0,
        usecols = list(cols.keys()),
    )



print(f'{df_names.shape[0]} entries initially')

## minimum crater diameter to keep
min_diam = 50

## rename -> sort by diameter (largest first) -> apply diameter cutoff.
## the trailing `.copy()` makes `df_names` an independent frame, so the column assignment below
## is not a write to a filtered slice (which is what triggers pandas' SettingWithCopyWarning).
df_names = (
    df_names
    .rename(columns=cols)
    .sort_values('diam', ascending=False, ignore_index=True)
    .loc[lambda d: d['diam'] >= min_diam]
    .copy()
)

print(f'({min_diam}km min diam cutoff)')
print(f'{df_names.shape[0]} entries left')

## presumably converts positive-east longitudes to signed longitudes -- confirm against `_plon2slon`
df_names['lon'] = df_names['lon'].apply(_plon2slon)

df_names

1218 entries initially

(50km min diam cutoff)

341 entries left

---
---
# [4] Merge names: df_names -> df_20

In [None]:
## work on a copy so `df_20` itself stays untouched
df_20_n = df_20.copy()


## max absolute difference in lon / lat for two entries to count as the same crater
coord_error = 0.3

# diam_error  = 15
## max relative difference in diameter for two entries to count as the same crater
diam_rel_error = 0.1

## named craters smaller than this are not matched at all
diam_lessthan_ignore = 52



'''merge names from iau to robbinsV20'''

print(f'named craters: {df_names.shape[0]}\n')

num_ignored = len(df_names[df_names["diam"] < diam_lessthan_ignore])
print(f'ignoring named craters with D<{diam_lessthan_ignore}km: {num_ignored}\n')


## named craters without exactly one match; first row is the table header
messy_results = [['NUM_RESULTS', 'NAME', 'DIAM', 'LON', 'LAT']]

for i, crater in df_names.iterrows():

    ## ignore edge cases around 50km diam threshold
    if crater['diam'] < diam_lessthan_ignore:
        continue

    ## candidates: close in lon, close in lat, and similar in diameter.
    ## NOTE: the lon window is a plain interval, so it does not wrap around at +/-180
    near_lon = df_20_n['lon'].between(crater['lon'] - coord_error, crater['lon'] + coord_error)
    near_lat = df_20_n['lat'].between(crater['lat'] - coord_error, crater['lat'] + coord_error)
    similar_diam = df_20_n['diam'].between(
        crater['diam'] * (1 - diam_rel_error),
        crater['diam'] * (1 + diam_rel_error),
    )
    search = df_20_n[near_lon & near_lat & similar_diam]

    num_results = len(search)

    ## zero or several candidates -> record for manual inspection
    if num_results != 1:
        messy_results.append([f'{num_results}', crater['name'], crater['diam'], crater['lon'], crater['lat']])
        continue

    ## exactly one candidate -> take over the name
    df_20_n.loc[search.index, 'name'] = crater['name']



print(f'added names: {df_20_n["name"].notna().sum()}\n')
print(f'messy results: {len(messy_results) - 1}\n')
print_table(messy_results)

---

named craters: 341

ignoring named craters with D<52km: 20

added names: 313

messy results: 8

NUM_RESULTS   | NAME           | DIAM     | LON                   | LAT       
---           | ---            | ---      | ---                   | ---
0             | Robert Sharp   | 152.08   | 133.41999999999996    | -4.17     
0             | Roemer         | 120.0    | 8.090000000000003     | -27.46    
0             | Barth          | 111.0    | 25.670000000000016    | 7.44      
0             | Tycho Brahe    | 105.27   | 146.12                | -49.41    
0             | Richardson     | 89.0     | -179.86               | -72.47    
0             | Novara         | 86.98    | -10.689999999999998   | -24.9     
0             | Elston         | 75.0     | 119.18                | -15.39    
0             | Eberswalde     | 62.19    | -33.30000000000001    | -23.98    

In [None]:
'''manually assign messy results'''

## (name, crater id) pairs decided by hand; a name may be given to more than one id
already_exists = (
    ('Roemer'     , '11-3-000193'),  ## reason: big diameter discrepancy && overlapping (not sure if distinct?)
    ('Roemer'     , '11-0-002560'),  ## ^
    ('Barth'      , '10-0-003400'),  ## reason: reasonable diameter discrepancy
    ('Tycho Brahe', '16-1-002468'),  ## reason: very elliptical
    ('Richardson' , '16-1-009592'),  ## reason: lon wraparound
    ('Novara'     , '07-0-002030'),  ## reason: very elliptical
    ('Elston'     , '15-1-013698'),  ## reason: big diameter discrepancy && overlapping (not sure if distinct?)
    ('Elston'     , '15-1-013699'),  ## ^
    ('Eberswalde' , '07-1-015599'),  ## reason: very elliptical
    ## NOTE: not assigning Robert Sharp bc i'm not sure if it's actually a crater: https://planetarynames.wr.usgs.gov/Feature/15002
)

for name, crater_id in already_exists:
    has_id = df_20_n['id'] == crater_id
    df_20_n.loc[has_id, 'name'] = name

## `- 1` because the missing value is counted as one of the unique entries
num_named = len(df_20_n["name"].unique()) - 1
print(f'named craters in newly merged v20 dataset (not counting duplicates): {num_named}')

In [None]:
## spot-check: rows that were given the name "Elston" (assigned by hand to two crater ids)
df_20_n.query('name == "Elston"')

In [None]:
## spot-check: rows that were given the name "Roemer" (assigned by hand to two crater ids)
df_20_n.query('name == "Roemer"')

---
---
# [5] Merge ages: df_ages -> df_20_n

In [None]:
'''init empty columns'''
## columns of `df_ages` that are copied over; the commented-out ones stay as they are in the catalog
df_ages_cols_toinclude = [
    # 'name',
    # 'diam',
    # 'lat',
    # 'lon',
    'N_H(10)',
    'N_N(10)',
    'N_H(25)',
    'N_N(25)',
    'N_H(50)',
    'N_N(50)',
    'Hartmann Isochron Age',
    'Neukum Isochron Age',
    'Hartmann Turn-Off Diameter',
    'Neukum Turn-Off Diameter',
]
## work on a copy so `df_20_n` itself stays untouched
df_20_na = df_20_n.copy()
for col in df_ages_cols_toinclude:
    df_20_na[col] = pd.NA





## max absolute difference in lon / lat for two entries to count as the same crater
coord_error = 1

# diam_error  = 10
## max relative difference in diameter for two entries to count as the same crater
diam_rel_error = 0.1



'''merge ages from robbins 2013 age table to robbinsV20'''

## age-table craters without exactly one match; first row is the table header
messy_results = [['NUM_RESULTS', 'NAME', 'DIAM', 'LON', 'LAT']]

for i, crater in df_ages.iterrows():

    ## candidates: close in lon, close in lat, and similar in diameter.
    ## NOTE: the lon window is a plain interval, so it does not wrap around at +/-180
    search = df_20_na[
        (df_20_na['lon' ].between( crater['lon'] - coord_error       , crater['lon'] + coord_error       )) &
        (df_20_na['lat' ].between( crater['lat'] - coord_error       , crater['lat'] + coord_error       )) &
        (df_20_na['diam'].between( crater['diam']*(1-diam_rel_error) , crater['diam']*(1+diam_rel_error) ))
    ]
    num_results = search.shape[0]

    if num_results == 1:
        ## exactly one candidate -> copy the age/density columns onto that catalog row
        df_20_na.loc[search.index, df_ages_cols_toinclude] = crater[df_ages_cols_toinclude].values

    else:
        ## zero or several candidates -> record for manual inspection
        messy_results.append([f'{num_results}', crater['name'], crater['diam'], crater['lon'], crater['lat']])


print(f'messy results: {len(messy_results) - 1}\n')
print_table(messy_results)

We don't care about the $\nu$ ('nu') crater: the Robbins age table gives no information for it (no ages or densities) besides its coordinates and diameter.

In [None]:
## rows of the merged catalog that received a Hartmann isochron age
has_hartmann_age = df_20_na["Hartmann Isochron Age"].notna()
num_with_age = len(df_20_na[has_hartmann_age])
print(f'num craters with resolved ages: {num_with_age}')

In [None]:
## write the merged catalog to `<dirpath_cwd>/output`
dirpath_out = dirpath_cwd / 'output'
dirpath_out.mkdir(exist_ok=True)
fpath_out = dirpath_out / 'craters_with_names_and_ages_50km.csv'

df_20_na.to_csv(fpath_out, index=False)

## read the file back in, to display what was actually written
df = pd.read_csv(fpath_out)
df

In [None]:
## digest of the written csv under every hash algorithm reported by `get_available_algorithms`
for alg in get_available_algorithms():
    digest = _calculate_hash_from_file(fpath_out, alg)
    print(f'{alg}: {digest}')

- xxh3_64: ea14d77f25f090c4
- md5: 4e63fd2a7f1367d131ee606edcdfb5f7
- sha1: 79113d236836e1d8bb53e517ab3cfc4afad2cac2
- sha256: e48808ef670e39e812149e4731634d59964b7b3465b1be38eda920f890125bdc

In [None]:
## spot-check on the re-read output file: rows named "Elston"
df.query('name == "Elston"')