In [None]:
#default_exp l1a

# L1A Data
> Acquiring L1A PDS data automatically.

> Work in progress!

In [None]:
# export
import os
from pathlib import Path

import pandas as pd
from yarl import URL

# Root URL of the `data/` directory of the lrodlr_0001 EDR volume on
# pds-geosciences.wustl.edu; per-file URLs are built by appending path segments.
base_url_1 = URL(
    "https://pds-geosciences.wustl.edu/lro/lro-l-dlre-2-edr-v1/lrodlr_0001/data/"
)

In [None]:
def timestring_to_url(tstr):
    """Build the download URL of the EDR table for a given timestring.

    Parameters
    ----------
    tstr : str
        Timestring whose first eight characters are ``YYYYMMDD``,
        e.g. ``"2010010114"``.

    Returns
    -------
    yarl.URL
        ``base_url_1 / YYYY / YYYYMM / YYYYMMDD / "<tstr>_edr.tab"``.
    """
    # The notebook defines `base_url_1`; the name `base_url` used previously
    # does not exist and raised NameError on a fresh kernel.
    year, yearmonth, yearmonthday = tstr[:4], tstr[:6], tstr[:8]
    return base_url_1 / year / yearmonth / yearmonthday / f"{tstr}_edr.tab"

In [None]:
# Example timestring; timestring_to_url reads its first 8 characters as YYYYMMDD.
tstr = "2010010114"

In [None]:
# Display the URL built for the example timestring.
timestring_to_url(tstr)

URL('https://pds-geosciences.wustl.edu/lro/lro-l-dlre-2-edr-v1/lrodlr_0001/data/2010/201001/20100101/2010010114_edr.tab')

In [None]:
def read_l1a_data(fname, nrows=None):
    """Read an L1A table and return it indexed by time.

    Parameters
    ----------
    fname : str or path-like or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.
    nrows : int, optional
        Maximum number of data rows to read. ``None`` (default) reads the
        whole file.

    Returns
    -------
    pandas.DataFrame
        The table with column names taken from ``L1AHeader.columns``, passed
        through ``parse_times``.
    """
    df = pd.read_csv(
        fname,
        names=L1AHeader.columns,
        na_values="-9999",  # this string marks missing values
        skiprows=8,  # the first 8 lines are not data rows
        skipinitialspace=True,
        nrows=nrows,  # previously accepted by this function but never forwarded
    )
    return parse_times(df)


In [None]:
# Comma-separated column names, in file order; parsed into L1AHeader.columns
# and used as `names=` when reading tables. The two adjacent string literals
# are concatenated by Python (the first one ends in ", "). The irregular
# spacing is harmless because parse_header_line strips every field.
headerstring = (
    "Q, DATE, UTC, SCLK, SOUNDING, FROM_PKT, PKT_COUNT, SAFING, SAFED, FREEZING, FROZEN, ROLLING, DUMPING, MOVING, TEMP_FAULT,   SC_TIME_SECS,   SC_TIME_SUBS, TICKS_PKT_START, TICKS_AT_SC_TIME, OST_INDEX, EST_INDEX, SST_INDEX, LAST_AZ_CMD, LAST_EL_CMD, FPA_TEMP, FPB_TEMP, BAFFLE_A_TEMP, BAFFLE_B_TEMP, BB_1_TEMP, OBA_1_TEMP, ERROR_TIME, ERROR_ID,  ERROR_DETAIL , ERROR_COUNT, COMMANDS_RECEIVED, COMMANDS_EXECUTED, COMMANDS_REJECTED,    LAST_COMMAND_REC ,      CMD,  REQ_ID , LAST_TIME_COMMAND, LAST_EQX_PREDICTION, HYBRID_TEMP, FPA_TEMP_CYC, FPB_TEMP_CYC, BAFFLE_A_TEMP_CYC, BAFFLE_B_TEMP_CYC, OBA_1_TEMP_CYC, OBA_2_TEMP, BB_1_TEMP_CYC, BB_2_TEMP, SOLAR_TARGET_TEMP, YOKE_TEMP, EL_ACTUATOR_TEMP, AZ_ACTUATOR_TEMP,  MIN_15V, PLU_15V, SOLAR_BASE_TEMP, PLU_5V, "
    "A1_01, A1_02, A1_03, A1_04, A1_05, A1_06, A1_07, A1_08, A1_09, A1_10, A1_11, A1_12, A1_13, A1_14, A1_15, A1_16, A1_17, A1_18, A1_19, A1_20, A1_21, A2_01, A2_02, A2_03, A2_04, A2_05, A2_06, A2_07, A2_08, A2_09, A2_10, A2_11, A2_12, A2_13, A2_14, A2_15, A2_16, A2_17, A2_18, A2_19, A2_20, A2_21, A3_01, A3_02, A3_03, A3_04, A3_05, A3_06, A3_07, A3_08, A3_09, A3_10, A3_11, A3_12, A3_13, A3_14, A3_15, A3_16, A3_17, A3_18, A3_19, A3_20, A3_21, A4_01, A4_02, A4_03, A4_04, A4_05, A4_06, A4_07, A4_08, A4_09, A4_10, A4_11, A4_12, A4_13, A4_14, A4_15, A4_16, A4_17, A4_18, A4_19, A4_20, A4_21, A5_01, A5_02, A5_03, A5_04, A5_05, A5_06, A5_07, A5_08, A5_09, A5_10, A5_11, A5_12, A5_13, A5_14, A5_15, A5_16, A5_17, A5_18, A5_19, A5_20, A5_21, A6_01, A6_02, A6_03, A6_04, A6_05, A6_06, A6_07, A6_08, A6_09, A6_10, A6_11, A6_12, A6_13, A6_14, A6_15, A6_16, A6_17, A6_18, A6_19, A6_20, A6_21, B1_01, B1_02, B1_03, B1_04, B1_05, B1_06, B1_07, B1_08, B1_09, B1_10, B1_11, B1_12, B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B2_01, B2_02, B2_03, B2_04, B2_05, B2_06, B2_07, B2_08, B2_09, B2_10, B2_11, B2_12, B2_13, B2_14, B2_15, B2_16, B2_17, B2_18, B2_19, B2_20, B2_21, B3_01, B3_02, B3_03, B3_04, B3_05, B3_06, B3_07, B3_08, B3_09, B3_10, B3_11, B3_12, B3_13, B3_14, B3_15, B3_16, B3_17, B3_18, B3_19, B3_20, B3_21"
)

In [None]:
def parse_header_line(line):
    """Turn one header line into a list of clean, lower-case field names.

    Any ``#`` characters at either end of the line are removed first. The
    line is split on commas when it contains at least one comma, otherwise
    on runs of whitespace. Each field is then stripped and lower-cased.

    >>> parse_header_line(' a   b  c    ')
    ['a', 'b', 'c']
    >>> parse_header_line('  a, b  ,   c ')
    ['a', 'b', 'c']
    """
    content = line.strip("#")
    fields = content.split(",") if "," in content else content.split()
    return [field.strip().lower() for field in fields]


class L1AHeader:
    """Column-name bookkeeping for L1A tables.

    All attributes are plain lists of lower-case strings, computed once when
    the class body is executed.
    """

    # Every column name in file order. Note that parse_header_line lower-cases
    # the names, so they differ in case from `headerstring`.
    columns = parse_header_line(headerstring)

    # a1_01 ... a6_21
    tel1cols = [f"a{i}_{j:02d}" for i in range(1, 7) for j in range(1, 22)]
    # b1_01 ... b3_21
    tel2cols = [f"b{i}_{j:02d}" for i in range(1, 4) for j in range(1, 22)]

    datacols = tel1cols + tel2cols

    # Everything in `columns` that is not a data column, sorted alphabetically.
    metadatacols = sorted(set(columns) - set(datacols))

In [None]:
def parse_times(df):
    """Return a new frame indexed by timestamps built from ``date`` and ``utc``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain string columns ``date`` (e.g. ``'01-Jan-2010'``) and
        ``utc`` (e.g. ``'14:00:01.224'``).

    Returns
    -------
    pandas.DataFrame
        A new frame whose index is the parsed (timezone-naive) timestamps and
        which no longer contains the ``date`` and ``utc`` columns. The input
        frame is left untouched.
    """
    time_format = "%d-%b-%Y %H:%M:%S.%f"
    # No rounding of the seconds is needed here because the utc strings
    # already carry a 3-digit millisecond part: '19:00:00.793'
    times = pd.to_datetime(
        df["date"] + " " + df["utc"], format=time_format, utc=False
    )
    # Non-inplace set_index: the previous inplace=True silently replaced the
    # index of the caller's DataFrame as a side effect.
    return df.set_index(times).drop(columns=["date", "utc"])

In [None]:
class FileName:
    """Managing class for file name attributes.

    An instance splits a file path into the following string attributes:

    * ``dirname``  - directory part of the path
    * ``basename`` - file name without directory
    * ``file_id``  - ``basename`` without its extension
    * ``ext``      - the extension as returned by ``os.path.splitext``
      (this shadows the class-level ``ext`` on the instance)
    * ``tstr``     - the part of ``file_id`` before the first ``'_'``
    * ``rest``     - the remainder of ``basename`` after ``tstr``
    * ``divhour``  - ``DivTime(tstr)``
    """

    ext = ""  # fill in child class !
    datapath = Path.home()  # fill in child class !

    @classmethod
    def from_tstr(cls, tstr):
        """Create an instance for ``<datapath>/<tstr><ext>``."""
        return cls(os.path.join(cls.datapath, tstr + cls.ext))

    def __init__(self, fname):
        super().__init__()
        # os.path is used explicitly: the bare names `pjoin` and `path` used
        # before were never imported in this notebook.
        self.basename = os.path.basename(fname)
        self.dirname = os.path.dirname(fname)
        self.file_id, self.ext = os.path.splitext(self.basename)
        self.tstr = self.file_id.split("_")[0]
        # as Diviner FILES only exist in separations of hours I use DivTime
        # here.
        # NOTE(review): DivTime is neither defined nor imported in this
        # notebook; it must be in scope before an instance can be created.
        self.divhour = DivTime(self.tstr)
        # everything that follows tstr in the basename, *including* the
        # leading '_' (if any) and the extension
        self.rest = self.basename[len(self.tstr) :]

    @property
    def path(self):
        """Alias of :attr:`fname`."""
        return self.fname

    @property
    def name(self):
        """Alias of :attr:`fname`."""
        return self.fname

    @property
    def fname(self):
        """Full path rebuilt from ``dirname``, ``tstr`` and ``rest``."""
        return os.path.join(self.dirname, self.tstr + self.rest)

    def __str__(self):
        return (
            f"{self.__class__.__name__}\n"
            f"Dir: {self.dirname}\n"
            f"Base: {self.basename}"
        )

    def __repr__(self):
        return self.__str__()


class L1AFileName(FileName):
    """FileName variant for files whose names end in ``_L1A.TAB``."""

    ext = "_L1A.TAB"
    # NOTE(review): `l1adatapath` is not defined anywhere in this notebook, so
    # executing this cell raises NameError. Define or import it before this
    # class.
    datapath = l1adatapath

NameError: name 'l1adatapath' is not defined

In [None]:
class L1ADataFile:
    """Reader for a single L1A table.

    Parameters
    ----------
    fname : str or path-like or file-like
        Handed unchanged to ``pandas.read_csv``; the notebook passes a URL
        string.

    Attributes
    ----------
    fname : as given
    header : L1AHeader
        Supplies the column names.
    df : None
        Placeholder; no method of this class assigns to it.
    """

    def __init__(self, fname):
        self.fname = fname
        self.header = L1AHeader()
        self.df = None

    def parse_tab(self):
        """Read the raw table; no time parsing and no cleaning."""
        return pd.read_csv(
            self.fname,
            names=self.header.columns,
            na_values="-9999",  # this string marks missing values
            skiprows=8,  # the first 8 lines are not data rows
            skipinitialspace=True,
        )

    def parse_times(self, df):
        """Delegate to the module-level ``parse_times``."""
        return parse_times(df)

    def clean(self, df):
        """Cleaning step applied by ``read(dirty=False)``; not implemented yet.

        An earlier draft called ``prepare_data(df)`` and ``define_sdtype(df)``
        here, but neither exists in this notebook. Raising explicitly replaces
        the AttributeError that ``read()`` used to hit because this method was
        missing altogether.
        """
        raise NotImplementedError(
            "L1ADataFile.clean is not implemented; use read(dirty=True) or "
            "read_dirty() to get the uncleaned data."
        )

    def open_dirty(self):
        """Alias of :meth:`read_dirty`."""
        return self.read_dirty()

    def read_dirty(self):
        """Return the time-indexed table without cleaning."""
        return self.read(dirty=True)

    def read(self, dirty=False):
        """Read the table and index it by time.

        Parameters
        ----------
        dirty : bool
            If True, return the frame right after time parsing. If False
            (default), additionally pass it through :meth:`clean`.
        """
        df = self.parse_times(self.parse_tab())
        if dirty:
            return df
        return self.clean(df)


In [None]:
# The URL is passed as a plain string; parse_tab later hands it to pandas' read_csv.
l1a = L1ADataFile(str(timestring_to_url(tstr)))

In [None]:
# Raw parse only (no time index, no cleaning).
# NOTE(review): in the saved output the date/time values sit to the left of the
# `q` column, which suggests the names in L1AHeader.columns do not line up with
# this file's columns - TODO confirm the column layout of the EDR files.
l1a.parse_tab()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,q,date,utc,sclk,sounding,from_pkt,pkt_count,safing,safed,freezing,...,b3_12,b3_13,b3_14,b3_15,b3_16,b3_17,b3_18,b3_19,b3_20,b3_21
0,01-Jan-2010,14:00:01.224,2.840472e+08,5,10563,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,29190.0,,,,,,,,,,,-9999,,,,...,27548,27569,27564,27524,27551,27551,27525,27547,27573,27558
0,01-Jan-2010,14:00:01.352,2.840472e+08,6,10564,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,,,,,,,,,,-9999,,,,...,27542,27571,27564,27528,27555,27554,27532,27548,27579,27556
0,01-Jan-2010,14:00:01.480,2.840472e+08,7,10564,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,29032.0,,,,,,,,,,-9999,,,,...,27549,27565,27565,27523,27551,27549,27525,27547,27579,27557
0,01-Jan-2010,14:00:01.608,2.840472e+08,8,10565,9249.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0x00,284047159.0,60311.0,222890015.0,222888713.0,0x9EA4,49,1.0,1.0,,,,,,,,,,,,10384.0,19015.0,15.0,16.0,283890685.0,4.0,0x000000,93.0,1294.0,1265.0,...,27540,27563,27564,27526,27551,27555,27525,27542,27571,27546
0,01-Jan-2010,14:00:01.736,2.840472e+08,9,10565,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,28928.0,,,,,,,,,-9999,,,,...,27545,27565,27569,27526,27551,27553,27529,27545,27574,27553
0,01-Jan-2010,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,01-Jan-2010,15:00:00.544,2.840508e+08,12,11753,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,,,,,,,,,,-9999,,,,...,27529,27529,27545,27509,27530,27526,27493,27507,27538,27519
0,01-Jan-2010,15:00:00.672,2.840508e+08,13,11754,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,,,,,,,,,,-9999,,,,...,27523,27532,27539,27506,27535,27531,27498,27511,27541,27525
0,01-Jan-2010,15:00:00.800,2.840508e+08,14,11754,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,,,,,,,,,,-9999,,,,...,27523,27538,27540,27498,27529,27531,27501,27515,27548,27530
0,01-Jan-2010,15:00:00.928,2.840508e+08,15,11755,,,,,,,,,,,,,,,-9999,-9999,,,,,,,,,,,,,,,,,,,,-9999,,,,...,27524,27541,27541,27498,27524,27530,27498,27524,27557,27536
