# BIOMASS telemetry

In [1]:
import math
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import BinaryIO, Literal, Optional, Union

import bpack
import pandas as pd
from dotenv import load_dotenv
from tqdm.notebook import tqdm

from biotm import tm

%reload_ext watermark
%watermark -v --iversions

Python implementation: CPython
Python version       : 3.9.5
IPython version      : 7.22.0

biotm : 0.6
pandas: 1.2.4
bpack : 0.8.0



In [2]:
def pathwalk(path=''):
    path = Path(path)
    for item in path.iterdir():
        if item.is_file():
            yield item
        elif item.is_dir():
            yield from pathwalk(item)


def apid(file):
    with file.open('rb') as fh:
        stream = tm.ISPStream(fh)
        isp = next(stream.iterpackets())
        return {k: v for k, v in vars(isp.packet_header).items() if k in ('pid', 'pcat')}


load_dotenv()
datadir = Path(os.getenv('DATADIR')).expanduser()
            
inventory = [{'version': 1, 'name': f.name, 'file': f} for f in pathwalk(datadir / 'ISPs') if f.suffix == '.isp']
inventory.extend([{'version': 2, 'name': f.name, 'file': f} for f in pathwalk(datadir / 'ISPs2' / 'Data') if f.suffix == '.isp'])
inventory = pd.DataFrame(inventory)

inventory = pd.concat([inventory, pd.DataFrame([apid(f) for f in inventory['file']])], axis=1)[['version', 'pid', 'pcat', 'name', 'file']]
inventory.sort_values(['version', 'pid'], inplace=True)

inventory

Unnamed: 0,version,pid,pcat,name,file
2,1,22,10,BIO_OPER_RAW_XB_SP__2210NNNN_20250101T060241.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
4,1,23,10,BIO_OPER_RAW_XB_SP__2310NNNN_20250101T060241.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
3,1,24,10,BIO_OPER_RAW_XB_SP__2410NNNN_20250101T060241.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
1,1,25,10,BIO_OPER_RAW_XB_SP__2510NNNN_20250101T060251.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
0,1,26,10,BIO_OPER_RAW_XB_SP__2610NNNN_20250101T060251.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
8,2,22,10,BIO_OPER_RAW_XB_SP__2210NNNN_20170101T060229.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
6,2,23,10,BIO_OPER_RAW_XB_SP__2310NNNN_20170101T060229.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
7,2,24,10,BIO_OPER_RAW_XB_SP__2410NNNN_20170101T060229.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
9,2,25,10,BIO_OPER_RAW_XB_SP__2510NNNN_20170101T060239.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...
5,2,26,10,BIO_OPER_RAW_XB_SP__2610NNNN_20170101T060239.2...,/Users/luca mariani/Workspace/data/BIOMASS/ISP...


In [3]:
def data_frame(file, source_data_type=tm.SourceDataType.Generic, maxcount=None):

    packet_header_size = tm.calcsize(tm.PacketHeader)
    if maxcount is None:
        maxcount = math.inf

    with file.open('rb') as fh:
        with tqdm(total=file.stat().st_size / 1024 ** 2, desc=file.name, unit=' MB') as pbar:

            stream = tm.ISPStream(fh)
            data = []
            
            for n, isp in enumerate(stream.iterpackets(source_data_type)):

                if n == maxcount:
                    break
                record = {
                    'size': packet_header_size + isp.packet_header.pdfl + 1
                }
                record.update(tm.asdict(isp.packet_header))
                record.update(tm.asdict(isp.data_field_header))

                if isinstance(isp.source_data, tm.SARSourceData):

                    sar_header = isp.source_data.sar_header()
                    record['fte'] = sar_header.fte
                    record.update(tm.asdict(sar_header.obt_stat))
                    record.update(synch=sar_header.synch, spct=sar_header.spct, prict=sar_header.prict)
                    record.update(bpack.asdict(sar_header.data_take_information_service))
                    record.update(
                        {k: v for k, v in bpack.asdict(sar_header.real_time_parameter_service).items()
                         if k not in ('sspa', 'rcm', 'sidx', 'spares')}
                    )
                    record['user_data_size'] = len(isp.source_data.user_data())

                elif isinstance(isp.source_data, tm.PlatformAncillarySourceData):

                    plt_anc_data = tm.asdict(isp.source_data.platform_ancillary_data())
                    for name in ('nav_ut', 'nav_prop_pos', 'nav_prop_vel', 'iae_dse_est_quat', 'iae_dse_est_ang_rate'):
                        param = plt_anc_data.pop(name)
                        for k, v in param.items():
                            plt_anc_data[f'{name}_{k}'] = v
                    for name in ('instr_heat_ctrl_therm', 'instr_mon_therm', 'comm_quat', 'comm_ang_rate'):
                        del plt_anc_data[name]
                    record.update({k: plt_anc_data[k] for k in sorted(plt_anc_data)})
                    
                elif isinstance(isp.source_data, tm.InstrumentAncillarySourceData):
                    
                    instrument_ancillary_data = isp.source_data.instrument_ancillary_data()
                    record.update(
                        hka_asw_version=instrument_ancillary_data.information_block.hka_asw_version,
                        conf_id = instrument_ancillary_data.information_block.conf_id
                    )
                    
                elif isinstance(isp.source_data, tm.SourceData):
                    
                    pass
                    
                else:

                    raise ValueError(f"unknown source data type: '{source_data_type}'")

                data.append(record)
                pbar.update(record['size'] / 1024 ** 2)

    data = pd.DataFrame(data)
    data.insert(1, 'offset', data['size'].cumsum() - data['size'])
    
    if 'fte' in data:
        timestamp = data[['ct', 'ft', 'fte']].apply(
            lambda r: pd.to_timedelta(tm.timestamp(r['ct'], r['ft'], r['fte']), 's'),
            axis=1
        )
    else:
        timestamp = data[['ct', 'ft']].apply(
            lambda r: pd.to_timedelta(tm.timestamp(r['ct'], r['ft']), 's'),
            axis=1
        )
    data.insert(2, 'timestamp', timestamp + pd.Timestamp('2000-01-01'))
    
    return data

In [4]:
df = data_frame(inventory.query('version == 2 and pid == 22')['file'][8], tm.SourceDataType.PlatformAncillary)

df.head().iloc[:,15:]

BIO_OPER_RAW_XB_SP__2210NNNN_20170101T060229.259.isp:   0%|          | 0/0.031236648559570312 [00:00<?, ? MB/s…

Unnamed: 0,ct,ft,aocs_mode,aocs_nm,aocs_version,drift_flag,iae_dse_est_ang_rate_x,iae_dse_est_ang_rate_y,iae_dse_est_ang_rate_z,iae_dse_est_quat_s,...,nav_prop_vel_x,nav_prop_vel_y,nav_prop_vel_z,nav_ut_day,nav_ut_dayf,nav_valid,obt_synch,orbit_number,roll_steering_idx,str_qual
0,536565786,31217,0,0,0,0,0.0,0.0,0.0,0.661045,...,-1220.621916,-1537.124712,7353.499007,57754.0,0.251728,0,0,0,0,0
1,536565787,31217,0,0,0,0,0.0,0.0,0.0,0.66137,...,-1228.73816,-1536.683574,7352.240976,57754.0,0.251739,0,0,0,0,0
2,536565788,31217,0,0,0,0,0.0,0.0,0.0,0.661695,...,-1236.852963,-1536.239508,7350.974543,57754.0,0.251751,0,0,0,0,0
3,536565789,31217,0,0,0,0,0.0,0.0,0.0,0.66202,...,-1244.966313,-1535.792517,7349.699707,57754.0,0.251762,0,0,0,0,0
4,536565790,31217,0,0,0,0,0.0,0.0,0.0,0.662345,...,-1253.078202,-1535.342599,7348.416472,57754.0,0.251774,0,0,0,0,0


In [5]:
df.columns

Index(['size', 'offset', 'timestamp', 'pktvers', 'type', 'dfh', 'pid', 'pcat',
       'gf', 'ssc', 'pdfl', 'pusvers', 'serv', 'subserv', 'dest', 'ct', 'ft',
       'aocs_mode', 'aocs_nm', 'aocs_version', 'drift_flag',
       'iae_dse_est_ang_rate_x', 'iae_dse_est_ang_rate_y',
       'iae_dse_est_ang_rate_z', 'iae_dse_est_quat_s', 'iae_dse_est_quat_x',
       'iae_dse_est_quat_y', 'iae_dse_est_quat_z', 'in_orbit_pos', 'nav_dt',
       'nav_gnss_pv_date', 'nav_gnss_pvt_val', 'nav_prop_pos_x',
       'nav_prop_pos_y', 'nav_prop_pos_z', 'nav_prop_vel_x', 'nav_prop_vel_y',
       'nav_prop_vel_z', 'nav_ut_day', 'nav_ut_dayf', 'nav_valid', 'obt_synch',
       'orbit_number', 'roll_steering_idx', 'str_qual'],
      dtype='object')

In [6]:
MJD_OFFSET = 2400000.5 

In [7]:
df['ut_timestamp'] = pd.to_datetime(df['nav_ut_day'] + df['nav_ut_dayf'] + MJD_OFFSET, unit='D', origin='julian')
df['difference'] = (df['timestamp'] - df['ut_timestamp']).apply(lambda d: d.total_seconds())

df[['timestamp', 'ut_timestamp', 'difference']]

Unnamed: 0,timestamp,ut_timestamp,difference
0,2017-01-01 06:03:06.476333618,2017-01-01 06:02:29.259701248,37.216632
1,2017-01-01 06:03:07.476333618,2017-01-01 06:02:30.259736064,37.216597
2,2017-01-01 06:03:08.476333618,2017-01-01 06:02:31.259730688,37.216602
3,2017-01-01 06:03:09.476333618,2017-01-01 06:02:32.259725312,37.216608
4,2017-01-01 06:03:10.476333618,2017-01-01 06:02:33.259719936,37.216613
...,...,...,...
98,2017-01-01 06:04:44.476333618,2017-01-01 06:04:07.259732992,37.216600
99,2017-01-01 06:04:45.476333618,2017-01-01 06:04:08.259727616,37.216606
100,2017-01-01 06:04:46.476333618,2017-01-01 06:04:09.259722240,37.216611
101,2017-01-01 06:04:47.476333618,2017-01-01 06:04:10.259716864,37.216616
