# BIOMASS TDS

In [1]:
from dataclasses import dataclass

import numpy as np
import pandas as pd
from biotm import const, tm

%reload_ext watermark
%watermark -iv

pandas: 1.2.4
biotm : 0.8rc4
numpy : 1.20.2



In [2]:
def load_simulation(file: str) -> pd.DataFrame:
    """Read the simulation spreadsheet and expand its ground user annotation.

    The sheet is loaded with ``pd.read_excel``. An ``'Unnamed: 0'`` column is
    dropped when present, ``'DataTake_ID_uint'`` is renamed to ``'DTID'`` and
    the ``'Ground_User_Annotation'`` strings are parsed as base-2 integers into
    a new ``'GUANN'`` column. The ``'Ground_User_Annotation'`` and
    ``'DataTake_ID'`` columns are then removed.

    Each ``'GUANN'`` value is serialised to 4 big-endian bytes and decoded with
    ``tm.GroundUserAnnotation.frombytes``; the decoded fields (without
    ``'spare'``, and with ``'phase'`` replaced by its ``.name``) are appended as
    extra columns.

    Args:
        file: Path of the Excel file to read.

    Returns:
        The cleaned sheet with the decoded annotation fields concatenated
        column-wise.
    """
    frame = pd.read_excel(file)
    if 'Unnamed: 0' in frame:
        frame = frame.drop(columns=['Unnamed: 0'])
    frame = frame.rename(columns={'DataTake_ID_uint': 'DTID'})

    # The annotation is stored as a string of binary digits.
    frame['GUANN'] = frame['Ground_User_Annotation'].map(lambda bits: int(bits, 2))
    frame = frame.drop(columns=['Ground_User_Annotation', 'DataTake_ID'])

    decoded = []
    for word in frame['GUANN']:
        fields = tm.asdict(tm.GroundUserAnnotation.frombytes(word.to_bytes(4, 'big')))
        # Keep only the member name of 'phase' and discard the 'spare' field.
        fields['phase'] = fields['phase'].name
        del fields['spare']
        decoded.append(fields)

    return pd.concat([frame, pd.DataFrame(decoded)], axis=1)


@dataclass
class Grid:
    """Grid of consecutive, overlapping time intervals anchored at ``anx0``.

    Intervals are numbered from 1 to ``num_intervals``. Interval ``n`` nominally
    spans ``[anx0 + (n - 1) * length, anx0 + n * length]`` and is widened by
    ``initial_overlap`` on the left and ``final_overlap`` on the right. The last
    interval ends at ``anx1 + final_overlap`` instead.
    """

    # Reference time at which the grid starts.
    anx0: pd.Timestamp
    # Reference time used as the nominal end of the last interval.
    anx1: pd.Timestamp
    # Nominal interval length, in seconds.
    length: float
    # Seconds subtracted from the nominal start of every interval.
    initial_overlap: float
    # Seconds added to the nominal end of every interval.
    final_overlap: float
    # Number of intervals in the grid.
    num_intervals: int

    def interval(self, n: int) -> pd.Interval:
        """Return the ``n``-th interval of the grid (1-based), closed on both sides.

        Args:
            n: Interval number, from 1 to ``num_intervals`` inclusive.

        Returns:
            The interval including its initial and final overlaps.

        Raises:
            ValueError: If ``n`` is outside ``[1, num_intervals]``.
        """
        if n < 1 or n > self.num_intervals:
            # The message now states the range that the check actually enforces.
            raise ValueError(f'interval number must be included in [1, {self.num_intervals}]')
        left = self.anx0 + pd.to_timedelta((n - 1) * self.length - self.initial_overlap, 's')
        if n == self.num_intervals:
            # The last interval is tied to anx1 rather than to a multiple of length.
            right = self.anx1 + pd.to_timedelta(self.final_overlap, 's')
        else:
            right = self.anx0 + pd.to_timedelta(n * self.length + self.final_overlap, 's')
        return pd.Interval(left, right, closed='both')


## Simulation

In [3]:
# Load the data-take table and preview its first ten rows.
simulation = load_simulation(file='LOAD_TDS_DataTakes_Info.xlsx')

simulation.iloc[:10]

Unnamed: 0,Start,End,Duration,Satellite,SensorMode,Width,Length,SwathArea,Pass,NW_Lat,...,TAnx,DTID,GUANN,phase,drift,gcid,mcid,rcid,track,extcal
0,2017-03-02 06:07:23.674,2017-03-02 06:09:37.671,133.9976,BIOMASS,Stripmap S3,45.726444,920.783021,42272.382844,ASCENDING,13.000217,...,92.159289,7217244,3259367936,INT,False,1,2,3,1,0
1,2017-03-02 06:58:33.629,2017-03-02 06:59:05.629,32.0,BIOMASS,Stripmap S3,46.066408,218.881987,10115.599995,DESCENDING,-14.108058,...,3162.114872,7220314,3259367936,INT,False,1,2,3,1,0
2,2017-03-02 07:00:13.784,2017-03-02 07:00:45.784,32.0,BIOMASS,Stripmap S3,45.972868,218.602462,10129.389084,DESCENDING,-20.193068,...,3262.26956,7220414,3259367936,INT,False,1,2,3,1,0
3,2017-03-02 07:12:37.222,2017-03-02 07:26:17.823,820.600904,BIOMASS,Stripmap S3,47.620117,5547.132422,263829.89924,DESCENDING,-65.000022,...,4005.707629,7221157,3259367936,INT,False,1,2,3,1,0
4,2017-03-02 08:33:54.287,2017-03-02 08:42:30.585,516.297422,BIOMASS,Stripmap S3,46.166357,3527.670568,163412.548811,DESCENDING,-3.748089,...,2991.79112,7228335,3259368448,INT,False,1,2,3,2,0
5,2017-03-02 08:50:48.386,2017-03-02 09:03:54.985,786.598805,BIOMASS,Stripmap S3,47.608375,5317.654509,252827.216195,DESCENDING,-64.913518,...,4005.889653,7229349,3259368448,INT,False,1,2,3,2,0
6,2017-03-02 09:14:49.207,2017-03-02 09:24:04.132,554.925183,BIOMASS,Stripmap S3,45.975852,3798.521783,175210.650837,ASCENDING,5.984286,...,5446.710241,7230790,3259368448,INT,False,1,2,3,2,0
7,2017-03-02 09:52:31.728,2017-03-02 09:54:50.576,138.847848,BIOMASS,Stripmap S3,46.615131,959.617035,44629.25671,DESCENDING,66.013063,...,1818.250459,7235354,3259368960,INT,False,1,2,3,3,0
8,2017-03-02 09:55:59.171,2017-03-02 09:59:11.402,192.231047,BIOMASS,Stripmap S3,46.267302,1327.417554,61320.330305,DESCENDING,54.328795,...,2025.693823,7235561,3259368960,INT,False,1,2,3,3,0
9,2017-03-02 10:07:06.702,2017-03-02 10:09:14.761,128.058868,BIOMASS,Stripmap S3,45.030473,880.189203,40357.953185,DESCENDING,14.404644,...,2693.224739,7236229,3259368960,INT,False,1,2,3,3,0


## Acquisition parameters

Data take selected: 6

In [4]:
# Timing columns of the first ten data takes.
simulation.loc[:, ['Anx', 'Start', 'End', 'Duration']].iloc[:10]

Unnamed: 0,Anx,Start,End,Duration
0,2017-03-02 06:05:51.514,2017-03-02 06:07:23.674,2017-03-02 06:09:37.671,133.9976
1,2017-03-02 06:05:51.514,2017-03-02 06:58:33.629,2017-03-02 06:59:05.629,32.0
2,2017-03-02 06:05:51.514,2017-03-02 07:00:13.784,2017-03-02 07:00:45.784,32.0
3,2017-03-02 06:05:51.514,2017-03-02 07:12:37.222,2017-03-02 07:26:17.823,820.600904
4,2017-03-02 07:44:02.496,2017-03-02 08:33:54.287,2017-03-02 08:42:30.585,516.297422
5,2017-03-02 07:44:02.496,2017-03-02 08:50:48.386,2017-03-02 09:03:54.985,786.598805
6,2017-03-02 07:44:02.496,2017-03-02 09:14:49.207,2017-03-02 09:24:04.132,554.925183
7,2017-03-02 09:22:13.477,2017-03-02 09:52:31.728,2017-03-02 09:54:50.576,138.847848
8,2017-03-02 09:22:13.477,2017-03-02 09:55:59.171,2017-03-02 09:59:11.402,192.231047
9,2017-03-02 09:22:13.477,2017-03-02 10:07:06.702,2017-03-02 10:09:14.761,128.058868


In [5]:
# Start and end time of the selected data take (row label 6).
data_take_start, data_take_stop = simulation.loc[6, ['Start', 'End']]

data_take_start, data_take_stop

(Timestamp('2017-03-02 09:14:49.207000'),
 Timestamp('2017-03-02 09:24:04.132000'))

In [6]:
# The acquisition date is set 30 minutes after the end of the data take.
acquisition_station = 'SV'
acquisition_date = data_take_stop + pd.Timedelta(minutes=30)
acquisition_date

Timestamp('2017-03-02 09:54:04.132000')

In [7]:
anx_times = simulation['Anx']

# Latest 'Anx' value not after the data take start.
anx_before = anx_times[anx_times <= data_take_start].max()
# 'Anx' values falling inside the data take (bounds included).
anx_inside = anx_times[(data_take_start <= anx_times) & (anx_times <= data_take_stop)]
# Earliest 'Anx' value not before the data take stop.
anx_after = anx_times[anx_times >= data_take_stop].min()

# De-duplicate while keeping the order: before, inside, after.
anxs = pd.Series(
    pd.concat([pd.Series(anx_before), anx_inside, pd.Series(anx_after)]).unique()
)
anxs

0   2017-03-02 07:44:02.496
1   2017-03-02 09:22:13.477
2   2017-03-02 11:00:24.459
dtype: datetime64[ns]

## Instrument Raw Data

In [8]:
# Time validity = data take interval
validity = pd.Interval(data_take_start, data_take_stop, closed='both')

slice_numbers = range(1, const.NUM_SLICES + 1)

# Slice grid for the orbit starting at anxs.iloc[0]
sg = Grid(
    anx0=anxs.iloc[0],
    anx1=anxs.iloc[1],
    length=const.SLICE_GRID_DURATION,
    initial_overlap=const.SLICE_INITIAL_OVERLAP,
    final_overlap=const.SLICE_FINAL_OVERLAP,
    num_intervals=const.NUM_SLICES,
)
candidates = [(number, sg.interval(number)) for number in slice_numbers]

# Move the same grid to the next orbit (anxs.iloc[1] -> anxs.iloc[2])
# and add its slices as the second set.
sg.anx0, sg.anx1 = anxs.iloc[1], anxs.iloc[2]
candidates += [(number, sg.interval(number)) for number in slice_numbers]

rows = []
for number, nominal in candidates:
    # Discard slices which do not overlap the validity interval.
    if not nominal.overlaps(validity):
        continue
    rows.append({
        'slice': number,
        't_start': nominal.left,
        't_stop': nominal.right,
        # Clip the slice so that it never extends outside the validity interval.
        'start': max(validity.left, nominal.left),
        'stop': min(validity.right, nominal.right),
    })

slices = pd.DataFrame(rows)
slices['duration'] = (slices['stop'] - slices['start']).map(pd.Timedelta.total_seconds)

slices

Unnamed: 0,slice,t_start,t_stop,start,stop,duration
0,58,2017-03-02 09:14:13.397041180,2017-03-02 09:16:00.412848920,2017-03-02 09:14:49.207000000,2017-03-02 09:16:00.412848920,71.205848
1,59,2017-03-02 09:15:48.412848920,2017-03-02 09:17:35.428656660,2017-03-02 09:15:48.412848920,2017-03-02 09:17:35.428656660,107.015807
2,60,2017-03-02 09:17:23.428656660,2017-03-02 09:19:10.444464400,2017-03-02 09:17:23.428656660,2017-03-02 09:19:10.444464400,107.015807
3,61,2017-03-02 09:18:58.444464400,2017-03-02 09:20:45.460272140,2017-03-02 09:18:58.444464400,2017-03-02 09:20:45.460272140,107.015807
4,62,2017-03-02 09:20:33.460272140,2017-03-02 09:22:20.477000000,2017-03-02 09:20:33.460272140,2017-03-02 09:22:20.477000000,107.016727
5,1,2017-03-02 09:22:08.477000000,2017-03-02 09:23:55.492807740,2017-03-02 09:22:08.477000000,2017-03-02 09:23:55.492807740,107.015807
6,2,2017-03-02 09:23:43.492807740,2017-03-02 09:25:30.508615480,2017-03-02 09:23:43.492807740,2017-03-02 09:24:04.132000000,20.639192


### Scenario generation

In [9]:
def _instrument_raw_scenario(pid: int) -> dict:
    """Build the scenario description of the raw product identified by ``pid``."""
    product_type = f'RAW_{pid:03d}_10'
    return {
        'name': product_type,
        'file_name': f'l0pfs1{pid}.sh',
        'processor_name': 'procsim',
        'processor_version': '01.01',
        'task_name': 'N/A',
        'task_version': 'N/A',
        'log_level': 'debug',
        # Timestamps are written in ISO 8601 form with a trailing 'Z'.
        'begin_position': validity.left.isoformat() + 'Z',
        'end_position': validity.right.isoformat() + 'Z',
        'acquisition_date': acquisition_date.isoformat() + 'Z',
        'acquisition_station': acquisition_station,
        'baseline': 1,
        'outputs': [
            {
                'file_type': product_type,
            }
        ]
    }


# One scenario per pid, all sharing the validity interval computed above.
scenarios = list(map(_instrument_raw_scenario, (23, 24, 25, 26)))

## Platform Ancillary Raw Data

In [10]:
# Time validity = [data take start - initial margin, data take stop]
# (the margin is const.PLATFORM_ANCILLARY_INITIAL_MARGIN seconds)
validity = pd.Interval(
    data_take_start - pd.to_timedelta(const.PLATFORM_ANCILLARY_INITIAL_MARGIN, 's'),
    data_take_stop,
    closed='both',
)

slice_numbers = range(1, const.NUM_SLICES + 1)

# Slice grid for the orbit starting at anxs.iloc[0]
sg = Grid(
    anx0=anxs.iloc[0],
    anx1=anxs.iloc[1],
    length=const.SLICE_GRID_DURATION,
    initial_overlap=const.SLICE_INITIAL_OVERLAP,
    final_overlap=const.SLICE_FINAL_OVERLAP,
    num_intervals=const.NUM_SLICES,
)
candidates = [(number, sg.interval(number)) for number in slice_numbers]

# Move the same grid to the next orbit (anxs.iloc[1] -> anxs.iloc[2])
# and add its slices as the second set.
sg.anx0, sg.anx1 = anxs.iloc[1], anxs.iloc[2]
candidates += [(number, sg.interval(number)) for number in slice_numbers]

rows = []
for number, nominal in candidates:
    # Discard slices which do not overlap the validity interval.
    if not nominal.overlaps(validity):
        continue
    rows.append({
        'slice': number,
        't_start': nominal.left,
        't_stop': nominal.right,
        # Clip the slice so that it never extends outside the validity interval.
        'start': max(validity.left, nominal.left),
        'stop': min(validity.right, nominal.right),
    })

slices = pd.DataFrame(rows)
slices['duration'] = (slices['stop'] - slices['start']).map(pd.Timedelta.total_seconds)

slices

Unnamed: 0,slice,t_start,t_stop,start,stop,duration
0,58,2017-03-02 09:14:13.397041180,2017-03-02 09:16:00.412848920,2017-03-02 09:14:33.207000000,2017-03-02 09:16:00.412848920,87.205848
1,59,2017-03-02 09:15:48.412848920,2017-03-02 09:17:35.428656660,2017-03-02 09:15:48.412848920,2017-03-02 09:17:35.428656660,107.015807
2,60,2017-03-02 09:17:23.428656660,2017-03-02 09:19:10.444464400,2017-03-02 09:17:23.428656660,2017-03-02 09:19:10.444464400,107.015807
3,61,2017-03-02 09:18:58.444464400,2017-03-02 09:20:45.460272140,2017-03-02 09:18:58.444464400,2017-03-02 09:20:45.460272140,107.015807
4,62,2017-03-02 09:20:33.460272140,2017-03-02 09:22:20.477000000,2017-03-02 09:20:33.460272140,2017-03-02 09:22:20.477000000,107.016727
5,1,2017-03-02 09:22:08.477000000,2017-03-02 09:23:55.492807740,2017-03-02 09:22:08.477000000,2017-03-02 09:23:55.492807740,107.015807
6,2,2017-03-02 09:23:43.492807740,2017-03-02 09:25:30.508615480,2017-03-02 09:23:43.492807740,2017-03-02 09:24:04.132000000,20.639192


In [11]:
# Scenario for the raw product with pid 22, using the validity interval
# computed in the previous cell of this section.
scenarios = [
    {
        'name': f'RAW_{pid:03d}_10',
        'file_name': f'l0pfs1{pid}.sh',
        'processor_name': 'procsim',
        'processor_version': '01.01',
        'task_name': 'N/A',
        'task_version': 'N/A',
        'log_level': 'debug',
        # Timestamps are written in ISO 8601 form with a trailing 'Z'.
        'begin_position': validity.left.isoformat() + 'Z',
        'end_position': validity.right.isoformat() + 'Z',
        'acquisition_date': acquisition_date.isoformat() + 'Z',
        'acquisition_station': acquisition_station,
        # The key previously had no ': value,' and was therefore concatenated with
        # the next string literal, producing 'num_isp_erroneousbaseline': 1 and
        # dropping 'baseline'.
        # NOTE(review): 0 is a placeholder - confirm the intended number of
        # erroneous ISPs for this scenario.
        'num_isp_erroneous': 0,
        'baseline': 1,
        'outputs': [
            {
                'file_type': f'RAW_{pid:03d}_10',
            }
        ]
    }
    for pid in [22]
]