## Data access notebook for AMT inherent optical properties and HPLC pigments

This notebook provides examples of data access for Inherent Optical Properties and HPLC phytoplankton pigment concentrations from the AMT cruises. Examples are provided for SeaBASS (available at https://seabass.gsfc.nasa.gov/archive/PML/AMT) and NetCDF data formats (available at https://zenodo.org/records/12527954). The data set is fully described in Jordan et al. (2024), A compilation of surface inherent optical properties and phytoplankton pigment concentrations from the Atlantic Meridional Transect, submitted to Earth System Science Data.


### 1. Data access from SeaBASS files
Each cruise has a separate SeaBASS files for each ACS, AC9 (if present) and pigment data set that were collected. AMT 28 is used as an example below.

In [1]:
import SB_support as sbs # the SB support module is a recommended file reader for SeaBASS files in python 

In [2]:
# directory for SeaBASS datafiles
data_dir_sb = '/users/rsg/tjor/scratch_network/AMT_underway/AMT28/Source/SeaBASS_submit/sb_processed/' 

### 1.1 ACS data

In [3]:
fn_sb = data_dir_sb + 'AMT28_InLine0_ACS_20180925_20181018_Particulate_v20240531.sb'
acs = sbs.readSB(fn_sb, no_warn = True)

In [4]:
# subset of headers which are variable names
print(acs.variables)

OrderedDict([('date', ('date', 'yyyymmdd')), ('time', ('time', 'hh:mm:ss')), ('lat', ('lat', 'degrees')), ('lon', ('lon', 'degrees')), ('wt', ('wt', 'degreesc')), ('sal', ('sal', 'psu')), ('ap400.0', ('ap400.0', '1/m')), ('ap402.0', ('ap402.0', '1/m')), ('ap404.0', ('ap404.0', '1/m')), ('ap406.0', ('ap406.0', '1/m')), ('ap408.0', ('ap408.0', '1/m')), ('ap410.0', ('ap410.0', '1/m')), ('ap412.0', ('ap412.0', '1/m')), ('ap414.0', ('ap414.0', '1/m')), ('ap416.0', ('ap416.0', '1/m')), ('ap418.0', ('ap418.0', '1/m')), ('ap420.0', ('ap420.0', '1/m')), ('ap422.0', ('ap422.0', '1/m')), ('ap424.0', ('ap424.0', '1/m')), ('ap426.0', ('ap426.0', '1/m')), ('ap428.0', ('ap428.0', '1/m')), ('ap430.0', ('ap430.0', '1/m')), ('ap432.0', ('ap432.0', '1/m')), ('ap434.0', ('ap434.0', '1/m')), ('ap436.0', ('ap436.0', '1/m')), ('ap438.0', ('ap438.0', '1/m')), ('ap440.0', ('ap440.0', '1/m')), ('ap442.0', ('ap442.0', '1/m')), ('ap444.0', ('ap444.0', '1/m')), ('ap446.0', ('ap446.0', '1/m')), ('ap448.0', ('ap448.

In [5]:
# IOP data are stored in individual wavelngth bins, and an example of data access at 440 nm is shown below:
ap_440 = acs.data['ap440.0'] # particulate absorption
bp_440 = acs.data['ap440.0'] # particulate scattering
cp_440 = acs.data['cp440.0'] # particulate beam attenuation


ap_440_u = acs.data['ap440.0_unc'] # particulate absorption uncertainty
bp_440_u = acs.data['ap440.0_unc'] # particulate scattering uncertainty
cp_440_u = acs.data['cp440.0_unc'] # particulate beam attenuation uncertainty

In [6]:
# The chl line-height data is (post HPLC calibration) is accessed as follows:
chl_acs = acs.data['chl_lineheight']

In [7]:
# Corresponding times, dates, and geographic coordinates are acessed as follows:
time = acs.data['time'] 
date = acs.data['date'] 
lat = acs.data['lat'] 
lon = acs.data['lon'] 


### 1.2 AC9 data

In [8]:
fn_sb = data_dir_sb + 'AMT28_InLine0_AC9_20180925_20181028_Particulate_v20240531.sb'
ac9 = sbs.readSB(fn_sb, no_warn = True)

In [9]:
# subset of headers which are variable names. These headers can be used as keys to access data, similar to the ACS example above
print(ac9.variables)

OrderedDict([('date', ('date', 'yyyymmdd')), ('time', ('time', 'hh:mm:ss')), ('lat', ('lat', 'degrees')), ('lon', ('lon', 'degrees')), ('wt', ('wt', 'degreesc')), ('sal', ('sal', 'psu')), ('ap412.0', ('ap412.0', '1/m')), ('ap440.0', ('ap440.0', '1/m')), ('ap488.0', ('ap488.0', '1/m')), ('ap510.0', ('ap510.0', '1/m')), ('ap532.0', ('ap532.0', '1/m')), ('ap554.0', ('ap554.0', '1/m')), ('ap650.0', ('ap650.0', '1/m')), ('ap676.0', ('ap676.0', '1/m')), ('ap715.0', ('ap715.0', '1/m')), ('ap412.0_unc', ('ap412.0_unc', '1/m')), ('ap440.0_unc', ('ap440.0_unc', '1/m')), ('ap488.0_unc', ('ap488.0_unc', '1/m')), ('ap510.0_unc', ('ap510.0_unc', '1/m')), ('ap532.0_unc', ('ap532.0_unc', '1/m')), ('ap554.0_unc', ('ap554.0_unc', '1/m')), ('ap650.0_unc', ('ap650.0_unc', '1/m')), ('ap676.0_unc', ('ap676.0_unc', '1/m')), ('ap715.0_unc', ('ap715.0_unc', '1/m')), ('bp412.0', ('bp412.0', '1/m')), ('bp440.0', ('bp440.0', '1/m')), ('bp488.0', ('bp488.0', '1/m')), ('bp510.0', ('bp510.0', '1/m')), ('bp532.0', ('

In [10]:
### 1.3 HPLC data
fn_sb = data_dir_sb + 'AMT28_HPLC_20180926_20181027_v20240531.sb'
hplc = sbs.readSB(fn_sb, no_warn = True)

In [11]:
# subset of headers which are variable names. 
print(hplc.variables)

OrderedDict([('sample', ('sample', 'none')), ('station', ('station', 'none')), ('depth', ('depth', 'm')), ('lat', ('lat', 'degrees')), ('lon', ('lon', 'degrees')), ('year', ('year', 'yyyy')), ('month', ('month', 'mo')), ('day', ('day', 'dd')), ('time', ('time', 'hh:mm:ss')), ('bottle', ('bottle', 'none')), ('volfilt', ('volfilt', 'l')), ('allo', ('allo', 'ug/l')), ('alpha-beta-car', ('alpha-beta-car', 'ug/l')), ('but-fuco', ('but-fuco', 'ug/l')), ('chl_c1c2', ('chl_c1c2', 'ug/l')), ('chl_c3', ('chl_c3', 'ug/l')), ('chlide_a', ('chlide_a', 'ug/l')), ('diadino', ('diadino', 'ug/l')), ('diato', ('diato', 'ug/l')), ('dp', ('dp', 'ug/l')), ('dv_chl_a', ('dv_chl_a', 'ug/l')), ('fuco', ('fuco', 'ug/l')), ('hex-fuco', ('hex-fuco', 'ug/l')), ('lut', ('lut', 'ug/l')), ('mv_chl_a', ('mv_chl_a', 'ug/l')), ('neo', ('neo', 'ug/l')), ('perid', ('perid', 'ug/l')), ('phide_a', ('phide_a', 'ug/l')), ('phytin_a', ('phytin_a', 'ug/l')), ('ppc', ('ppc', 'ug/l')), ('pras', ('pras', 'ug/l')), ('psc', ('psc',

In [12]:
# Primary pigments can be accessed as follows:

# Chlorophylls
Tot_Chl_a = hplc.data['tot_chl_a']
Tot_Chl_b = hplc.data['tot_chl_b']
Tot_Chl_c = hplc.data['tot_chl_c']

# Photoprotective carotenoids 
Allo = hplc.data['allo']
Alpha_beta_car =hplc.data['alpha-beta-car']
Diato = hplc.data['diato']
Diadino = hplc.data['diadino']
Zea = hplc.data['zea']
PPC = hplc.data['ppc']

# Photosynthetic carotenoids
But_fuco = hplc.data['but-fuco']
Fuco = hplc.data['fuco']
Hex_fuco = hplc.data['hex-fuco']
Period = hplc.data['perid']
PSC = hplc.data['psc']

In [13]:
# Corresponding times, dates (year, month, day) and geographic coordinates are acessed as follows:

time = hplc.data['time'] 
date = hplc.data['year'] 
date = hplc.data['month'] 
date = hplc.data['day'] 
lat = hplc.data['lat'] 
lon = hplc.data['lon'] 

### 2. Data access from NetCDF files

Each cruise has a separate NetCDF file, which contains data associated with Inherent Optical Properties (ACS systems, AC9 systems if present, underway meta data) and HPLC pigments (pigment concentrations and associated metadata).

In [14]:
import xarray as xr # the xarray module is recommended to open the NetCDF files in python

In [15]:
# directory for NetCDF data files
data_dir_nc = '/data/datasets/cruise_data/active/ACS_Chl/AMT_ACS_2023reprocessing/AMT/'

### 2.1  IOP data: cruises with a single ACS system 
 AMT 29, AMT 27, AMT 26, AMT 23 and AMT 22 had a single ACS system and no AC9 system. AMT 27 is used as an example
 

In [16]:
fn_nc = 'amt27_final_with_debiased_chl.nc'
data_nc = xr.open_dataset(data_dir_nc + fn_nc)

In [17]:
# These are the complete set of data keys 
keys = data_nc.keys() 
print(keys)

KeysView(<xarray.Dataset>
Dimensions:                      (time: 36482, acs_wv: 176, ac9_wv: 9,
                                  bb3_wv: 3, hplc_time: 48)
Coordinates:
  * time                         (time) datetime64[ns] 2017-09-24T16:10:59.99...
  * acs_wv                       (acs_wv) float64 400.0 402.0 ... 748.0 750.0
  * ac9_wv                       (ac9_wv) float64 nan nan nan ... nan nan nan
  * bb3_wv                       (bb3_wv) float64 470.0 532.0 700.0
  * hplc_time                    (hplc_time) datetime64[ns] 2017-09-24T17:27:...
Data variables: (12/92)
    flow                         (time) float64 ...
    acs_chl                      (time) float64 ...
    acs_ap                       (time, acs_wv) float64 ...
    acs_ap_u                     (time, acs_wv) float64 ...
    acs_bp                       (time, acs_wv) float64 ...
    acs_bp_u                     (time, acs_wv) float64 ...
    ...                           ...
    hplc_Tpg                     (hplc

In [18]:
# These are the keys associated with the ACS system 
acs_keys = [s for s in keys  if "acs" in s]
print(acs_keys)

['acs_chl', 'acs_ap', 'acs_ap_u', 'acs_bp', 'acs_bp_u', 'acs_cp', 'acs_cp_u', 'acs_N', 'acs_chl_debiased', 'acs_chl_nomedfilt', 'acs_chl_debiased_nomedfilt']


In [19]:
# These are the ACS IOP data fields and uncertainties

ap = data_nc['acs_ap'] # particulate absorption
bp = data_nc['acs_bp'] # particulate scattering
cp = data_nc['acs_cp'] # particulate beam attenuation

ap_u = data_nc['acs_ap_u']  # particulate absorption uncertainty
bp_u = data_nc['acs_bp_u']  # particulate scattering uncertainty
cp_u = data_nc['acs_cp_u']  # particulate beam attenuation uncertainty

wl = data_nc['acs_wv'] # wavelength of ACS IOP data
time = data_nc['time'] # timestamp of ACS IOP data


In [20]:
# These are the HPLC-callibrated (debiased) Tot_Chl_a transects

chl_acs =  data_nc['acs_chl_debiased'] # transect with median filter (30 min kernel) as used in callibration match-ups 
chl_acs =  data_nc['acs_chl_debiased_nomedfilt'] #  unfiltered transect (as saved in SeaBASS files)

print(data_nc['acs_chl_debiased']) # data regarding the calibration are stored as attributes

<xarray.DataArray 'acs_chl_debiased' (time: 36482)>
[36482 values with dtype=float64]
Coordinates:
  * time     (time) datetime64[ns] 2017-09-24T16:10:59.999997440 ... 2017-11-...
Attributes:
    debiasing_equation:   acs.acs_chl_deiased = acs.acs_chl*(1-delta)
    delta:                -0.07378529880943258
    sigma:                0.11944447655085341
    units:                mg/m3
    comments:             delta=np.nanmedian(rres), sigma=prcrng(rres), rres=...
    HPLC_Tot_chla:        [0.31251738 0.28942639 0.21157547 0.10724928 0.0775...
    HPLC_Tot_chla_units:  mg/m3
    acs_chl:              [0.54988796 0.24047638 0.16435336 0.19168315 0.0771...
    acs_chl_units:        mg/m3
    processed_on:         2024-02-27 13:41:40.862009
    match_up_dates:       ['2017-09-24 17:27:00', '2017-09-25 11:57:00', '201...


In [21]:
# These are the keys associated with underway meta data, which have the same temporal binning as the ACS data
uway_keys = [s for s in keys if "uway" in s]
print(uway_keys)

['uway_lat', 'uway_gndcourse', 'uway_heading', 'uway_gndspeed', 'uway_roll', 'uway_pitch', 'uway_heave', 'uway_depthm', 'uway_flowrate', 'uway_fluo', 'uway_trans', 'uway_wind_vel', 'uway_air_temp', 'uway_wind_dir', 'uway_humidity', 'uway_baro', 'uway_par1', 'uway_tir1', 'uway_par2', 'uway_tir2', 'uway_sal', 'uway_sst', 'uway_thermosalinograph_temp', 'uway_conductivity', 'uway_lon']


In [22]:
# Geographic coordinates
lat =  data_nc['uway_lat'] 
lon =  data_nc['uway_lon'] 

# Sea surface temperature and salinity
sst =  data_nc['uway_sst'] 
sal =  data_nc['uway_sal'] 

### 2.2 IOP data: cruises with two ACS systems

AMT 24 and AMT 25 had 2 ACS systems and no AC9 system. AMT 25 is used as an example.

In [23]:
fn_nc = 'amt25_final_with_debiased_chl.nc'
data_nc = xr.open_dataset(data_dir_nc + fn_nc)
keys = data_nc.keys()

In [24]:
# These are the keys associated with the first ACS system 
acs_keys = [s for s in keys  if "acs" in s]
print(acs_keys)

# These are the keys associated with the second ACS system 
acs2_keys = [s for s in keys  if "acs2" in s]
print(acs2_keys)

['acs_chl', 'acs_ap', 'acs_ap_u', 'acs_bp', 'acs_bp_u', 'acs_cp', 'acs_cp_u', 'acs_N', 'acs2_chl', 'acs2_ap', 'acs2_ap_u', 'acs2_bp', 'acs2_bp_u', 'acs2_cp', 'acs2_cp_u', 'acs2_N', 'acs_chl_debiased', 'acs2_chl_debiased', 'acs_chl_nomedfilt', 'acs_chl_debiased_nomedfilt', 'acs2_chl_nomedfilt', 'acs2_chl_debiased_nomedfilt']
['acs2_chl', 'acs2_ap', 'acs2_ap_u', 'acs2_bp', 'acs2_bp_u', 'acs2_cp', 'acs2_cp_u', 'acs2_N', 'acs2_chl_debiased', 'acs2_chl_nomedfilt', 'acs2_chl_debiased_nomedfilt']


In [25]:
# Data from the first ACS system are acccessed as before.
# Data from the second ACS system, are accessed using `acs2_XXX' as a key identifier.

ap = data_nc['acs_ap'] # particulate absorption - system 1
ap_u = data_nc['acs_ap_u']  # particulate absorption uncertainty -system 2

ap = data_nc['acs2_ap'] # particulate absorption - system 1
ap_u = data_nc['acs2_ap_u']  # particulate absorption uncertainty - system 3

wl = data_nc['acs_wv'] # wavelength of ACS IOP data - system 1
wl = data_nc['acs2_wv'] # wavelength of ACS IOP data - system 2
time = data_nc['time'] # timestamp of ACS IOP data for both systems (acs and acs2 data are nan-padded where data is missing)

###  2.3 IOP data: cruises with an AC9 system


AMT 19 and AMT 28 had and ACS and AC9 system. AMT 28 is used as an example for AC9 data access.

In [26]:
fn_nc = 'amt28_final_with_debiased_chl.nc'
data_nc = xr.open_dataset(data_dir_nc + fn_nc)


In [27]:
# These are the keys associated with the AC9 system 
ac9_keys = [s for s in keys  if "ac9" in s]
print(ac9_keys)

[]


In [28]:
# Similar to the ACS data, AC9 IOP data can be accessed as follows:
ap = data_nc['ac9_ap'] # particulate absorption
ap_u = data_nc['ac9_ap_u']  # particulate absorption uncertainty

wl = data_nc['ac9_wv'] # wavelength of ACS IOP data
time = data_nc['time'] # timestamp of ACS IOP data

In [29]:
# The HPLC-callibrated (debiased) Tot_Chl_a transects for AMT 19 and 28 combine ACS and AC9 line-height estimates.
# These data can be accessed as follows:

chl_acx =  data_nc['acx_chl_debiased'] # transect with median filter (30 min kernel) as used in callibration match-ups 
chl_acx =  data_nc['acx_chl_debiased_nomedfilt'] #  unfiltered transect (as saved in SeaBASS files)

print(data_nc['acx_chl_debiased']) # data regarding the calibration are stored as attributes

<xarray.DataArray 'acx_chl_debiased' (time: 33331)>
[33331 values with dtype=float64]
Coordinates:
  * time     (time) datetime64[ns] 2018-09-25T21:11:00.000000768 ... 2018-10-...
Attributes:
    debiasing_equation:   acs.acx_chl_deiased = acs.acx_chl*(1-delta)
    delta:                -0.08622876858169187
    sigma:                0.15975085813203055
    units:                mg/m3
    comments:             delta=np.nanmedian(rres), sigma=prcrng(rres), rres=...
    HPLC_Tot_chla:        [  nan   nan   nan   nan 0.706   nan   nan   nan 0....
    HPLC_Tot_chla_units:  mg/m3
    acx_chl:              [       nan 0.43558088 0.46063176 0.45621889 0.5321...
    acx_chl_units:        mg/m3
    processed_on:         2024-02-29 15:46:22.453958
    match_up_dates:       ['2018-09-25 12:35:00', '2018-09-26 03:34:00', '201...


### 2.4 HPLC data.
HPLC data is stored in a similar way within each NetCDF file.

In [30]:
fn_nc = 'amt28_final_with_debiased_chl.nc'
data_nc = xr.open_dataset(data_dir_nc + fn_nc)

In [31]:
# These are the keys associated with HPLC pigments
hplc_keys = [s for s in keys  if "hplc" in s]
print(hplc_keys)


['hplc_label', 'hplc_sample_collector', 'hplc_cruise_name', 'hplc_volume_filtered_(l)', 'hplc_name_of_water_body', 'hplc_depth', 'hplc_filter_storage_before_analysis', 'hplc_filter_type', 'hplc_filter_diameter', 'hplc_Tot_Chl_a', 'hplc_Tot_Chl_b', 'hplc_Tot_Chl_c', 'hplc_Alpha-beta-Car', 'hplc_Allo', 'hplc_But-fuco', 'hplc_Diadino', 'hplc_Diato', 'hplc_Fuco', 'hplc_Hex-fuco', 'hplc_Perid', 'hplc_Zea', 'hplc_Chl_a', 'hplc_DV_Chl_a', 'hplc_Chlide_a', 'hplc_Mg_DVP', 'hplc_PML_only_Chl_b_DVChl_b', 'hplc_Chl_c1', 'hplc_Chl_c2', 'hplc_Chl_c3', 'hplc_beta-epi-Car', 'hplc_beta-beta-Car', 'hplc_Lut', 'hplc_Neo', 'hplc_Viola', 'hplc_Pras', 'hplc_PML_only_Anth', 'hplc_Asta', 'hplc_blank_intentionally', 'hplc_TChl', 'hplc_PPC', 'hplc_PSC', 'hplc_PSP', 'hplc_Tacc', 'hplc_Tpg', 'hplc_station', 'hplc_ctd', 'hplc_bottle', 'hplc_sample_detph_(m)', 'hplc_lat', 'hplc_lon', 'hplc_variable', 'hplc_volume_(l)', 'hplc_replicate_(a_=_no,_b_=_yes)', 'hplc_name', 'hplc_cryobox_hplc', 'hplc_notes', 'hplc_n_=_oth

In [32]:
# Primary pigments can be accessed as follows:

# Chlorophylls
Tot_Chl_a = data_nc['hplc_Tot_Chl_a']
Tot_Chl_b = data_nc['hplc_Tot_Chl_b']
Tot_Chl_c = data_nc['hplc_Tot_Chl_c']

# Photoprotective carotenoids 
Allo = data_nc['hplc_Allo']
Alpha_beta_car = data_nc['hplc_Alpha-beta-Car']
Diato = data_nc['hplc_Diato']
Diadino = data_nc['hplc_Diadino']
Zea = data_nc['hplc_Zea']
PPC = data_nc['hplc_PPC']

#Photosynthetic carotenoids
But_fuco = data_nc['hplc_But-fuco']
Fuco = data_nc['hplc_Fuco']
Hex_fuco = data_nc['hplc_Hex-fuco']
Period = data_nc['hplc_Perid']
PSC = data_nc['hplc_PSC']

In [33]:
# Lat, lon, timestamps for HPLC samples can be accessed as folows:
hplc_lat = data_nc['hplc_lat']
hplc_lon = data_nc['hplc_lon']
hplc_time = data_nc['hplc_time']

# Other metadata fields are also available (see previous keys list)
