# Using EcoFOCIpy to process raw field data

## Cruise ID - SH2202L(1|2)

## BTL Data + Nutrient Data

This is a streamlined version of generation routines to merge bottle data and Mordy Nut. Lab Nutrient Data for long term archive

In [1]:
import yaml
import glob
import pandas as pd

import EcoFOCIpy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import EcoFOCIpy.io.ncCFsave as ncCFsave
import EcoFOCIpy.metaconfig.load_config as load_config

In [11]:
sample_data_dir = '/Users/bell/ecoraid/2022/CTDcasts/sh2202l2/' #root path to cruise directory
ecofocipy_dir = '/Users/bell/Programs/EcoFOCIpy/'

In [12]:
###############################################################
# edit to point to {cruise sepcific} raw datafiles 
datafile = sample_data_dir+'rawconverted/' #<- point to cruise and process all files within
nutdatafile = sample_data_dir+'working/DiscreteNutrients/SH2202 Nutrient Data.txt' #<- point to cruise and process all files within
cruise_name = 'sh2202l2' #no hyphens
cruise_meta_file = sample_data_dir+'logs/sh2202l2.yaml'
inst_meta_file = sample_data_dir+'logs/FOCI_standard_CTDpNutsWOCE.yaml' #<- copy to each deployment for simplicity?
group_meta_file = ecofocipy_dir+'staticdata/institutional_meta_example.yaml'
###############################################################

#init and load data
cruise = sbe_ctd_parser.sbe_btl()
filename_list = sorted(glob.glob(datafile + '*.btl'))

cruise_data = cruise.manual_parse(filename_list)

Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd018.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd019.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd020.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd021.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd022.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd023.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd024.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd025.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd026.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd027.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd028.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconverted/ctd029.btl
Processing /Users/bell/ecoraid/2022/CTDcasts/sh2202l2/rawconvert

## Load csv Nutrient File

In [13]:
nut_data = pd.read_csv(nutdatafile,delimiter='\t')
nut_data

Unnamed: 0,Cast,Niskin,PO4 (uM),Sil (uM),NO3 (uM),NO2 (uM),NH4 (uM)
0,1,1,2.929,76.7400,31.1835,0.0165,0.4900
1,1,3,2.461,55.1560,27.1570,0.0160,0.4000
2,1,4,2.359,48.4730,28.2110,0.0170,0.4600
3,1,5,2.185,27.8350,19.7070,0.0290,0.3300
4,1,6,1.754,25.5880,18.5290,0.0180,0.4600
...,...,...,...,...,...,...,...
256,51,6,0.885,3.3780,5.6010,0.3250,0.2550
257,51,7,0.800,2.7770,4.3580,0.1940,0.2330
258,51,8,0.772,2.9005,3.9220,0.1780,0.2365
259,51,9,0.760,2.2250,3.3290,0.1565,0.2230


## Merge Bottle and Nutrient Data but drop non nutrient vars?

In [14]:
keep_param = ['bottle','prdm']
# keep_param = ['bottle','prsm']

for cast,cdata in cruise_data.items():
    try:
        matchcast = int((cast.split('.')[0]).lower().split('ctd')[-1])
        cruise_data[cast] = pd.merge(nut_data[nut_data['Cast']==matchcast],cdata.reset_index()[keep_param],right_on='bottle',left_on='Niskin').set_index('bottle').drop(columns=['Cast'])
    except:
        continue

## Add Deployment meta information

In [15]:
#just a dictionary of dictionaries - simple
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (always our coordinates) but for a mooring site its going to be a (1,1,1,t) dataset
The variables of interest should be read from the data file and matched to a key for naming.  That key is in the inst_config file seen below and should represent common conversion names in the raw data

In [16]:
with open(inst_meta_file) as file:
    inst_config = yaml.full_load(file)

## Add institutional meta-information


In [17]:
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently stick to netcdf3 classic... but migrating to netcdf4 (default) may be no problems for most modern purposes.  Its easy enough to pass the `format` kwargs through to the netcdf api of xarray.

In [18]:
#loop over all casts and perform tasks shown above

for cast in cruise_data.keys():
    try:
        cruise_data[cast] = cruise_data[cast].rename(columns={
                            'Sil (uM)':'SI',
                            'PO4 (uM)':'PO4',
                            'NO2 (uM)':'NO2', 
                            'NO3 (uM)':'NO3',
                            'NH4 (uM)':'NH4',
                            'Sil Flag':'SI_WOCE_FLAG',
                            'PO4 Flag':'PO4_WOCE_FLAG',
                            'NO2 Flag':'NO2_WOCE_FLAG', 
                            'NO3 Flag':'NO3_WOCE_FLAG',
                            'NH4 Flag':'NH4_WOCE_FLAG',
                            'Niskin':'BTLID',
                            'prdm':'pressure',
                            'empty':'empty', #this will be ignored
                            'flag':'flag'})

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast], 
                                    instrument_yaml=inst_config, 
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast.split('.')[0])

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast.split('.')[0].upper())

        #add instituitonal global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide intial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cast = cast.lower().split('d')[-1].split('.')[0]
        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=cruise_name+'c'+cast.zfill(3)+'_nut.nc',format="NETCDF3_CLASSIC")
    except:
        print(f'Skipping {cast}')

Skipping 018
Skipping 020
Skipping 022
Skipping 024
Skipping 026
Skipping 028
Skipping 030
Skipping 032
Skipping 034
Skipping 036
Skipping 038
Skipping 040
Skipping 042
Skipping 044
Skipping 046
Skipping 048
Skipping 050


## Next Steps

QC of data (plot parameters with other instruments)
- be sure to updated the qc_status and the history