# Using EcoFOCIpy to process raw field data

## Cruise ID
{nw16xx, dy17xx, hx}

## BTL Data + Nutrient Data

This is a streamlined version of the generation routines that merge bottle data with the Mordy Nut. Lab nutrient data for long-term archive.

In [27]:
import yaml
import glob
import pandas as pd
import os

import EcoFOCIpy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import EcoFOCIpy.io.ncCFsave as ncCFsave
import EcoFOCIpy.metaconfig.load_config as load_config

In [28]:
# Root locations for this run. Both keep their trailing slash because the
# per-cruise file paths are built from them by plain string concatenation.
ecofocipy_dir = "/Users/bell/Programs/EcoFOCIpy/"  # base of the EcoFOCIpy tree
sample_data_dir = "/Users/bell/ecoraid/2004/CTDcasts/hx288/"  # root path to cruise directory

In [29]:
###############################################################
# edit to point to {cruise specific} raw datafiles
cruise_name = 'hx288' #no hyphens
datafile = f'{sample_data_dir}rawconverted/' #<- directory of converted casts; every .btl file in it is processed
nutdatafile = f'{sample_data_dir}working/DiscreteNutrients/HX288 Nutrient Data.txt' #<- discrete nutrient file for this cruise
cruise_meta_file = f'{sample_data_dir}logs/hx288.yaml'
inst_meta_file = f'{sample_data_dir}logs/FOCI_standard_CTDpNutsWOCE.yaml' #<- copy to each deployment for simplicity?
group_meta_file = f'{ecofocipy_dir}staticdata/institutional_meta_example.yaml'
###############################################################

#init and load data
cruise = sbe_ctd_parser.sbe_btl()

# gather the bottle files and order them by name so the parser always
# receives them in the same sequence
filename_list = glob.glob(f'{datafile}*.btl')
filename_list.sort()

# parse every bottle file in one call; the result is indexed per file below
cruise_data = cruise.manual_parse(filename_list)

Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd001.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd002.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd003.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd004.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd005.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd006.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd007.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd008.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd009.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd010.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd011.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd012.btl
Processing /Users/bell/ecoraid/2004/CTDcasts/hx288/rawconverted/ctd013.btl
Processing /Users/bell/ec

In [30]:
# Spot-check one parsed cast; the parsed results are looked up by bottle-file name.
cruise_data['ctd001.btl']

Unnamed: 0_level_0,sal00,sal11,sigma-t00,prdm,t090c,t190c,v0,v2,v5,datetime
bottle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,32.5486,32.5317,25.5515,60.817,6.5155,6.5209,0.1171,0.2288,3.9316,2004-07-26 22:37:44
2.0,32.0727,32.0569,24.8818,6.876,8.6381,8.6435,2.8636,2.5606,2.5634,2004-07-26 22:39:43
3.0,32.0534,32.0369,24.6978,1.725,9.7189,9.6796,0.8662,3.3834,2.9178,2004-07-26 22:40:02


## Load csv Nutrient File

In [31]:
# The nutrient file is tab-separated text; load it as one table covering all
# casts (its 'Cast' and 'Niskin' columns identify each sample).
nut_data = pd.read_csv(nutdatafile, sep='\t')
# NOTE(review): values of 1e35 in the listing look like a missing-data
# sentinel - confirm how they should be treated before archiving.
nut_data

Unnamed: 0,Cast,Niskin,PO4 (uM),PO4 Flag,Sil (uM),Sil Flag,NO3 (uM),NO3 Flag,NO2 (uM),NO2 Flag,NH4 (uM),NH4 Flag
0,1,1.0,1.432000e+00,2,28.1,2,14.2,2,0.23,2,2.46,2
1,1,2.0,7.420000e-01,2,15.1,2,5.7,2,0.18,2,1.07,2
2,1,3.0,8.030000e-01,2,14.2,2,4.4,2,0.21,2,0.88,2
3,2,1.0,1.452000e+00,2,28.8,2,14.7,2,0.23,2,2.48,2
4,2,2.0,7.260000e-01,2,14.7,2,5.6,2,0.20,2,1.44,2
...,...,...,...,...,...,...,...,...,...,...,...,...
1643,282,5.0,1.000000e+35,2,19.9,2,13.3,2,0.20,2,3.03,2
1644,282,6.0,1.000000e+35,2,17.2,2,9.4,2,0.33,2,2.12,2
1645,282,7.0,1.000000e+35,2,10.8,2,1.6,2,0.13,2,0.30,2
1646,282,8.0,1.000000e+35,2,10.9,2,0.5,2,0.02,2,0.04,2


## Merge Bottle and Nutrient Data but drop non nutrient vars?

In [32]:
# Columns carried over from each bottle table; every other CTD channel is
# dropped so the merged table holds the nutrient columns plus bottle number
# and pressure.
keep_param = ['bottle','prdm']

# Replace each cast's bottle table with its nutrient samples joined to the
# matching bottles. Only existing keys are reassigned, so updating the dict
# while iterating over it is safe.
for cast, cdata in cruise_data.items():
    try:
        # 'ctd001.btl' -> 1, for comparison against nut_data['Cast']
        matchcast = int(cast.split('.')[0].lower().split('ctd')[-1])
        cast_nuts = nut_data[nut_data['Cast'] == matchcast]
        bottles = cdata.reset_index()[keep_param]
        # pair each nutrient sample with its bottle (pd.merge defaults to an
        # inner join, so unmatched rows on either side are dropped)
        cruise_data[cast] = (
            pd.merge(cast_nuts, bottles, left_on='Niskin', right_on='bottle')
            .set_index('bottle')
            .drop(columns=['Cast'])
        )
    except Exception as err:
        # Best effort: a cast that cannot be merged keeps its original bottle
        # table, but the reason is reported instead of being silently
        # discarded. Unlike a bare ``except``, this lets KeyboardInterrupt
        # stop the loop.
        print(f'Could not merge nutrients for {cast}: {type(err).__name__}: {err}')
        continue

## Add Deployment meta information

In [33]:
# Cruise/deployment metadata - expected to parse into simple nested dictionaries.
with open(cruise_meta_file) as cruise_fh:
    cruise_config = yaml.full_load(cruise_fh)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (they are always our coordinates), but for a mooring site it's going to be a (1,1,1,t) dataset.
The variables of interest should be read from the data file and matched to a key for naming. That key is in the inst_config file loaded below and should represent common conversion names in the raw data.

In [34]:
# Instrument metadata: the YAML that supplies the key used to name each variable.
with open(inst_meta_file) as inst_fh:
    inst_config = yaml.full_load(inst_fh)

## Add institutional meta-information


In [35]:
# Institutional metadata, later passed to institution_meta_add() for every cast.
with open(group_meta_file) as group_fh:
    group_config = yaml.full_load(group_fh)

## Save CF Netcdf files

Currently we stick to netcdf3 classic... but migrating to netcdf4 (the default) should pose no problems for most modern purposes. It's easy enough to pass the `format` kwarg through to the netcdf api of xarray.

In [36]:
#loop over all casts and perform tasks shown above

# Map the nutrient-file column headers onto the short variable names used in
# the archive files. Built once because it is the same for every cast;
# DataFrame.rename ignores keys that are absent from a given table.
column_names = {
    'Sil (uM)':'SI',
    'PO4 (uM)':'PO4',
    'NO2 (uM)':'NO2',
    'NO3 (uM)':'NO3',
    'NH4 (uM)':'NH4',
    'Sil Flag':'SI_WOCE_FLAG',
    'PO4 Flag':'PO4_WOCE_FLAG',
    'NO2 Flag':'NO2_WOCE_FLAG',
    'NO3 Flag':'NO3_WOCE_FLAG',
    'NH4 Flag':'NH4_WOCE_FLAG',
    'Niskin':'BTLID',
    'prdm':'pressure',
    'empty':'empty', #this will be ignored
    'flag':'flag',
}

for cast in cruise_data.keys():
    # Resolve the cast id and the output path BEFORE any work is attempted, so
    # the failure handler below always refers to the right cast and file no
    # matter which step raised.
    cast_id = cast.split('.')[0]                          # e.g. 'ctd001'
    cast_num = cast.lower().split('d')[-1].split('.')[0]  # e.g. '001'
    outfile = cruise_name+'c'+cast_num.zfill(3)+'_nut.nc'

    try:
        # the renamed table is stored back so cruise_data reflects what was saved
        cruise_data[cast] = cruise_data[cast].rename(columns=column_names)

        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast],
                                    instrument_yaml=inst_config,
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        # NOTE(review): the cast id is passed as-is here but upper-cased for
        # deployment_meta_add below - confirm both calls expect those forms.
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast_id)

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast_id.upper())

        #add institutional global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide initial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=outfile,format="NETCDF3_CLASSIC")
    except Exception as err:
        # Best effort per cast: report why it was skipped rather than hiding
        # the cause, then move on to the next cast.
        print(f'Skipping {cast_num}: {type(err).__name__}: {err}')
        # Remove any file left for this cast; there may be none if the failure
        # happened before the save, so check first instead of letting
        # os.remove raise and abort the whole loop.
        if os.path.exists(outfile):
            os.remove(outfile)

Skipping 003
Skipping 006
Skipping 007
Skipping 025
Skipping 026
Skipping 027
Skipping 038
Skipping 039
Skipping 043
Skipping 044
Skipping 045
Skipping 047
Skipping 048
Skipping 055
Skipping 057
Skipping 065
Skipping 072
Skipping 074
Skipping 076
Skipping 082
Skipping 090
Skipping 092
Skipping 105
Skipping 107
Skipping 118
Skipping 123
Skipping 125
Skipping 127
Skipping 136
Skipping 145
Skipping 147
Skipping 148
Skipping 153
Skipping 155
Skipping 156
Skipping 169
Skipping 171
Skipping 172
Skipping 174
Skipping 175
Skipping 177
Skipping 178
Skipping 180
Skipping 191
Skipping 193
Skipping 195
Skipping 196
Skipping 204
Skipping 206
Skipping 207
Skipping 212
Skipping 230
Skipping 235
Skipping 238
Skipping 240
Skipping 259
Skipping 260
Skipping 261
Skipping 270
Skipping 276
Skipping 286


## Next Steps

QC of data (plot parameters with other instruments)
- be sure to update the qc_status and the history