# Using EcoFOCIpy to process raw field data

## ____ Template for cruise (pre 2020)

## BTL Data + Nutrient Data

This is a streamlined version of the generation routines that merge bottle data with nutrient data from the Mordy Nutrient Lab for long-term archive.

<div class="warning" style='background-color:#E9D8FD; color: #69337A; border-left: solid #805AD5 4px; border-radius: 4px; padding:0.7em;'>
<span>
<p style='margin-top:1em; text-align:center'>
<b>A template for Nutrient Lab ASCII files to NETCDF</b></p>
<p style='margin-left:1em;'>
Populate the necessary paths in the following cells.</p>
<p style='margin-bottom:1em; margin-right:1em; text-align:right; font-family:Georgia'> <b>- Shaun Bell</b>
</p></span>
</div>


In [1]:
import yaml
import glob
import pandas as pd
import os
import xarray as xa

import ecofocipy.io.sbe_ctd_parser as sbe_ctd_parser #<- instrument specific
import ecofocipy.io.ncCFsave as ncCFsave
import ecofocipy.metaconfig.load_config as load_config

In [2]:
# Machine-specific roots that the path cell below builds on; both keep their
# trailing slash because later paths are formed by plain string concatenation.
ecofocipy_dir = "/Users/bell/Programs/EcoFOCIpy/"
sample_data_dir = "/Users/bell/ecoraid/2013/CTDcasts/nw1301/"  # root path to cruise directory

In [3]:
###############################################################
# edit to point to {cruise specific} raw datafiles
# Directory holding the per-cast bottle netCDF files; every *.nc file in it is
# processed. It is derived from sample_data_dir so the cruise root is defined
# in one place only.
# (To process the converted raw files instead, use sample_data_dir+'rawconverted/'.)
datafile = sample_data_dir+'final_data/btl/' #<- point to cruise and process all files within
nutdatafile = sample_data_dir+'working/DiscreteNutrients/NW1301 Nutrient Data.txt' #<- nutrient lab text file for this cruise
cruise_name = 'nw1301' #no hyphens
cruise_meta_file = sample_data_dir+'logs/NW1301.yaml'
inst_meta_file = sample_data_dir+'logs/FOCI_standard_CTDpNuts.yaml' #<- copy to each deployment for simplicity?
group_meta_file = ecofocipy_dir+'staticdata/institutional_meta_example.yaml'
###############################################################
#init and load data
# sorted() makes the processing order deterministic (glob order is not guaranteed)
filename_list = sorted(glob.glob(datafile + '*.nc'))

In [4]:
# Load every bottle netCDF file into a dict keyed by a cast label
# (the last file of this cruise yields 'ctd053.btl').
cruise_data = {}

for filepath in filename_list:
    # Parse the label from the file name only, so a '.' in any parent
    # directory cannot truncate it. The text after the last 'c' of the stem
    # (e.g. '053_btl') becomes the cast id, with '_' turned into '.'.
    stem = os.path.basename(filepath).split('.')[0]
    cast = 'ctd' + stem.split('c')[-1].replace('_','.')
    # glob already returned the full path, so it is used directly rather than
    # being rebuilt from `datafile`.
    cruise_data[cast] = xa.load_dataset(filepath, decode_times=False)

In [5]:
# Show the label left in `cast` by the loading loop above (the last file processed).
cast

'ctd053.btl'

In [6]:
# Preview one cast's bottle data as a flat table.
# NOTE: the key is hard-coded to a cast of this cruise; change it when reusing the template.
# This cell has to run before the merge step further down, which replaces the
# loaded datasets in cruise_data with pandas DataFrames (no .to_dataframe()).
cruise_data['ctd053.btl'].to_dataframe().reset_index()

Unnamed: 0,time,dep,lat,lon,time2,BTL_103,S_41,OST_62,O_65,ST_70,T_28,rFv_971,PAR_916,TRN_107,PAR_905,Chl_933,Tr_904,ATTN_55
0,2456495,2.163,56.400166,137.8405,80940000,10.0,32.153099,94.778961,241.830994,23.881599,14.5124,0.1132,3.7328,4.1014,61.074,0.6015,87.048798,0.5548
1,2456495,9.003,56.400166,137.8405,80940000,9.0,32.152,94.911118,242.722,23.9046,14.3986,0.1535,3.319,4.0851,23.294,1.0048,86.700401,0.5709
2,2456495,19.664,56.400166,137.8405,80940000,8.0,32.414799,95.181511,274.289001,25.1486,8.6455,0.3052,2.6273,4.1779,4.6486,2.5215,88.676804,0.4807
3,2456495,29.632,56.400166,137.8405,80940000,7.0,32.492298,90.550278,265.063995,25.316401,7.9195,0.1734,2.1393,4.3118,1.4317,1.2043,91.525497,0.3542
4,2456495,40.66,56.400166,137.8405,80940000,4.0,32.540901,86.967918,259.115997,25.465401,7.1244,0.101,1.7529,4.3922,0.52248,0.4795,93.237297,0.2801
5,2456495,49.634998,56.400166,137.8405,80940000,3.0,32.599499,85.998329,259.765015,25.5924,6.5088,0.089,1.4628,4.4144,0.21247,0.3601,93.708702,0.2599
6,2456495,100.200996,56.400166,137.8405,80940000,2.0,33.138302,63.03281,192.434006,26.0966,5.8789,0.0641,0.2663,4.4373,1e-12,0.1114,94.196503,0.2391
7,2456495,203.162994,56.400166,137.8405,80940000,1.0,33.869999,28.783421,88.429001,26.7356,5.3779,0.0605,0.0944,4.4391,1e-12,0.0748,94.235298,0.2375


## Load csv Nutrient File

In [7]:
# Read the tab-separated nutrient lab text file into a DataFrame and display it.
nut_data = pd.read_csv(nutdatafile, sep='\t')
nut_data

Unnamed: 0,Bottle ID,Cast,Niskin,PO4 (uM),Sil (uM),NO3 (uM),NO2 (uM),NH4 (uM),Temp
0,1,1,1,2.047,45.4,25.9,0.29,1.30,23.8
1,2,1,2,1.940,40.8,24.3,0.27,1.16,23.8
2,3,1,3,1.540,25.4,16.8,0.25,2.14,23.8
3,4,1,4,1.461,22.5,15.1,0.24,2.20,23.8
4,5,1,5,1.269,17.6,12.2,0.21,2.01,23.8
...,...,...,...,...,...,...,...,...,...
418,420,53,4,1.261,17.0,13.6,0.35,0.58,23.5
419,421,53,5,1.147,14.2,10.4,0.31,1.63,23.5
420,422,53,6,0.943,11.3,7.7,0.15,0.51,23.5
421,423,53,7,0.410,3.3,0.0,0.00,0.04,23.5


In [8]:
# Relabel Niskins: every bottle numbered above 4 is shifted up by 2
# (5 -> 7, 6 -> 8, ...) so the nutrient labels line up with the bottle-file
# numbering used in the merge below.
# NOTE(review): the original comment named cruise nw1201, but this notebook
# processes nw1301 - confirm the offset applies to this cruise.
# NOTE: this edits nut_data in place, so running the cell twice shifts twice.
above_fourth = nut_data['Niskin'] > 4
nut_data.loc[above_fourth, 'Niskin'] += 2
nut_data

Unnamed: 0,Bottle ID,Cast,Niskin,PO4 (uM),Sil (uM),NO3 (uM),NO2 (uM),NH4 (uM),Temp
0,1,1,1,2.047,45.4,25.9,0.29,1.30,23.8
1,2,1,2,1.940,40.8,24.3,0.27,1.16,23.8
2,3,1,3,1.540,25.4,16.8,0.25,2.14,23.8
3,4,1,4,1.461,22.5,15.1,0.24,2.20,23.8
4,5,1,7,1.269,17.6,12.2,0.21,2.01,23.8
...,...,...,...,...,...,...,...,...,...
418,420,53,4,1.261,17.0,13.6,0.35,0.58,23.5
419,421,53,7,1.147,14.2,10.4,0.31,1.63,23.5
420,422,53,8,0.943,11.3,7.7,0.15,0.51,23.5
421,423,53,9,0.410,3.3,0.0,0.00,0.04,23.5


## Merge Bottle and Nutrient Data but drop non nutrient vars?

<div class="warning" style='background-color:#ffcccb; color: #FF0000; border-left: solid #805AD5 4px; border-radius: 4px; padding:0.7em;'>
<span>
<p style='margin-top:1em; text-align:center'>
<b>WARNING</b></p>
<p style='margin-left:1em;'>Bottle/Niskin number and rosette position should be the same but can be different (for example, bottles are labeled sequentially but a rosette position is skipped due to balancing or other instruments).</p>
</span>
</div>

In [9]:
# Columns taken from each bottle file: the bottle number used as the join key
# and the sample depth.
keep_param = ['BTL_103','dep'] #sometimes prsm

# Replace each cast's bottle dataset with a DataFrame of that cast's nutrient
# rows joined to the bottle depth, indexed by bottle number.
for cast,cdata in cruise_data.items():
    try:
        # 'ctd053.btl' -> 53, compared against the nutrient file's 'Cast' column
        matchcast = int((cast.split('.')[0]).lower().split('ctd')[-1])
        cast_nutrients = nut_data[nut_data['Cast']==matchcast]
        cast_bottles = cdata.to_dataframe().reset_index()[keep_param]
        # pd.merge defaults to an inner join: only bottles present in both
        # the nutrient table and the bottle file are kept.
        cruise_data[cast] = (
            pd.merge(cast_nutrients, cast_bottles, left_on='Niskin', right_on='BTL_103')
            .set_index('BTL_103')
            .drop(columns=['Cast'])
        )
    except Exception as err:
        # Still best-effort per cast, but no longer swallows KeyboardInterrupt /
        # SystemExit, and reports which cast failed and why.
        print(f'something is wrong with {cast}: {err!r}')

## Add Deployment meta information

In [10]:
#just a dictionary of dictionaries - simple
# Parse the cruise metadata YAML into cruise_config.
# NOTE(review): yaml.full_load accepts more than the basic YAML types; if this
# file only holds plain mappings/lists/scalars, yaml.safe_load would be the
# more conservative loader - confirm before switching.
with open(cruise_meta_file) as file:
    cruise_config = yaml.full_load(file)

## Add Instrument meta information

Time, depth, lat, lon should be added regardless (they are always our coordinates), but for a mooring site it's going to be a (1,1,1,t) dataset.
The variables of interest should be read from the data file and matched to a key for naming. That key is in the inst_config file loaded below and should represent common conversion names in the raw data.

In [11]:
# Parse the instrument metadata YAML into inst_config.
# NOTE(review): yaml.full_load accepts more than the basic YAML types; if this
# file only holds plain mappings/lists/scalars, yaml.safe_load would be the
# more conservative loader - confirm before switching.
with open(inst_meta_file) as file:
    inst_config = yaml.full_load(file)

## Add institutional meta-information


In [12]:
# Parse the institutional metadata YAML into group_config.
# NOTE(review): yaml.full_load accepts more than the basic YAML types; if this
# file only holds plain mappings/lists/scalars, yaml.safe_load would be the
# more conservative loader - confirm before switching.
with open(group_meta_file) as file:
    group_config = yaml.full_load(file)

## Save CF Netcdf files

Currently we stick to netcdf3 classic... but migrating to netcdf4 (default) should pose no problems for most modern purposes.  It's easy enough to pass the `format` kwarg through to the netcdf API of xarray.

In [13]:
#loop over all casts and perform tasks shown above

for cast in cruise_data.keys():
    # Build the output name once so the save call and the cleanup handler
    # always refer to the same file: 'ctd053.btl' -> cruise_name + 'c053_nut.nc'.
    # The name is relative, so the file is written to the current working directory.
    nut_filename = cruise_name+'c'+cast.lower().split('d')[-1].split('.')[0].zfill(3)+'_nut.nc'
    try:
        # Map nutrient-lab column headers to the short names used downstream;
        # keys that are not present as columns are simply ignored by rename.
        cruise_data[cast] = cruise_data[cast].rename(columns={
                            'Sil (uM)':'SI',
                            'PO4 (uM)':'PO4',
                            'NO2 (uM)':'NO2', 
                            'NO3 (uM)':'NO3',
                            'NH4 (uM)':'NH4',
                            'BTL_103':'BTLID',
                            # 'prdm':'pressure',
                            'dep':'depth',
                            'empty':'empty', #this will be ignored
                            'flag':'flag'})

        cruise_data[cast].index = cruise_data[cast].index.rename('bottle')
        
        cruise_data_nc = ncCFsave.EcoFOCI_CFnc(df=cruise_data[cast], 
                                    instrument_yaml=inst_config, 
                                    operation_yaml=cruise_config,
                                    operation_type='ctd')

        cruise_data_nc.expand_dimensions(dim_names=['latitude','longitude','time'],geophys_sort=False)

        cruise_data_nc.variable_meta_data(variable_keys=list(cruise_data[cast].columns.values),drop_missing=False)
        #adding dimension meta needs to come after updating the dimension values... BUG?
        cruise_data_nc.dimension_meta_data(variable_keys=['time','latitude','longitude'])
        # NOTE(review): this call receives the cast id as-is (e.g. 'ctd053') while
        # deployment_meta_add below receives it upper-cased - confirm which key
        # case the cruise YAML uses; a mismatch would surface as a KeyError skip.
        cruise_data_nc.temporal_geospatioal_meta_data_ctd(positiveE=False,conscastno=cast.split('.')[0])

        #add global attributes
        cruise_data_nc.deployment_meta_add(conscastno=cast.split('.')[0].upper())

        #add institutional global attributes
        cruise_data_nc.institution_meta_add(group_config)

        #add creation date/time - provenance data
        cruise_data_nc.provinance_meta_add()

        #provide initial qc status field
        cruise_data_nc.qc_status(qc_status='excellent') #<- options are unknown, excellent, probably good, mixed, unqcd

        cruise_data_nc.xarray2netcdf_save(xdf = cruise_data_nc.get_xdf(),
                                   filename=nut_filename,format="NETCDF3_CLASSIC")
    except KeyError as err:
        # report the missing key so the skip can be diagnosed
        print(f'Skipping {cast}: missing key {err}')
    except RuntimeError:
        print(f'Skipping & Removing {cast}')
        # The error may be raised before anything was written; only delete a
        # file that exists so cleanup cannot fail with FileNotFoundError.
        if os.path.exists(nut_filename):
            os.remove(nut_filename)

## Next Steps

QC of data (plot parameters with other instruments)
- be sure to update the qc_status and the history

In [14]:
# Display the dataset held by the most recently constructed cruise_data_nc
# object from the save loop above, as a quick visual check of the result.
cruise_data_nc.get_xdf()