In [10]:
# functions needed to run the notebook
import pandas as pd
import functions.common as cf
import numpy as np

**OOI File System Tree for Gliders:**

 - <p style="color:green; font-size:1.1em;">Instrument</p>
 
  - <p style="color:red; font-size:1.1em;">deployment(s)</p>
  
    - <p style="color:blue; font-size:1.1em;">method(s) </p>
    
      - <p style="color:orange; font-size:1.1em;">recovered_host</p>
      
        - science data stream(s)
        - engineering data stream(s)
        
      - <p style="color:orange; font-size:1.1em;">telemetered</p>
      
        - science data stream(s)
        - engineering data stream(s)

**Notebook Purpose:** 
- How do we select the data stream name, for the preferred method, that contains the science data for review?
  
NOTE: The glider data review process was designed to select the data stream name with data from the **recovered host** method, because this will be the most complete dataset. If recovered host data are not available, the telemetered data are reviewed instead. 

**1**
- define variables  
  - review file created upfront to do this analysis
      - source: https://github.com/ooi-data-lab/data-review-prep/tree/master/review_list
  - THREDDS server containing the netCDF data files
      - source:
https://opendap.oceanobservatories.org/thredds/catalog/ooi/lgarzio@marine.rutgers.edu/catalog.html  

In [1]:
# THREDDS catalog pages that list the netCDF data files, one catalog per
# delivery method (here: telemetered and recovered_host) for the instrument
# CP05MOAS-GL335-05-PARADM000.
# Later cells parse the second-to-last path segment of each URL (split on '-')
# to get the reference designator, the method and the stream name, so every
# entry must keep that folder naming.
url_list = ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/catalog.html',
            'https://opendap.oceanobservatories.org/thredds/catalog/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/catalog.html']

# Alternative catalog pair (CP05MOAS-GL379-03-CTDGVM000); uncomment to use it
# instead of the list above.
# url_list = ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/catalog.html',
#             'https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/catalog.html']

# Review list (CSV) prepared ahead of this analysis. The next code cell reads
# its 'Reference Designator', 'status', 'deploymentNumber', 'startDateTime',
# 'stopDateTime' and 'in_am' columns.
review_file = 'https://raw.githubusercontent.com/ooi-data-lab/data-review-prep/master/review_list/data_review_list.csv'

**2**

- Get the deployment numbers
- source: https://github.com/ooi-integration/asset-management/tree/master/deployment
- method: filter on the reference designator

In [12]:
# Reference designator = pieces 1-4 of the dash-separated catalog folder name
# of the first URL (e.g. 'CP05MOAS-GL335-05-PARADM000').
elements = url_list[0].split('/')[-2].split('-')
rd = '-'.join(elements[idx] for idx in (1, 2, 3, 4))

# Load the review list and keep only the rows for this reference designator
# that are flagged 'for review'.
reviewlist = pd.read_csv(review_file)
is_this_rd = reviewlist['Reference Designator'] == rd
is_for_review = reviewlist['status'] == 'for review'
rl_filtered = reviewlist.loc[is_this_rd & is_for_review]

# Deployment numbers to review, taken from the filtered review list.
review_deployments = rl_filtered['deploymentNumber'].tolist()

# Summary table shown as the cell output; it keeps the review-list row index.
summary_columns = {
    'deploymentNumber': review_deployments,
    'startDateTime': rl_filtered['startDateTime'],
    'stopDateTime': rl_filtered['stopDateTime'],
    'in_asset_management': rl_filtered['in_am'],
}
df = pd.DataFrame(summary_columns)
df

Unnamed: 0,deploymentNumber,startDateTime,stopDateTime,in_asset_management
3704,1.0,2014-10-06T20:16:00,2014-12-15T00:00:00,yes
3705,2.0,2015-10-13T01:12:14,2015-11-16T00:00:00,yes
3706,3.0,2016-04-04T18:57:02,2016-04-18T00:00:00,yes
3707,4.0,2016-05-27T20:33:00,2016-06-27T00:00:00,yes
3708,5.0,2017-01-16T14:59:00,2017-03-06T22:45:00,yes


**3** 

- Get the list of data files
- method:
filter on **deployments** and the **instrument** of interest

In [13]:
# Collect, for every catalog in url_list, the netCDF files that belong to a
# deployment under review AND whose file name matches the catalog's own
# reference designator / method / stream. The result `df` has one row per
# file, indexed by 'deploymentNNNN', with the columns 'datasets' (file URL)
# and 'method' (delivery method).
review_deployments_int = ['deployment%04d' % int(x) for x in review_deployments]

# Number of leading characters of a file name taken up by 'deploymentNNNN_'.
dep_prefix_len = len('deployment0000_')

# Rows are accumulated in plain lists and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame inside a
# loop is quadratic.
row_labels, dataset_urls, dataset_methods = [], [], []

for uu in url_list:
    udatasets = cf.get_nc_urls([uu])

    # These depend only on the catalog URL, so compute them once per catalog.
    # The method is read from the catalog folder name (not from the whole URL)
    # so that a '-' elsewhere in the URL cannot shift the position.
    elements = uu.split('/')[-2].split('-')
    method = elements[-2]
    catalog_rms = '-'.join((rd, method, elements[-1]))

    # filter on the deployments for review
    for rev_dep in review_deployments_int:
        rdatasets = [s for s in udatasets if rev_dep in s]

        # filter on the sensor of interest: skip files listed in the catalog
        # whose name does not match this catalog (collocated data files)
        for dss in rdatasets:
            # file name without the 'deploymentNNNN_' prefix and without the
            # trailing '_20...' time-range part
            file_rms = dss.split('/')[-1].split('_20')[0][dep_prefix_len:]
            if file_rms == catalog_rms:
                row_labels.append(rev_dep)
                dataset_urls.append(dss)
                dataset_methods.append(method)

df = pd.DataFrame({'datasets': dataset_urls, 'method': dataset_methods},
                  index=row_labels)

# Show the full URLs. None means "no limit"; the former value -1 is rejected
# by current pandas (None requires pandas >= 1.0).
pd.set_option('display.max_colwidth', None)
df

Data request has fulfilled.
Data request has fulfilled.


Unnamed: 0,datasets,method
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/deployment0001_CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument_20141006T202152.905850-20141213T035750.235320.nc,telemetered
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/deployment0002_CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument_20151014T001900.237980-20151110T091836.233310.nc,telemetered
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/deployment0003_CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument_20160404T185705.311220-20160417T235956.145260.nc,telemetered
deployment0004,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/deployment0004_CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument_20160527T212312.351560-20160626T091401.747920.nc,telemetered
deployment0005,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument/deployment0005_CP05MOAS-GL335-05-PARADM000-telemetered-parad_m_glider_instrument_20170116T150223.595370-20170304T045334.799840.nc,telemetered
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0001_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20141006T202152.905850-20141213T073238.247380.nc,recovered_host
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0002_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20151014T001900.237980-20151110T091855.472810.nc,recovered_host
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0003_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20160404T185705.311220-20160417T235956.145260.nc,recovered_host
deployment0004,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0004_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20160529T204727.075500-20160626T091401.747920.nc,recovered_host
deployment0005,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0005_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20170116T150223.595370-20170304T093047.153350.nc,recovered_host


Save to a file to use with the data comparison notebook.

In [18]:
# Write every matching data file (all methods) to data_files_list_<rd>.csv,
# keeping the deployment labels as the first column.
files_list_csv = ''.join(('data_files_list_', rd, '.csv'))
df.to_csv(files_list_csv, index=True)

**4**
- method: Filter data files list on preferred method

In [15]:
# Delivery methods ordered from most to least preferred: an earlier position
# wins when a deployment has files from several methods.
method_list = ['streamed', 'recovered_inst', 'recovered_wfp', 'recovered_cspp', 'recovered_host', 'telemetered']

# Unique deployment labels; np.unique already returns them sorted.
deployments = np.unique(list(df.index.values))

# Per-deployment selections are gathered in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0.
selected = []
for d in deployments:
    df_d = df[df.index.values == d]
    if len(df_d['method']) != 1:
        # Keep only the rows of the most preferred method present for this
        # deployment. method_list.index raises ValueError for a method that is
        # not in method_list.
        best_method = min(df_d['method'], key=method_list.index)
        df_d = df_d[df_d['method'] == best_method]
    selected.append(df_d)

# pd.concat refuses an empty list, so fall back to an empty frame.
df_info = pd.concat(selected) if selected else pd.DataFrame()

# Show the full URLs. None means "no limit"; the former value -1 is rejected
# by current pandas (None requires pandas >= 1.0).
pd.set_option('display.max_colwidth', None)
df_info

Unnamed: 0,datasets,method
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0001_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20141006T202152.905850-20141213T073238.247380.nc,recovered_host
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0002_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20151014T001900.237980-20151110T091855.472810.nc,recovered_host
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0003_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20160404T185705.311220-20160417T235956.145260.nc,recovered_host
deployment0004,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0004_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20160529T204727.075500-20160626T091401.747920.nc,recovered_host
deployment0005,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/lgarzio@marine.rutgers.edu/20190509T131304-CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered/deployment0005_CP05MOAS-GL335-05-PARADM000-recovered_host-parad_m_glider_recovered_20170116T150223.595370-20170304T093047.153350.nc,recovered_host


Save to a file to use with other notebooks.

In [19]:
# Write the preferred-method selection to data_review_list_<rd>.csv, keeping
# the deployment labels as the first column.
review_list_csv = ''.join(('data_review_list_', rd, '.csv'))
df_info.to_csv(review_list_csv, index=True)

**END**