In [1]:
# THREDDS catalog pages that hold the netCDF data files, one catalog per
# delivery method requested for this instrument.
url_list = [
    'https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/catalog.html',
    'https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/catalog.html',
]

# CSV listing the datasets to review; it was prepared ahead of this analysis.
review_file = 'https://raw.githubusercontent.com/ooi-data-lab/data-review-prep/master/review_list/data_review_list.csv'

In [2]:
import pandas as pd

In [3]:
# Reference designator (instrument id): fields 1-4 of the hyphen-separated
# dataset directory name taken from the first catalog URL.
elements = url_list[0].split('/')[-2].split('-')
rd = '-'.join([elements[i] for i in (1, 2, 3, 4)])

# Keep only the review-list rows for this instrument that are marked 'for review'.
reviewlist = pd.read_csv(review_file)
is_this_instrument = reviewlist['Reference Designator'] == rd
is_for_review = reviewlist['status'] == 'for review'
rl_filtered = reviewlist.loc[is_this_instrument & is_for_review]

# Deployment numbers of the rows selected above.
review_deployments = rl_filtered['deploymentNumber'].tolist()

# Show the selected deployments with their time range.
deployment_summary = pd.DataFrame({
    'deploymentNumber': review_deployments,
    'startDateTime': rl_filtered['startDateTime'],
    'stopDateTime': rl_filtered['stopDateTime'],
    'in_am': rl_filtered['in_am'],
})
print(deployment_summary)

      deploymentNumber        startDateTime         stopDateTime in_am
3918               1.0  2014-04-15T13:15:00  2014-07-11T00:00:00   yes
3919               2.0  2015-05-09T11:02:00  2015-07-02T00:00:00   yes
3920               3.0  2016-01-21T15:00:00  2016-05-18T03:56:00   yes
3921               4.0  2017-01-16T12:46:00  2017-01-16T23:59:59   yes


In [5]:
import functions.common as cf

In [6]:
# Deployment tags in the form used as file-name prefixes, e.g. 1.0 -> 'deployment0001'.
review_deployments_int = ['deployment%04d' % int(x) for x in review_deployments]

# Matching files are collected as (deployment tag, dataset URL, method) tuples and
# the DataFrame is built once at the end: DataFrame.append was removed in
# pandas 2.0, and growing a frame row by row is quadratic.
matches = []

for uu in url_list:
    # presumably the netCDF file URLs listed in this catalog -- confirm in functions.common
    udatasets = cf.get_nc_urls([uu])

    # These values depend only on the catalog URL, so compute them once per catalog.
    # The method is read from the dataset directory name (second-to-last field)
    # rather than from a split of the whole URL, so a hyphen elsewhere in the URL
    # cannot shift the index.
    elements = uu.split('/')[-2].split('-')
    method = elements[-2]
    catalog_rms = '-'.join((rd, method, elements[-1]))

    # keep only the deployments that are up for review
    for rev_dep in review_deployments_int:
        for dss in udatasets:
            if rev_dep not in dss:
                continue
            # File name cut at '_20' (where the time stamp starts), minus the
            # 15-character 'deploymentNNNN_' prefix: refdes-method-stream of the file.
            file_rms = dss.split('/')[-1].split('_20')[0][15:]
            # keep only the sensor of interest (skips collocated data files)
            if catalog_rms == file_rms:
                matches.append((rev_dep, dss, method))

# One row per matching file, indexed by deployment tag; an empty result still
# carries the 'datasets' and 'method' columns.
df = pd.DataFrame(
    {'datasets': [m[1] for m in matches], 'method': [m[2] for m in matches]},
    index=[m[0] for m in matches],
    columns=['datasets', 'method'],
)

Data request has fulfilled.
Data request has fulfilled.


In [7]:
# None removes the column-width limit so the long dataset URLs are shown in full.
# The former -1 sentinel was deprecated in pandas 1.0 in favour of None.
pd.set_option('display.max_colwidth', None)
df

Unnamed: 0,datasets,method
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0001_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20140415T132159.798580-20140706T212524.736690.nc,recovered_host
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0002_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20150509T113514.756870-20150625T150702.264890.nc,recovered_host
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0003_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20160121T150043.663730-20160518T015328.144100.nc,recovered_host
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0001_CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20140415T134258.674740-20140706T212530.867710.nc,telemetered
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0002_CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20150509T125849.226410-20150625T123743.327300.nc,telemetered
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0003_CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20160121T150043.663730-20160518T015328.144100.nc,telemetered
deployment0004,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0004_CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20170116T125355.760380-20170116T221950.330870.nc,telemetered


In [8]:
# Write every matching data file to CSV; index=True keeps the deployment tag
# (the frame's index) as the first column.
df.to_csv('data_files_list.csv', index=True)

In [9]:
import numpy as np

In [10]:
# Delivery methods in order of preference: earlier entries win when a
# deployment has files from more than one method.
method_list = ['streamed', 'recovered_inst', 'recovered_wfp', 'recovered_cspp', 'recovered_host', 'telemetered']

# np.unique already returns its result sorted, so no separate sort is needed.
deployments = np.unique(list(df.index.values))

# Collect one frame per deployment and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0.
selected = []
for d in deployments:
    df_d = df[df.index.values == d]
    if len(df_d) != 1:
        # Several files for this deployment: keep only the rows of the most
        # preferred method. method_list.index raises ValueError for a method
        # that is not in method_list.
        preferred = min(df_d['method'], key=method_list.index)
        df_d = df_d[df_d['method'] == preferred]
    selected.append(df_d)

# pd.concat rejects an empty list, so fall back to an empty frame.
df_info = pd.concat(selected) if selected else pd.DataFrame()

In [11]:
# None removes the column-width limit so the long dataset URLs are shown in full.
# The former -1 sentinel was deprecated in pandas 1.0 in favour of None.
pd.set_option('display.max_colwidth', None)
df_info

Unnamed: 0,datasets,method
deployment0001,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0001_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20140415T132159.798580-20140706T212524.736690.nc,recovered_host
deployment0002,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0002_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20150509T113514.756870-20150625T150702.264890.nc,recovered_host
deployment0003,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174413-CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/deployment0003_CP05MOAS-GL379-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered_20160121T150043.663730-20160518T015328.144100.nc,recovered_host
deployment0004,https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190306T174435-CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0004_CP05MOAS-GL379-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20170116T125355.760380-20170116T221950.330870.nc,telemetered


In [12]:
# Write the per-deployment selection to CSV; index=True keeps the deployment
# tag (the frame's index) as the first column.
df_info.to_csv('define_datareview_list.csv', index=True)