# Analyse Data Files:
Imports tools to analyze OOI netCDF files and provide summary outputs.

### Input variables:

In [63]:
# User-editable inputs for the analysis cells below.
sDir =  '/Users/leila/Documents/NSFEduSupport/review/output' # local directory where the summary output is saved
url_list = ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190319T195519-CP05MOAS-GL335-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered/catalog.html',
            'https://opendap.oceanobservatories.org/thredds/catalog/ooi/leila.ocean@gmail.com/20190319T195533-CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/catalog.html'] # THREDDS server catalogs containing the netCDF data files
# CSV with the data review list; it is read with pandas and filtered by reference designator and status.
review_file = 'https://raw.githubusercontent.com/ooi-data-lab/data-review-prep/master/review_list/data_review_list.csv'
# f =  # optional: location of a file containing THREDDS URLs with .nc files to analyze.
# The column containing the THREDDS URLs must be labeled 'outputUrl'.

### Import functions:

In [1]:
import os
import xarray as xr
import pandas as pd
import re
import numpy as np
import json
import datetime as dt
import netCDF4 as nc
import functions.common as cf
import functions.plotting as pf
from datetime import timedelta
from collections import OrderedDict

and netCDF < 4.4.1. Upgrading to netCDF4 >= 4.4.1 or downgrading to 
to HDF5 version 1.8.x is highly recommended 
(see https://github.com/Unidata/netcdf-c/issues/250).
  return f(*args, **kwds)


## Complete the analysis by reference designator

In [64]:
# Load the data review list into a DataFrame. The per-URL loop that follows
# filters it on the 'Reference Designator' and 'status' columns.
reviewlist = pd.read_csv(review_file)

In [89]:
# For every THREDDS catalog URL: derive the reference designator, create the
# output directory, print the deployments marked 'for review', and collect the
# netCDF file URLs that belong to those deployments.
#
# NOTE(review): `datasets` is re-initialised on every iteration, so once the
# loop finishes it only holds the matches for the LAST URL in url_list (and
# the trailing print shows only the last `udatasets`). Confirm this is the
# intended behaviour before relying on `datasets` in the following cells.
for uu in url_list:
    # The second-to-last URL component is the request directory, e.g.
    # '20190319T195533-CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument'.
    elements = uu.split('/')[-2].split('-')
    # reference designator = the four dash-separated fields after the timestamp
    rd = '-'.join((elements[1], elements[2], elements[3], elements[4]))

    data = OrderedDict(deployments=OrderedDict())

    # create the output directory: <sDir>/<first field of rd>/<rd>
    save_dir = os.path.join(sDir, rd.split('-')[0], rd)
    cf.create_dir(save_dir)

    # check for the OOI 1.0 datasets for review
    rl_filtered = reviewlist.loc[(reviewlist['Reference Designator'] == rd) & (reviewlist['status'] == 'for review')]

    # print to the screen: reference designator plus the last two fields of
    # the request directory name, followed by the deployments under review
    catalog_rms = '-'.join((rd, elements[-2], elements[-1]))
    print(catalog_rms)
    print(pd.DataFrame({'deploymentNumber': rl_filtered['deploymentNumber'],
                        'startDateTime': rl_filtered['startDateTime'],
                        'stopDateTime': rl_filtered['stopDateTime'],
                        'in_am': rl_filtered['in_am']}))

    # get data files from THREDDS server
    udatasets = cf.get_nc_urls([uu])

    # build the 'deploymentNNNN' tags used in the file names; int() is needed
    # because the deployment numbers are read from the CSV as floats (1.0, 2.0, ...)
    review_deployments = rl_filtered['deploymentNumber'].tolist()
    review_deployments_int = ['deployment%04d' % int(x) for x in review_deployments]

    # get data files of interest
    datasets = []
    for rev_dep in review_deployments_int:
        rdatasets = [s for s in udatasets if rev_dep in s]
        for dss in rdatasets:  # filter out collocated data files
            # file name without the 15-character 'deploymentNNNN_' prefix and
            # without everything from the first '_20' (the time range) onwards
            if catalog_rms == dss.split('/')[-1].split('_20')[0][15:]:
                datasets.append(dss)

print(udatasets)

CP05MOAS-GL335-03-CTDGVM000-recovered_host-ctdgv_m_glider_instrument_recovered
      deploymentNumber        startDateTime         stopDateTime in_am
3694               1.0  2014-10-06T20:16:00  2014-12-15T00:00:00   yes
3695               2.0  2015-10-13T01:12:14  2015-11-16T00:00:00   yes
3696               3.0  2016-04-04T18:57:02  2016-04-18T00:00:00   yes
3697               4.0  2016-05-27T20:33:00  2016-06-27T00:00:00   yes
3698               5.0  2017-01-16T14:59:00  2017-03-06T22:45:00   yes
Data request has fulfilled.
CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument
      deploymentNumber        startDateTime         stopDateTime in_am
3694               1.0  2014-10-06T20:16:00  2014-12-15T00:00:00   yes
3695               2.0  2015-10-13T01:12:14  2015-11-16T00:00:00   yes
3696               3.0  2016-04-04T18:57:02  2016-04-18T00:00:00   yes
3697               4.0  2016-05-27T20:33:00  2016-06-27T00:00:00   yes
3698               5.0  2017-01-16T14:59:00  2

In [77]:
# Unpack the six values returned by cf.nc_attributes for the first selected file.
# NOTE(review): datasets[0] raises IndexError when no file matched the review list.
fname, subsite, refdes, method, data_stream, deployment = cf.nc_attributes(datasets[0])

In [90]:
# Show the data file URLs that were selected for review.
print(datasets)

['https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190319T195533-CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0001_CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20141006T202152.905850-20141213T035757.300290.nc', 'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190319T195533-CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0002_CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20151014T001900.237980-20151110T091835.231900.nc', 'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190319T195533-CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument/deployment0003_CP05MOAS-GL335-03-CTDGVM000-telemetered-ctdgv_m_glider_instrument_20160404T185705.311220-20160417T235956.145260.nc', 'https://opendap.oceanobservatories.org/thredds/dodsC/ooi/leila.ocean@gmail.com/20190319T195533-CP05MOAS-GL335-03-CTD

In [None]:
 # Get info from the data review database
# refdes, data_stream and deployment are the values unpacked from cf.nc_attributes.
dr_data = cf.refdes_datareview_json(refdes)
stream_vars = cf.return_stream_vars(data_stream)
sci_vars = cf.return_science_vars(data_stream)
# The last four characters of `deployment` are parsed as the deployment number
# (presumably the string looks like 'deploymentNNNN' - TODO confirm).
# NOTE(review): get_deployment_information is called without the `cf.` prefix
# and is neither defined nor imported in the visible part of this notebook;
# confirm where it comes from.
deploy_info = get_deployment_information(dr_data, int(deployment[-4:]))

In [None]:
# Pull the deployment metadata out of deploy_info.
# Start/stop dates are kept as strings; position and depth are used as stored.
deploy_start, deploy_stop = (
    str(deploy_info[key]) for key in ('start_date', 'stop_date')
)
deploy_lon, deploy_lat, deploy_depth = (
    deploy_info[key] for key in ('longitude', 'latitude', 'deployment_depth')
)

In [None]:
# Calculate days deployed: whole calendar days from midnight of the start date
# to the first midnight at or after the stop time. n_days_deployed is None
# when the deployment has no stop date.
if deploy_stop != 'None':  # deploy_stop is str(...), so a missing stop date reads 'None'
    # normalize() zeroes the complete time of day. Clearing only hour, minute
    # and second would leave fractional seconds behind, which can make the
    # .days difference come out one day short.
    r_deploy_start = pd.to_datetime(deploy_start).normalize()
    stop_ts = pd.to_datetime(deploy_stop)
    stop_midnight = stop_ts.normalize()
    # Compare parsed timestamps rather than the text after 'T', so the check
    # does not depend on how the stop date string is formatted.
    if stop_ts == stop_midnight:
        r_deploy_stop = stop_ts
    else:
        # round a stop time later than midnight up to the next midnight
        r_deploy_stop = stop_midnight + timedelta(days=1)
    n_days_deployed = (r_deploy_stop - r_deploy_start).days
else:
    n_days_deployed = None