In [161]:
import concurrent.futures
import datetime
import gc
import json
import logging
import os
from math import isnan
from pprint import pprint

import matplotlib.pyplot as plt
import pandas as pd
import requests
from requests.packages.urllib3.util.retry import Retry

In [162]:
# OOI M2M API credentials. The OOI_USERNAME / OOI_TOKEN environment variables
# take precedence so secrets can stay out of the notebook; the literals are
# kept only as a fallback so existing runs behave exactly as before.
# NOTE(review): the fallback values are committed in plain text - rotate them
# and remove the defaults once every environment sets the variables.
username = os.environ.get('OOI_USERNAME', 'OOIAPI-30AZZ33CYL06XZ')
token = os.environ.get('OOI_TOKEN', 'M8OD8XIG5KD')

# Array under review; also selects input/<array>.csv and names the log and output files.
array = 'cabled'

# Base URLs of the three M2M endpoints queried by this notebook.
QC_PARAMETER_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12578/qcparameters/'
# (sic) the misspelled name is kept because later cells reference it.
DEPLOYEMENT_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12587/events/deployment/inv/'
DATA_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'

In [163]:
def _global_range_frame(qc_records, bound, value_column):
    """Tabulate one bound of the global-range test from the QC parameter records.

    Parameters
    ----------
    qc_records : list of dict
        Records decoded from the QC parameter endpoint. Each record is read for
        its 'qcParameterPK' entry and its 'value'.
    bound : str
        Which QC parameter to keep ('dat_min' or 'dat_max').
    value_column : str
        Name given to the column holding the record's 'value'.

    Returns
    -------
    pandas.DataFrame
        Columns 'refdes', 'parameter' and `value_column`, one row per matching record.
    """
    rows = []
    for record in qc_records:
        key = record['qcParameterPK']
        if key['qcId'] != 'dataqc_globalrangetest_minmax' or key['parameter'] != bound:
            continue
        ref = key['refDes']
        rows.append((
            ref['subsite'] + '-' + ref['node'] + '-' + ref['sensor'],
            key['streamParameter'],
            record['value'],
        ))
    return pd.DataFrame(rows, columns=['refdes', 'parameter', value_column])


print("requesting qc data...")
r = requests.get(QC_PARAMETER_URL, auth=(username, token))
# Fail here with the HTTP status instead of a confusing KeyError/TypeError
# further down when the API answers with an error payload.
r.raise_for_status()
data = r.json()

# The min and max bounds arrive as separate records; tabulate each, then join
# them on (refdes, parameter). The outer join keeps parameters for which only
# one of the two bounds is defined.
globalrange_min_qc_data = _global_range_frame(data, 'dat_min', 'global_range_min')
globalrange_max_qc_data = _global_range_frame(data, 'dat_max', 'global_range_max')

global_ranges = pd.merge(globalrange_min_qc_data,globalrange_max_qc_data, on=['refdes','parameter'], how='outer')

requesting qc data...


In [165]:
# set up some functions
def request_data(url,username,token):
    """Send an authenticated GET for `url` through the shared module-level `session`.

    `username` and `token` are passed together as the `auth` pair; whatever
    `session.get` returns is handed back unchanged.
    """
    return session.get(url, auth=(username, token))

def to_integer(dt_time):
    """Encode an object exposing `year`, `month` and `day` as the integer YYYYMMDD."""
    year_part = dt_time.year * 10000
    month_part = dt_time.month * 100
    return year_part + month_part + dt_time.day

def diff_days(d1,d2):
    """Return the `.days` component of `d2 - d1` (negative when d2 precedes d1)."""
    elapsed = d2 - d1
    return elapsed.days

# Seconds separating the 1900-01-01 and 1970-01-01 epochs. The data cells
# subtract this from each data point's 'time' before calling utcfromtimestamp.
ntp_epoch = datetime.datetime(year=1900, month=1, day=1)
unix_epoch = datetime.datetime(year=1970, month=1, day=1)
ntp_delta = (unix_epoch - ntp_epoch).total_seconds()

# Worker threads used to issue the API requests concurrently.
pool = concurrent.futures.ThreadPoolExecutor(max_workers=20)

# One shared session so connections are pooled and failed requests are retried
# (up to 10 attempts, with a backoff factor of 0.3).
session = requests.session()
retry = Retry(
        total=10,
        backoff_factor=0.3,
    )
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100,max_retries=retry,pool_block=True)
# Every URL built in this notebook is https://. Mounting the adapter on
# 'http://' alone left those requests on the session's default adapter, so the
# retry policy and pool sizing above were never used.
session.mount('http://', adapter)
session.mount('https://', adapter)

# Request progress and failures are written to <array>_requests.log.
logging.basicConfig(filename=array+'_requests.log',level=logging.DEBUG)

# Unique reference designators listed in input/<array>.csv.
refdes = 'input/' + array + '.csv'
refdes_list = pd.read_csv(refdes)['refdes'].drop_duplicates()

print('\n'+"working on", array)
print("building deployment info requests...")
# One deployment-events URL per reference designator. The designator is cut at
# fixed character positions into subsite / platform / instrument.
asset_requests = []
for designator in refdes_list:
    sub_site, platform, instrument = designator[:8], designator[9:14], designator[15:27]
    asset_requests.append(DEPLOYEMENT_URL + sub_site + '/' + platform + '/' + instrument + '/-1')

print("sending deployment info requests...")
# One (refdes, deployment, start_time, end_time) tuple per deployment event.
# Rows are appended whole, so a missing field can no longer leave the columns
# with different lengths (which made the DataFrame constructor fail).
deployment_records = []

future_to_url = {pool.submit(request_data, url, username, token): url for url in asset_requests}
for future in concurrent.futures.as_completed(future_to_url):
    url = future_to_url[future]
    try:
        asset_info = future.result().json()
    except Exception:
        # Best effort: one failed request must not abort the run, but record it.
        logging.exception("deployment request failed for %s", url)
        continue

    if not isinstance(asset_info, list):
        logging.warning("unexpected deployment response for %s: %r", url, asset_info)
        continue

    for event in asset_info:
        try:
            refdes = event['referenceDesignator']
            deployment = event['deploymentNumber']
            # Event times are divided by 1000 before conversion
            # (presumably epoch milliseconds - TODO confirm against the API).
            start_time = datetime.datetime.utcfromtimestamp(event['eventStartTime'] / 1000.0)
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            logging.exception("skipping malformed deployment event from %s", url)
            continue

        try:
            end_time = datetime.datetime.utcfromtimestamp(event['eventStopTime'] / 1000.0)
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            # No usable stop time: fall back to the current time, as before.
            end_time = datetime.datetime.utcnow()

        deployment_records.append((refdes, deployment, start_time, end_time))

deployment_data = pd.DataFrame(deployment_records, columns = ['refdes', 'deployment','start_time', 'end_time'])


working on cabled
building deployment info requests...
sending deployment info requests...


In [166]:
print("calculating days between deployment dates...")

# Expand every deployment into one row per calendar day. The per-deployment
# frames are collected and concatenated once: DataFrame.append inside the loop
# copied the growing frame on every pass and no longer exists in pandas 2.
daily_frames = []
for row in deployment_data.itertuples(index=False):
    periods = diff_days(row.start_time, row.end_time)
    if periods < 1:
        # Nothing to request: less than one whole day, or stop before start
        # (a negative period made pd.date_range raise).
        continue
    first_day = str(to_integer(row.start_time))
    daily_frames.append(pd.DataFrame(
        {
            'refdes': row.refdes,
            'deployment': row.deployment,
            'date': pd.date_range(first_day, periods=periods, freq='D'),
        },
        columns=['refdes', 'deployment', 'date'],
    ))

if daily_frames:
    deployment_data_days = pd.concat(daily_frames, ignore_index=True)
else:
    deployment_data_days = pd.DataFrame(columns = ['refdes', 'deployment','date'])

# re-order data frame columns
deployment_data_days = deployment_data_days[['refdes', 'deployment','date']]

print("building data request urls...")
# Each request window is one calendar day, inset by 5 seconds at both ends.
for window_column, offset_seconds in (('start_date', 5), ('end_date', 86395)):
    deployment_data_days[window_column] = deployment_data_days['date'] + datetime.timedelta(seconds=offset_seconds)

# Unique (refdes, method, stream, parameter) combinations from input/<array>.csv.
qc_db_input = pd.read_csv('input/' + array + '.csv')
refdes_streams_df = qc_db_input[['refdes','method','stream','parameter']].drop_duplicates()

# One row per stream/parameter per deployment day.
request_inputs = refdes_streams_df.merge(deployment_data_days, on='refdes')

# Cut the reference designator at fixed positions into its URL path parts.
request_inputs['subsite'] = request_inputs['refdes'].str[:8]
request_inputs['platform'] = request_inputs['refdes'].str[9:14]
request_inputs['instrument'] = request_inputs['refdes'].str[15:27]

# Render both window bounds as the timestamp strings placed in the query string.
for window_column in ('start_date', 'end_date'):
    as_datetime = pd.to_datetime(request_inputs[window_column])
    request_inputs[window_column] = as_datetime.dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')

request_inputs['urls'] = (
    DATA_URL
    + request_inputs['subsite']
    + '/' + request_inputs['platform']
    + '/' + request_inputs['instrument']
    + '/' + request_inputs['method']
    + '/' + request_inputs['stream']
    + '?beginDT=' + request_inputs['start_date']
    + '&endDT=' + request_inputs['end_date']
    + '&limit=50'
)

# Several parameters share a stream, so the same URL appears repeatedly.
request_urls = request_inputs['urls'].drop_duplicates().tolist()

calculating days between deployment dates...
building data request urls...


In [167]:
def _first_valid(values):
    """Return the first entry of `values` that is not NaN, or NaN when there is none."""
    return next((v for v in values if not isnan(v)), float('nan'))


def _summarize_response(data, global_ranges, parameters_by_refdes, missing):
    """Score the data points of one response against the global ranges.

    Parameters
    ----------
    data : list of dict
        Data points decoded from the response JSON. Each point is read for its
        'time' and 'pk' entries plus any keys named after a ranged parameter.
    global_ranges : pandas.DataFrame
        Columns 'refdes', 'parameter', 'global_range_min', 'global_range_max'.
    parameters_by_refdes : dict
        Maps a reference designator to the parameter names that have a global range.
    missing : list
        Reference designators without any global range are appended here.

    Returns
    -------
    pandas.DataFrame
        Columns 'refdes', 'parameter', 'date', 'data_points', 'percent'. `percent`
        is the number of passing values divided by `len(data)`, times 100.
    """
    summary_columns = ['refdes','parameter','date','data_points','percent']
    rows = []
    for point in data:
        # 'time' is shifted by ntp_delta before being read as a Unix timestamp.
        date = datetime.datetime.utcfromtimestamp(point['time'] - ntp_delta).date()
        pk = point['pk']
        refdes = pk['subsite'] + '-' + pk['node'] + '-' + pk['sensor']

        parameters = parameters_by_refdes.get(refdes)
        if parameters is None:
            missing.append(refdes)
            continue

        for var in parameters:
            if var not in point:
                continue
            value = point[var]
            # Array-valued parameters are judged on their first non-NaN entry.
            # An all-NaN array now yields NaN (counted as failing) instead of
            # raising StopIteration and discarding the whole response.
            if isinstance(value, list):
                value = _first_valid(value)
            rows.append((refdes, var, value, date))

    if not rows:
        return pd.DataFrame(columns=summary_columns)

    response_data = pd.DataFrame(rows, columns = ['refdes','parameter','value','date'])

    # Attach the range to every value and test it.
    # NOTE(review): both comparisons are strict, so a value equal to a bound
    # fails - confirm this matches the intended global range test.
    df = response_data.merge(global_ranges, on=['refdes','parameter'], how='left')
    df['pass'] = (df['value'] < pd.to_numeric(df['global_range_max'])) & \
                    (df['value'] > pd.to_numeric(df['global_range_min']))

    # collapse to the count of passing points per refdes / parameter / day
    df2 = df['pass'].groupby([df['refdes'], df['parameter'], df['date']]).sum().reset_index()
    df2['percent'] = (df2['pass'] / len(data)) * 100
    df2['data_points'] = len(data)
    return df2[summary_columns]


print("sending data requests for", array+'...')
print('\t',"current time:", datetime.datetime.now())
print('\t',len(request_urls ),"data requests being sent")
print('\t',"check",array+"_requests.log","file in your working directory for progress")

# Built once instead of filtering global_ranges for every data point.
parameters_by_refdes = global_ranges.groupby('refdes')['parameter'].apply(list).to_dict()

daily_summaries = []
missing = []

future_to_url = {pool.submit(request_data, url, username, token): url for url in request_urls}
for future in concurrent.futures.as_completed(future_to_url):
    url = future_to_url[future]
    try:
        data = future.result().json()
        if not isinstance(data, list):
            # Anything other than a list of data points is treated as "no data".
            logging.info("no data for %s: %r", url, data)
            continue
        summary = _summarize_response(data, global_ranges, parameters_by_refdes, missing)
    except Exception:
        # Best effort: keep going, but leave a trace instead of silently dropping it.
        logging.exception("failed to process %s", url)
        continue

    if not summary.empty:
        daily_summaries.append(summary)

# Concatenate once at the end; appending inside the loop re-copied the growing frame.
if daily_summaries:
    finaldf = pd.concat(daily_summaries)
else:
    finaldf = pd.DataFrame(columns=['refdes','parameter','date','data_points','percent'])

sending data requests for cabled...
	 current time: 2017-11-29 00:14:30.287136
	 66579 data requests being sent
	 check cabled_requests.log file in your working directory for progress


In [170]:
quality_path = 'output/'+array+'_quality.csv'
finaldf.to_csv(quality_path, index=False)

# Instrument/parameter pairs listed in the input file that never appear in the
# results, i.e. no data was returned or no qc values are available.
key_columns = ['refdes','parameter']
returns = finaldf[key_columns].drop_duplicates()
expected = qc_db_input[key_columns].drop_duplicates()
merged_keys = returns.merge(expected, how='outer', indicator=True)
not_found = merged_keys[merged_keys['_merge'] == 'right_only'].drop('_merge', axis=1)
print(not_found,'\n')

# Instruments with no global range values at all.
print('the following reference designators are missing global range values\n',set(missing))

                          refdes  \
145  RS01SBPD-DP01A-03-FLCDRA102   
146  RS01SBPD-DP01A-04-FLNTUA102   
147  RS01SBPD-DP01A-04-FLNTUA102   
148  RS01SBPD-DP01A-04-FLNTUA102   
149  RS01SBPD-DP01A-05-OPTAAC102   
150  RS01SBPD-DP01A-05-OPTAAC102   
151  RS01SBPS-PC01A-06-VADCPA101   
152  RS01SBPS-PC01A-07-CAMDSC102   
153  RS01SBPS-PC01A-4A-DOSTAD103   
154  RS01SBPS-PC01A-4A-DOSTAD103   
155  RS01SBPS-PC01A-4A-DOSTAD103   
156  RS01SBPS-PC01A-4A-DOSTAD103   
157  RS01SBPS-PC01A-4A-DOSTAD103   
158  RS01SBPS-PC01A-4A-DOSTAD103   
159  RS01SBPS-PC01A-4A-DOSTAD103   
160  RS01SBPS-PC01A-4B-PHSENA102   
161  RS01SBPS-PC01A-4C-FLORDD103   
162  RS01SBPS-SF01A-2A-DOFSTA102   
163  RS01SBPS-SF01A-2A-DOFSTA102   
164  RS01SBPS-SF01A-2A-DOFSTA102   
165  RS01SBPS-SF01A-2A-DOFSTA102   
166  RS01SBPS-SF01A-2A-DOFSTA102   
167  RS01SBPS-SF01A-2A-DOFSTA102   
168  RS01SBPS-SF01A-2D-PHSENA101   
169  RS01SBPS-SF01A-3A-FLORTD101   
170  RS01SBPS-SF01A-3D-SPKIRA101   
171  RS01SBPS-SF01A-4A-NUTNR

In [171]:
# Reduce both frames to the columns needed and give them a common datetime
# 'date' so they can be joined. `assign` builds new frames instead of writing a
# column into a column-subset slice (which can raise SettingWithCopyWarning).
request_inputs = (
    request_inputs[['refdes','method','stream','parameter','date']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
finaldf = (
    finaldf
    .assign(date=pd.to_datetime(finaldf['date']))
    [['refdes','parameter','date','data_points','percent']]
)

# The outer join keeps requested days without a result (their data_points and
# percent are empty), so gaps can be examined in the output file.
new = request_inputs.merge(finaldf, on=['refdes','parameter','date'], how='outer')
new.to_csv('output/'+array+'_examine.csv', index=False)

In [142]:
# Preview only the first rows rather than dumping the whole frame.
request_inputs.head(10)

Unnamed: 0,refdes,method,stream,parameter,date
0,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-28
1,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-29
2,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-30
3,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-01
4,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-02
5,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-03
6,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-04
7,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-05
8,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-06
9,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-07


In [143]:
# Preview only the first rows rather than dumping the whole frame.
finaldf.head(10)

Unnamed: 0,refdes,parameter,date,data_points,percent
0,RS03AXBS-LJ03A-05-HPIESA301,hpies_bliley_frequency,2014-08-24,144,0.0
1,RS03AXBS-LJ03A-05-HPIESA301,hpies_bliley_temperature_L1,2014-08-24,144,100.0
2,RS03AXBS-LJ03A-05-HPIESA301,hpies_temperature,2014-08-24,144,100.0
3,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time1,2014-08-24,144,0.0
4,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time1_L1,2014-08-24,144,100.0
5,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time2,2014-08-24,144,0.0
6,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time2_L1,2014-08-24,144,100.0
7,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time3,2014-08-24,144,0.0
8,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time3_L1,2014-08-24,144,100.0
9,RS03AXBS-LJ03A-05-HPIESA301,hpies_travel_time4,2014-08-24,144,0.0


In [144]:
# Row count of request_inputs.
request_inputs.shape[0]

41981

In [149]:
# Outer join of the requested days with the per-day results.
new = pd.merge(request_inputs, finaldf, how='outer', on=['refdes','parameter','date'])

In [160]:
# Write the joined frame without its index column.
examine_path = 'output/'+array+'_examine.csv'
new.to_csv(examine_path, index=False)

In [141]:
# Rebind finaldf to a copy with a fresh 0..n-1 index; the old index is discarded.
finaldf = finaldf.reset_index(drop=True)

Unnamed: 0,refdes,method,stream,parameter,date,data_points,percent
0,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-28,,
1,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-29,,
2,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-09-30,,
3,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-01,,
4,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-02,,
5,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-03,,
6,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-04,,
7,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-05,,
8,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-06,,
9,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,2014-10-07,,


In [151]:
# Rows of request_inputs belonging to a single reference designator.
test = request_inputs['refdes'].eq('RS03AXBS-LJ03A-05-HPIESA301')
a = request_inputs.loc[test]

In [159]:
# Print the stream and parameter of every selected row.
for stream_name, parameter_name in zip(a['stream'], a['parameter']):
    print(stream_name, parameter_name)

echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sounding hpies_pressure_L1
echo_sou

In [45]:
# NOTE(review): df3 is not defined in any cell visible here - presumably left
# over from an earlier kernel session; confirm or remove, since this cell
# would fail after Restart & Run All.
df3

Unnamed: 0,refdes,parameter,time_stamp,data_points,percent
0,RS03AXBS-LJ03A-12-CTDPFB301,ctd_tc_oxygen,2017-11-27,51,100.0
1,RS03AXBS-LJ03A-12-CTDPFB301,density,2017-11-27,51,100.0
2,RS03AXBS-LJ03A-12-CTDPFB301,dissolved_oxygen,2017-11-27,51,100.0
3,RS03AXBS-LJ03A-12-CTDPFB301,practical_salinity,2017-11-27,51,100.0
4,RS03AXBS-LJ03A-12-CTDPFB301,seawater_conductivity,2017-11-27,51,100.0
5,RS03AXBS-LJ03A-12-CTDPFB301,seawater_pressure,2017-11-27,51,100.0
6,RS03AXBS-LJ03A-12-CTDPFB301,seawater_temperature,2017-11-27,51,100.0


In [59]:
# Outer join of the filtered request rows with df3 on their shared columns.
new = pd.merge(a, df3, how='outer')

In [60]:
# Preview only the first rows rather than dumping the whole frame.
new.head(10)

Unnamed: 0,refdes,method,stream,parameter,time_stamp,data_points,percent
0,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,density,2017-11-27,51,100.0
1,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,practical_salinity,2017-11-27,51,100.0
2,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,dissolved_oxygen,2017-11-27,51,100.0
3,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,seawater_temperature,2017-11-27,51,100.0
4,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,seawater_pressure,2017-11-27,51,100.0
5,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,seawater_conductivity,2017-11-27,51,100.0
6,RS03AXBS-LJ03A-12-CTDPFB301,streamed,ctdpf_optode_sample,ctd_tc_oxygen,2017-11-27,51,100.0


In [80]:
# Preview only the first rows rather than dumping the whole frame.
request_inputs.head(10)

Unnamed: 0,refdes,method,stream,parameter,deployment,date,start_date,end_date,subsite,platform,instrument,urls
0,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-09-28,2014-09-28T00:00:05.000Z,2014-09-28T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
1,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-09-29,2014-09-29T00:00:05.000Z,2014-09-29T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
2,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-09-30,2014-09-30T00:00:05.000Z,2014-09-30T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
3,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-01,2014-10-01T00:00:05.000Z,2014-10-01T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
4,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-02,2014-10-02T00:00:05.000Z,2014-10-02T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
5,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-03,2014-10-03T00:00:05.000Z,2014-10-03T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
6,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-04,2014-10-04T00:00:05.000Z,2014-10-04T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
7,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-05,2014-10-05T00:00:05.000Z,2014-10-05T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
8,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-06,2014-10-06T00:00:05.000Z,2014-10-06T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...
9,RS03ASHS-MJ03B-00-OSMOIA301,recovered_inst,osmoi_a_subcon_instrument_recovered,temp,1,2014-10-07,2014-10-07T00:00:05.000Z,2014-10-07T23:59:55.000Z,RS03ASHS,MJ03B,00-OSMOIA301,https://ooinet.oceanobservatories.org/api/m2m/...


In [9]:
%%time

# Single hand-built request, used to exercise the range check on one stream.
missing = []

data_request_url = DATA_URL+\
                    'RS03ASHS/'+\
                    'MJ03B/'+\
                    '07-TMPSFA301/'+\
                    'streamed/'+\
                    'tmpsf_sample'+'?'+\
                    'beginDT=2017-09-04T17:54:58.050Z&'+\
                    'endDT=2017-09-11T23:54:58.050Z&'+\
                    'limit=50'

r = requests.get(data_request_url, auth=(username, token))
data = r.json()

refdes_list = []
parameter_list = []
timestamp_list = []
value_list = []

# Global-range rows per reference designator, filtered once per designator
# instead of once per data point.
ranges_by_refdes = {}
# Empty selection so `y` exists even when the response holds no data points.
y = global_ranges.iloc[0:0]

# iterate through data points to extract time stamps and ranged values
for point in data:
    # 'time' is shifted by ntp_delta before being read as a Unix timestamp.
    timestamp = datetime.datetime.utcfromtimestamp(point['time'] - ntp_delta).date()

    # get refdes from the response and look up its global range rows
    refdes = point['pk']['subsite'] + '-' + point['pk']['node'] + '-' + point['pk']['sensor']
    if refdes not in ranges_by_refdes:
        ranges_by_refdes[refdes] = global_ranges[global_ranges['refdes'] == refdes]
    y = ranges_by_refdes[refdes]

    # no global range rows at all for this refdes
    if y.empty:
        missing.append(refdes)

    for var in y.parameter.values:
        if var not in point:
            continue
        z = point[var]
        # Array-valued parameters are judged on their first non-NaN entry; an
        # all-NaN array yields NaN instead of raising StopIteration.
        if isinstance(z, list):
            z = next((u for u in z if not isnan(u)), float('nan'))
        refdes_list.append(refdes)
        parameter_list.append(var)
        value_list.append(z)
        timestamp_list.append(timestamp)

# The aggregation below runs once, after all points are collected. It used to
# sit inside the loop, so the cumulative summary was recomputed and appended to
# finaldf once per data point.

# create data frame from lists collected above
data_dict = {
    'refdes':refdes_list,
    'parameter':parameter_list,
    'value':value_list,
    'date':timestamp_list}
response_data = pd.DataFrame(data_dict, columns = ['refdes','parameter','value','date'])

# merge into data frame with global range values and check if value between global ranges
df = y.merge(response_data,how='outer')
df['pass'] = (df['value'] < pd.to_numeric(df['global_range_max'])) & \
                (df['value'] > pd.to_numeric(df['global_range_min']))

# collapse the data frame to calculate percent of data points that pass the test for that day
df2 = df['pass'].groupby([df['refdes'], \
            df['parameter'],\
          df['date'] \
           ]).sum().reset_index()
df2['percent'] = (df2['pass'] / len(data)) * 100

# result for this request
finaldf = df2

CPU times: user 836 ms, sys: 9.41 ms, total: 846 ms
Wall time: 2.6 s


In [15]:
# Row count of the merged values-and-ranges frame.
df.shape[0]

24024

In [16]:
# Number of entries in `data`, which several cells rebind to the JSON decoded
# from their most recent response.
len(data)

1001

In [18]:
)

24024