In [102]:
%%time
import concurrent.futures
import datetime
import gc
import json
import logging
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
from requests.packages.urllib3.util.retry import Retry


# define your inputs
# Credentials are read from the environment (OOI_USERNAME / OOI_TOKEN) when set, so
# they do not have to be stored in the notebook. The literals are only a fallback that
# keeps the notebook running unchanged when the variables are absent.
# NOTE(review): the fallback values are credentials committed to source -- rotate them
# and delete the fallbacks once the environment variables are in place.
username = os.environ.get('OOI_USERNAME', 'OOIAPI-30AZZ33CYL06XZ')
token = os.environ.get('OOI_TOKEN', 'GPJJ7ACBPR9')
# Each entry names an input file 'input/<array>.csv' that the main loop reads.
arrays = ['test']


# set up some functions
def request_data(url,username,token,timeout=(10, 300)):
    """Send an authenticated GET for ``url`` through the shared module-level ``session``.

    Parameters
    ----------
    url : str
        Fully built request URL.
    username, token : str
        Passed to requests as the ``auth`` tuple.
    timeout : float or (connect, read) tuple, optional
        Seconds to wait for the server. requests waits indefinitely when no
        timeout is given, so a single stalled connection would block its worker
        thread and the ``as_completed`` loop consuming the results.

    Returns
    -------
    The response object returned by ``session.get``.

    Raises
    ------
    Whatever ``session.get`` raises, including timeout errors; when called via the
    thread pool the exception surfaces from ``future.result()``.
    """
    auth = (username, token)
    return session.get(url, auth=auth, timeout=timeout)

def to_integer(dt_time):
    """Pack the calendar date of ``dt_time`` into a single number of the form YYYYMMDD.

    Only the ``year``, ``month`` and ``day`` attributes are read; any time-of-day
    part is ignored.
    """
    year_and_month = dt_time.year * 100 + dt_time.month
    return year_and_month * 100 + dt_time.day

def diff_days(d1,d2):
    """Return the ``days`` component of ``d2 - d1``.

    This is the whole-day part of the difference (negative when ``d2`` precedes
    ``d1``); any remaining hours/minutes are not counted.
    """
    elapsed = d2 - d1
    return elapsed.days


# Base URLs that the request builders below extend with the inputs above.
# ("DEPLOYEMENT" is misspelled, but the name is referenced further down, so it stays.)
DEPLOYEMENT_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12587/events/deployment/inv/'
DATA_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'
# Sent as the `parameters=` query argument of every data request.
parameter = '7'

# Seconds between the 1900-01-01 and 1970-01-01 epochs. Record times in the data
# responses have this subtracted before being handed to utcfromtimestamp.
ntp_epoch = datetime.datetime(year=1900, month=1, day=1)
unix_epoch = datetime.datetime(year=1970, month=1, day=1)
epoch_gap = unix_epoch - ntp_epoch
ntp_delta = epoch_gap.total_seconds()

# Worker pool and HTTP session shared by every request_data() call.
pool = concurrent.futures.ThreadPoolExecutor(max_workers=20)
session = requests.session()
# Retry policy handed to the adapter: up to 10 attempts with exponential backoff.
retry = Retry(
        total=10,
        backoff_factor=0.3,
    )
adapter = requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100,max_retries=retry,pool_block=True)
# Every URL built in this notebook starts with https://, so the adapter must be
# mounted for that prefix too. Mounted on 'http://' alone, the retry policy and the
# connection-pool sizing above were never applied to any request.
session.mount('http://', adapter)
session.mount('https://', adapter)


for array in arrays:

    # Log at DEBUG level to '<array>_requests.log'. logging.basicConfig does nothing
    # once the root logger has handlers, so only the first call in a kernel takes effect.
    logging.basicConfig(level=logging.DEBUG, filename=array+'_requests.log')

    # Read the 'refdes' column of the input CSV and keep each reference designator once.
    refdes = 'input/' + array + '.csv'
    refdes_list = pd.read_csv(refdes)['refdes'].drop_duplicates()

    print('\n'+"working on", array)
    print("building deployment info requests...")
    # One deployment-inventory URL per reference designator. The fixed-width slices
    # split a designator such as GI02HYPM-WFP02-04-CTDPFL000 into its three path parts.
    asset_requests = [
        DEPLOYEMENT_URL + '/'.join((designator[:8], designator[9:14], designator[15:27])) + '/-1'
        for designator in refdes_list
    ]

    print("sending deployment info requests...")
    # One dict per deployment event. Building whole records keeps the four columns
    # aligned even when an individual event is malformed and has to be skipped
    # (separate parallel lists could end up with different lengths and make the
    # DataFrame construction below fail).
    deployment_records = []

    future_to_url = {pool.submit(request_data, url, username, token): url for url in asset_requests}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            asset_info = future.result().json()
        except Exception:
            # Best effort: a failed request or an unparseable body skips this URL,
            # but the failure is written to the log instead of disappearing.
            logging.exception("deployment request failed: %s", url)
            continue

        if not isinstance(asset_info, list):
            # Anything other than a list of events (e.g. an error object) is skipped.
            logging.warning("unexpected deployment response for %s: %r", url, asset_info)
            continue

        for event in asset_info:
            try:
                record = {
                    'refdes': event['referenceDesignator'],
                    'deployment': event['deploymentNumber'],
                    # event times are divided by 1000 before conversion,
                    # i.e. they are treated as epoch milliseconds
                    'start_time': datetime.datetime.utcfromtimestamp(event['eventStartTime']/1000.0),
                }
                end = event['eventStopTime']
            except (KeyError, TypeError, ValueError, OverflowError, OSError):
                logging.warning("skipping malformed deployment event from %s: %r", url, event)
                continue

            try:
                record['end_time'] = datetime.datetime.utcfromtimestamp(end/1000.0)
            except (TypeError, ValueError, OverflowError, OSError):
                # No usable stop time (for example a null value): treat the
                # deployment as running up to the current time.
                record['end_time'] = datetime.datetime.utcnow()

            deployment_records.append(record)

    deployment_data = pd.DataFrame(deployment_records, columns = ['refdes', 'deployment','start_time', 'end_time'])



    print("calculating days between deployment dates...")
    day_columns = ['refdes', 'deployment','date']

    # calculate days between deployment dates
    # Each deployment yields one frame of daily rows; the frames are concatenated once
    # at the end. (DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every iteration.)
    # NOTE(review): `periods` is the whole-day part of end - start, counted from the
    # start date, so the calendar day on which a deployment ends is not generated --
    # confirm that this is intended.
    daily_frames = []
    for index, row in deployment_data.iterrows():
        start_time = row['start_time']
        end_time = row['end_time']
        periods = diff_days(start_time, end_time)
        if periods <= 0:
            # Zero days contribute no rows; a negative count would make
            # pd.date_range raise.
            continue
        start_time = to_integer(start_time)
        total_days = pd.DataFrame({'date' : pd.date_range(str(start_time),periods=periods,freq='D')})

        total_days['refdes'] = row['refdes']
        total_days['deployment'] = row['deployment']
        daily_frames.append(total_days)

    if daily_frames:
        deployment_data_days = pd.concat(daily_frames)
    else:
        # pd.concat rejects an empty list; fall back to an empty frame with the same columns.
        deployment_data_days = pd.DataFrame(columns = day_columns)

    # re-order data frame columns
    deployment_data_days = deployment_data_days[day_columns]







    print("building data request urls...")
    # Each request window runs from 5 s after midnight to 86395 s after midnight
    # (5 s before the next midnight) of the deployment day.
    window_open = deployment_data_days['date'] + datetime.timedelta(seconds=5)
    window_close = deployment_data_days['date'] + datetime.timedelta(seconds=86395)
    deployment_data_days['start_date'] = window_open
    deployment_data_days['end_date'] = window_close

    # Join the rows of the input CSV to the daily rows on the reference designator.
    refdes_streams = 'input/' + array + '.csv'
    refdes_streams_df = pd.read_csv(refdes_streams)
    request_inputs = refdes_streams_df.merge(deployment_data_days, on='refdes')

    # Fixed-width pieces of the reference designator become URL path components.
    for column, piece in (('subsite', slice(None, 8)),
                          ('platform', slice(9, 14)),
                          ('instrument', slice(15, 27))):
        request_inputs[column] = request_inputs['refdes'].str[piece]

    # Render both window bounds in the timestamp format used in the query string.
    timestamp_format = '%Y-%m-%dT%H:%M:%S.000Z'
    for column in ('start_date', 'end_date'):
        request_inputs[column] = pd.to_datetime(request_inputs[column]).dt.strftime(timestamp_format)

    request_inputs['urls'] = (
        DATA_URL
        + request_inputs['subsite']
        + '/' + request_inputs['platform']
        + '/' + request_inputs['instrument']
        + '/' + request_inputs['method']
        + '/' + request_inputs['stream']
        + '?beginDT=' + request_inputs['start_date']
        + '&endDT=' + request_inputs['end_date']
        + '&limit=1000&parameters=' + parameter
    )

    request_urls = request_inputs['urls'].tolist()


    print("sending data requests for", array+'...')
    print('\t',"current time:", datetime.datetime.now())
    print('\t',len(request_urls ),"data requests being sent")
    print('\t',"check",array+"_requests.log","file in your working directory for progress")


    ref_des_list = []
    deployment_list = []
    method_list = []
    stream_list = []
    timestamp_list = []

    # URLs that produced a usable record; read by a scratch cell further down.
    urls_test = []

    # Position of the record used to label a response.
    # NOTE(review): presumably not 0 because the first record of a response can predate
    # beginDT (the sample request near the end of the notebook shows this) -- confirm.
    # A response with fewer records is still treated as "no data", as before.
    sample_index = 5

    future_to_url = {pool.submit(request_data, url, username, token): url for url in request_urls}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result().json()
            sample = data[sample_index]
            pk = sample['pk']
            reference_designator = pk['subsite'] + '-' + pk['node'] + '-' + pk['sensor']
            deployment = pk['deployment']
            method = pk['method']
            stream = pk['stream']
            # record time minus ntp_delta is handed to utcfromtimestamp; only the date is kept
            timestamp = datetime.datetime.utcfromtimestamp(sample['time'] - ntp_delta).date()
        except Exception as exc:
            # Still best effort, but `except Exception` no longer swallows
            # KeyboardInterrupt, and the reason is logged so request failures can be
            # told apart from days that really have no data.
            logging.debug("no usable data for %s (%s: %s)", url, type(exc).__name__, exc)
            continue

        ref_des_list.append(reference_designator)
        deployment_list.append(deployment)
        method_list.append(method)
        stream_list.append(stream)
        timestamp_list.append(timestamp)
        urls_test.append(url)






    # convert lists to data frame
    key_columns = ['refdes','method','stream', 'date']
    ooi_data = pd.DataFrame(
        {
            'refdes': ref_des_list,
            'method': method_list,
            'stream': stream_list,
            'date': timestamp_list,
        },
        columns = key_columns,
    ).drop_duplicates() # drops days from overlapping deployment time ranges
    ooi_data['date'] = pd.to_datetime(ooi_data['date']).dt.strftime('%Y-%m-%d')

    # Same normalisation for the requested days so that both frames merge on equal keys.
    request_inputs = request_inputs[key_columns].drop_duplicates() # drops days from overlapping deployment time ranges
    request_inputs['date'] = pd.to_datetime(request_inputs['date']).dt.strftime('%Y-%m-%d')

    # Requested days that have no matching response row: the right-only rows of an
    # outer merge. The merge must run before either frame gains its 'value' column,
    # otherwise 'value' would become part of the join key.
    merged = ooi_data.merge(request_inputs, indicator=True, how='outer')
    deployed_but_no_data = (
        merged.loc[merged['_merge'] == 'right_only']
        .drop('_merge', axis=1)
        .assign(value=0)
    )
    ooi_data['value'] = 1

    output = pd.concat([ooi_data, deployed_but_no_data])


working on test
building deployment info requests...
sending deployment info requests...
calculating days between deployment dates...
building data request urls...
sending data requests for test...
	 current time: 2017-11-10 13:10:30.646433
	 1162 data requests being sent
	 check test_requests.log file in your working directory for progress
CPU times: user 7.6 s, sys: 601 ms, total: 8.2 s
Wall time: 1min 26s


In [105]:
# Display the combined frame from the pipeline cell: rows from ooi_data carry value 1,
# rows from deployed_but_no_data carry value 0.
output

Unnamed: 0,refdes,method,stream,date,value
0,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-18,1
1,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-16,1
2,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-22,1
3,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-30,1
4,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-13,1
5,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-12,1
6,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-10-01,1
7,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-28,1
8,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-17,1
9,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-27,1


In [103]:
# Sanity check: collect every row of `output` that occurs more than once
# (keep=False marks all copies of a duplicate).
final = output[output.duplicated(keep=False)]

In [104]:
# Show the duplicated rows found above; the recorded run has none.
final

Unnamed: 0,refdes,method,stream,date,value


In [101]:
# Save the result. `array` and `output` are whatever the last iteration of the
# pipeline loop left behind, so with several arrays only the last one is written here.
os.makedirs('output', exist_ok=True)  # to_csv does not create a missing directory
output.to_csv('output/'+array+'.csv', index=False)

In [45]:
# Keep only URLs that occur more than once.
# NOTE(review): `urls` is not assigned in any visible cell before this line --
# presumably left over from a deleted cell; this fails on a fresh kernel, and
# re-running it filters the already filtered frame again.
urls = urls[urls.duplicated(keep=False)]

In [46]:
# Show the duplicated URLs selected in the previous cell.
urls

Unnamed: 0,urls
668,https://ooinet.oceanobservatories.org/api/m2m/...
669,https://ooinet.oceanobservatories.org/api/m2m/...
670,https://ooinet.oceanobservatories.org/api/m2m/...
671,https://ooinet.oceanobservatories.org/api/m2m/...
672,https://ooinet.oceanobservatories.org/api/m2m/...
673,https://ooinet.oceanobservatories.org/api/m2m/...
674,https://ooinet.oceanobservatories.org/api/m2m/...
675,https://ooinet.oceanobservatories.org/api/m2m/...
1063,https://ooinet.oceanobservatories.org/api/m2m/...
1064,https://ooinet.oceanobservatories.org/api/m2m/...


In [47]:
# Inspect the request rows around one pair of duplicated URLs: in the recorded output,
# deployments 3 and 4 both cover 2017-08-06 .. 2017-08-08, producing the same request twice.
# NOTE(review): the recorded columns (deployment, urls, ...) only exist before the
# pipeline cell narrows request_inputs to refdes/method/stream/date.
request_inputs[1060:1070]

Unnamed: 0,refdes,method,stream,deployment,date,start_date,end_date,subsite,platform,instrument,urls
1060,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-03,2017-08-03T00:00:05.000Z,2017-08-03T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1061,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-04,2017-08-04T00:00:05.000Z,2017-08-04T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1062,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-05,2017-08-05T00:00:05.000Z,2017-08-05T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1063,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-06,2017-08-06T00:00:05.000Z,2017-08-06T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1064,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-07,2017-08-07T00:00:05.000Z,2017-08-07T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1065,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,3,2017-08-08,2017-08-08T00:00:05.000Z,2017-08-08T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1066,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,4,2017-08-06,2017-08-06T00:00:05.000Z,2017-08-06T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1067,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,4,2017-08-07,2017-08-07T00:00:05.000Z,2017-08-07T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1068,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,4,2017-08-08,2017-08-08T00:00:05.000Z,2017-08-08T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...
1069,GI02HYPM-WFP02-04-CTDPFL000,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,4,2017-08-09,2017-08-09T00:00:05.000Z,2017-08-09T23:59:55.000Z,GI02HYPM,WFP02,04-CTDPFL000,https://ooinet.oceanobservatories.org/api/m2m/...


In [49]:
# Turn the URL list from the request loop into a frame and keep the URLs occurring more than once.
# NOTE(review): the pipeline cell as written leaves urls_test empty (its append is
# commented out); this cell also rebinds urls_test from a list to a DataFrame.
urls_test = pd.DataFrame(urls_test)
sent_urls = urls_test[urls_test.duplicated(keep=False)]

In [50]:
# Show the URLs that appear more than once in urls_test.
sent_urls

Unnamed: 0,0
652,https://ooinet.oceanobservatories.org/api/m2m/...
653,https://ooinet.oceanobservatories.org/api/m2m/...
655,https://ooinet.oceanobservatories.org/api/m2m/...
659,https://ooinet.oceanobservatories.org/api/m2m/...
661,https://ooinet.oceanobservatories.org/api/m2m/...
666,https://ooinet.oceanobservatories.org/api/m2m/...


In [51]:
# Collect fully duplicated rows of ooi_data.
# NOTE(review): the pipeline cell already calls drop_duplicates() on ooi_data, so with
# the current code this selects nothing; the recorded output (which has a `deployment`
# column) presumably comes from an earlier version of that cell.
trial = ooi_data[ooi_data.duplicated(keep=False)]

In [52]:
# Show the duplicated ooi_data rows selected in the previous cell.
trial

Unnamed: 0,refdes,deployment,method,stream,date,value
0,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-29,1
4,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-15,1
5,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-12,1
7,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-12,1
12,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-15,1
14,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-29,1
17,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-10-06,1
18,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-22,1
20,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-09-22,1
25,GI02HYPM-WFP02-04-CTDPFL000,1,recovered_wfp,ctdpf_ckl_wfp_instrument_recovered,2014-10-06,1


In [54]:
# Number of entries in urls_test, for comparison with len(ooi_data) in the next cell.
len(urls_test)

1021

In [55]:
# Number of rows in ooi_data; the recorded run shows the same count as len(urls_test).
len(ooi_data)

1021

In [57]:
# Same duplicate selection as the earlier `trial` cell, re-run against the current ooi_data.
trial = ooi_data[ooi_data.duplicated(keep=False)]

In [61]:
# Show the 'url' column of ooi_data.
# NOTE(review): the pipeline cell builds ooi_data with refdes, method, stream, date and
# value only, so this raises KeyError on a fresh run -- the column presumably came
# from an earlier version of that cell.
ooi_data['url']

651    https://ooinet.oceanobservatories.org/api/m2m/...
653    https://ooinet.oceanobservatories.org/api/m2m/...
654    https://ooinet.oceanobservatories.org/api/m2m/...
658    https://ooinet.oceanobservatories.org/api/m2m/...
659    https://ooinet.oceanobservatories.org/api/m2m/...
665    https://ooinet.oceanobservatories.org/api/m2m/...
Name: url, dtype: object

In [60]:
# Show the 'url' column of the duplicated rows.
# NOTE(review): depends on a 'url' column that the visible pipeline cell never creates.
trial['url']

651    https://ooinet.oceanobservatories.org/api/m2m/...
653    https://ooinet.oceanobservatories.org/api/m2m/...
654    https://ooinet.oceanobservatories.org/api/m2m/...
658    https://ooinet.oceanobservatories.org/api/m2m/...
659    https://ooinet.oceanobservatories.org/api/m2m/...
665    https://ooinet.oceanobservatories.org/api/m2m/...
Name: url, dtype: object

In [92]:
# One-off request for a single day of one stream, without the `parameters` filter,
# sent outside the shared session so the raw record timestamps can be inspected.
request_url = (
    'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'
    'GI02HYPM/WFP02/04-CTDPFL000/recovered_wfp/ctdpf_ckl_wfp_instrument_recovered?'
    'beginDT=2014-09-13T00:00:00.000Z&'
    'endDT=2014-09-14T00:00:00.000Z'
    '&limit=1000'
)

credentials = (username, token)
r = requests.get(request_url, auth=credentials)

In [93]:
# Decode the JSON body of the response fetched in the previous cell. This rebinds
# `data`, a name the pipeline cell also uses for each response inside its request loop.
data = r.json()

In [94]:
# Print the time of every returned record as a UTC datetime truncated to whole seconds.
# In the recorded output the first record (2014-09-12) predates the requested beginDT.
for position in range(len(data)):
    seconds_since_unix_epoch = data[position]['time'] - ntp_delta
    moment = datetime.datetime.utcfromtimestamp(seconds_since_unix_epoch)
    print(moment.replace(microsecond=0))
    

2014-09-12 07:39:27
2014-09-13 01:00:02
2014-09-13 01:00:37
2014-09-13 01:02:03
2014-09-13 01:03:29
2014-09-13 01:04:56
2014-09-13 01:06:22
2014-09-13 01:07:48
2014-09-13 01:09:15
2014-09-13 01:10:41
2014-09-13 01:12:08
2014-09-13 01:13:34
2014-09-13 01:15:01
2014-09-13 01:16:28
2014-09-13 01:17:54
2014-09-13 01:19:21
2014-09-13 01:20:47
2014-09-13 01:22:13
2014-09-13 01:23:40
2014-09-13 01:25:06
2014-09-13 01:26:32
2014-09-13 01:27:59
2014-09-13 01:29:26
2014-09-13 01:30:53
2014-09-13 01:32:19
2014-09-13 01:33:45
2014-09-13 01:35:12
2014-09-13 01:36:38
2014-09-13 01:38:04
2014-09-13 01:39:31
2014-09-13 01:40:57
2014-09-13 01:42:25
2014-09-13 01:43:51
2014-09-13 01:45:17
2014-09-13 01:46:44
2014-09-13 01:48:10
2014-09-13 01:49:36
2014-09-13 01:51:03
2014-09-13 01:52:29
2014-09-13 01:53:56
2014-09-13 01:55:22
2014-09-13 01:56:49
2014-09-13 01:58:16
2014-09-13 01:59:42
2014-09-13 02:01:09
2014-09-13 02:02:35
2014-09-13 02:04:01
2014-09-13 02:05:28
2014-09-13 02:06:54
2014-09-13 02:08:20
