In [None]:
#PACKAGES
import json
import os

import pandas
import requests

In [None]:
#SOCIB API settings
#The API key is read from the SOCIB_API_KEY environment variable so that it does not have to be
#written into the notebook (and leaked when the notebook is shared). If the variable is not set,
#the empty placeholder below is used: replace it with your key if you prefer.
#Request a key at http://api.socib.es/home/ (form at the bottom of the page).
api_key = os.environ.get('SOCIB_API_KEY', '')
api_url = 'http://api.socib.es'
#headers sent with every request to the API
headers = {
    'accept': 'application/vnd.socib+json',
    'apikey': api_key,
}

In [None]:
#API QUERY ELEMENTS
#Platform id to query. More ids are listed at http://api.socib.es/platforms/ ; that list can be
#filtered with a platform_type parameter (available codes at http://api.socib.es/platform-types/).
platform = 'Mobims_SonBou'
#path (relative to api_url) of the first page of entries of that platform
end_point = '/entries/?platform=%s' % platform

In [None]:
#AUXILIARY FUNCTIONS
def instrument_details(data, apiCache):
    """Add the instrument name, type and category of the current entry to ``data``.

    The instrument URL is NOT taken from the arguments: it is read from the
    module-level variable ``entry`` (the loop variable of the cell that walks
    the API results), so this function must be called from inside that loop.
    Decoded JSON bodies are memoized in ``apiCache`` so each URL is requested
    at most once.

    Args:
        data: dict describing the current entry. It is updated in place with the
            keys 'instrument_name', 'instrument_type' and 'instrument_category'.
        apiCache: dict mapping URL -> decoded JSON body. It is updated in place.

    Returns:
        The list ``[data, apiCache]`` (the same two objects that were passed in).
    """
    url = entry['instrument']
    if url is not None:
        #instrument name
        if url not in apiCache:
            request = requests.get(url, headers=headers)
            apiCache[url] = json.loads(request.text)
        response = apiCache[url]
        data['instrument_name'] = response['name']
        #instrument type and category: the instrument's 'type' value is used as the URL of a second resource
        url = response['type']
        if url not in apiCache:
            request = requests.get(url, headers=headers)
            apiCache[url] = json.loads(request.text)
        response = apiCache[url]
        #These two fields must be filled on cache hits as well. They used to be set only in the
        #cache-miss branch, so entries sharing an already cached instrument type lost them.
        data['instrument_type'] = response['name']
        data['instrument_category'] = response['type']
    else: #aggregations case
        data['instrument_name'] = 'N/A'
        data['instrument_type'] = 'N/A'
        data['instrument_category'] = 'N/A'
    return [data,apiCache]

In [None]:
def variables_details(data, apiCache):
    """Add the comma-separated variable codes of the current entry to ``data``.

    The variables URL is NOT taken from the arguments: it is read from the
    module-level variable ``entry`` (the loop variable of the cell that walks
    the API results), so this function must be called from inside that loop.
    The JSON behind that URL is iterated; each item's 'variable' value is used
    as the URL of a variable resource, whose 'code' is collected. Decoded JSON
    bodies are memoized in ``apiCache`` so each URL is requested at most once.

    Args:
        data: dict describing the current entry. It is updated in place with the
            key 'variables' ('' when no code could be collected).
        apiCache: dict mapping URL -> decoded JSON body. It is updated in place.

    Returns:
        The list ``[data, apiCache]`` (the same two objects that were passed in).
    """
    data['variables'] = ''
    url = entry['variables']
    if url not in apiCache:
        request = requests.get(url, headers=headers)
        apiCache[url] = json.loads(request.text)
    variable_list = apiCache[url]
    #The codes must be collected on cache hits as well. This loop used to live in the
    #cache-miss branch, so a cached variables URL produced an empty 'variables' field.
    codes = []
    for variable in variable_list:
        url = variable['variable']
        if url not in apiCache:
            request = requests.get(url, headers=headers)
            apiCache[url] = json.loads(request.text)
        response = apiCache[url]
        try:
            code = response['code']
        except (KeyError, TypeError):
            #variable resource without a 'code': skipped, as before
            continue
        if code is None:
            continue
        codes.append(str(code))
    #join instead of repeated concatenation: no stray leading comma
    data['variables'] = ','.join(codes)
    return [data, apiCache]

In [None]:
#Getting the raw list of netCDFs (entries) of the given platform -> one dict per entry, page after page
apiCache = {} #URL -> decoded JSON, shared with the helper functions so that each secondary URL is requested only once
entries = []
url = '%s%s' % (api_url, end_point)
while url is not None:
    request_ = requests.get(url, headers=headers)
    #stop with an explicit HTTP error (e.g. missing or invalid api_key) instead of an obscure KeyError on 'results' below
    request_.raise_for_status()
    response_ = json.loads(request_.text)
    #loop over netCDFs (entries); the helper functions read this `entry` variable as a global, so it must keep this name
    for entry in response_['results']:
        data = {}
        #inherited info
        data['id'] = entry['id']
        data['processing_level'] = entry['processing_level']
        data['data_mode'] = entry['data_mode']
        data['initial_datetime'] = entry['initial_datetime']
        data['end_datetime'] = entry['end_datetime']
        data['opendap_url'] = entry['services']['opendap']['url']
        data['http_url'] = entry['services']['http_file']['url']
        data['catalog_url'] = entry['services']['thredds_catalog']['url']
        #derived info: the file name is the last path segment of the opendap url
        data['netCDF_name'] = entry['services']['opendap']['url'].split('/')[-1]
        #implicit info
        data, apiCache = variables_details(data,apiCache) #variables handler
        data, apiCache = instrument_details(data,apiCache) #instrument handler
        entries.append(data)
    #'next' is used as the following URL to request; the loop ends when it is None
    url = response_['next']
    print(url)

In [None]:
#Getting CSV -> rows sorted by instrument category, instrument type, processing level, instrument name and end date
csv_name = platform+'_netCDFs.csv'
ordered_header = ['instrument_category', 'instrument_type', 'instrument_name', 'processing_level', 'data_mode', 'initial_datetime', 'end_datetime', 'variables', 'netCDF_name', 'id', 'opendap_url', 'http_url', 'catalog_url']
#Passing `columns` keeps the frame well-formed when `entries` is empty (no netCDF found for the platform):
#sort_values would otherwise raise a KeyError and no CSV would be written. A header-only CSV is produced instead.
nc_dataframe = pandas.DataFrame(entries, columns=ordered_header).sort_values(['instrument_category','instrument_type', 'processing_level', 'instrument_name', 'end_datetime'])
nc_dataframe[ordered_header].to_csv(csv_name,index=False)