In [1]:
% matplotlib inline
import requests, warnings, json, time, os, re
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

### IOOS Southern Ocean Data requests

Ask Sheri about the Water pCO2 <br>
DO - looks bad (be wary)!

__Creating a credentials file__ <br>
This is done so you aren't uploading your private token and username to a GitHub repo for the whole world to see.

1. Create a file called credentials.json in this folder.
2. Add your token and username info so it looks something like this:

```
{
    "username": "OOIAPI-FAKEAPINAME",
    "token": "BBN3YLYO783"
}

```

In [2]:
# Read the API username and token from the local credentials.json file
# (kept out of version control so the secrets are never committed).
with open("credentials.json") as credentials_file:
    data = json.load(credentials_file)
token, username = data['token'], data['username']

### Pull Surface Nitrate Data from the [Apex Surface Mooring](http://ooi.visualocean.net/instruments/view/GS01SUMO-RID16-03-CTDBPF000)

Example M2M url:
https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/GS01SUMO/RID16/03-CTDBPF000/metadata

In [9]:
# Build API Query
# The path segments are joined with single slashes: the original concatenation
# produced an empty segment ('07-NUTNRB000//') and a dangling '?'. requests adds
# the '?' itself whenever query parameters are supplied.
DATA_API_BASE_URL = 'https://ooinet.oceanobservatories.org/api/m2m/12576/sensor/inv/'
data_request_url = DATA_API_BASE_URL + '/'.join([
    'GS01SUMO',
    'RID16',
    '07-NUTNRB000',
    'telemetered',
    'nutnr_b_dcl_full_instrument',
])

# Submit the data request, authenticating with the API username/token pair.
r = requests.get(data_request_url, params=None, auth=(username, token))
# Decode the JSON reply; it is kept in `data` so it can be inspected and used
# to locate the generated files.
data = r.json()

In [10]:
# If the response says something about a 404, double check the API URL.
# Otherwise it should contain the 'allURLs' entries that the following cells
# use to poll for completion and to find the THREDDS catalog.
print(data)

{'outputURL': 'https://opendap.oceanobservatories.org/thredds/catalog/ooi/pdaniel@mbari.org/20180823T215502-GS01SUMO-RID16-07-NUTNRB000-telemetered-nutnr_b_dcl_full_instrument/catalog.html', 'numberOfSubJobs': 38, 'requestUUID': '58a681af-4cf7-4a4b-a2f8-02ae4a04d22e', 'allURLs': ['https://opendap.oceanobservatories.org/thredds/catalog/ooi/pdaniel@mbari.org/20180823T215502-GS01SUMO-RID16-07-NUTNRB000-telemetered-nutnr_b_dcl_full_instrument/catalog.html', 'https://opendap.oceanobservatories.org/async_results/pdaniel@mbari.org/20180823T215502-GS01SUMO-RID16-07-NUTNRB000-telemetered-nutnr_b_dcl_full_instrument'], 'sizeCalculation': 68874558, 'timeCalculation': 60}


__Waiting for the dataset to be built and sent to your THREDDS__

In [11]:
%%time
# Poll for the status.txt file under the request's async-results URL; a 200
# response for that file is treated as "the request has finished".
check_complete = data['allURLs'][1] + '/status.txt'
for i in range(1800):  # at most 1800 polls, one second apart
    r = requests.get(check_complete)
    if r.status_code == requests.codes.ok:
        print('request completed')
        break
    time.sleep(1)
else:
    # The loop ran out without a break: fail loudly instead of letting later
    # cells run against a dataset that was never confirmed as complete.
    raise TimeoutError('request not completed after 1800 checks: ' + check_complete)

request completed
CPU times: user 1.43 s, sys: 94.5 ms, total: 1.52 s
Wall time: 1min 30s


### Pull the data from Thredds ###

This chunk of code scrapes the THREDDS server associated with your username, finds each of the netCDF files associated with each deployment (three files in this case), and puts their URLs into a list.

In [None]:
# Scrape the THREDDS catalog page of this request and build one OPeNDAP URL
# per deployment netCDF file listed on it.
url = data['allURLs'][0]
tds_url = 'https://opendap.oceanobservatories.org/thredds/dodsC'
catalog_html = requests.get(url).text

# Every href on the page (not used below; kept for inspection).
urls = re.findall(r'href=[\'"]?([^\'" >]+)', catalog_html)

# Dataset paths ending in a literal ".nc". The dot is escaped so that any
# character followed by "nc" can no longer terminate the match early.
nc_paths = re.findall(r'(ooi/.*?\.nc)', catalog_html)

# Keep only files whose name has a digit right before ".nc" (the deployment
# data files end in a timestamp such as "...654000.nc"). A filtered copy is
# built instead of calling remove() on the list being iterated, which
# silently skipped elements.
x = [path for path in nc_paths if path[-4].isdigit()]

# URLs always use forward slashes, so join explicitly rather than with
# os.path.join (which uses the platform separator).
datasets = ['/'.join((tds_url, path)) for path in x]
datasets

Xarray is amazing and can open multiple netCDF files if they have the same coordinates (which each instrument deployment should have!).

These data are thrown into an xarray Dataset (this is sort of the fundamental unit of xarray and is basically a 3-D (or more) array that keeps track of some of the metadata).

In [4]:
# Shortcut: the data have already been queried and dumped on my THREDDS
# server, so the three deployment files are addressed directly here.
_stream_name = 'GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument'
_request_dir = ('https://opendap.oceanobservatories.org/thredds/dodsC/ooi/'
                'pdaniel@mbari.org/20180822T045324-' + _stream_name)
# (deployment number, time span embedded in the file name), newest first
_deployment_spans = [
    (3, '20161125T011706.633000-20180821T234514.654000'),
    (2, '20151214T202006.149000-20161205T090522.930000'),
    (1, '20150218T211507.035000-20150611T000008.454000'),
]
datasets = [
    '{}/deployment{:04d}_{}_{}.nc'.format(_request_dir, number, _stream_name, span)
    for number, span in _deployment_spans
]

In [None]:
def open_data(file):
    """Open one netCDF file and index it by time instead of observation number.

    Parameters
    ----------
    file : str
        Path or OPeNDAP URL handed to ``xr.open_dataset``.

    Returns
    -------
    The opened dataset with its 'obs' dimension swapped for 'time'.
    """
    ds = xr.open_dataset(file)
    # swap_dims returns a new object, so the result must be returned to the
    # caller (the original accessed a misspelled attribute and returned None).
    return ds.swap_dims({'obs': 'time'})

In [8]:
# Open each deployment file separately, keeping the datasets in list order.
all_ds = list(map(xr.open_dataset, datasets))


0 https://opendap.oceanobservatories.org/thredds/dodsC/ooi/pdaniel@mbari.org/20180822T045324-GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument/deployment0003_GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument_20161125T011706.633000-20180821T234514.654000.nc
1 https://opendap.oceanobservatories.org/thredds/dodsC/ooi/pdaniel@mbari.org/20180822T045324-GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument/deployment0002_GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument_20151214T202006.149000-20161205T090522.930000.nc
2 https://opendap.oceanobservatories.org/thredds/dodsC/ooi/pdaniel@mbari.org/20180822T045324-GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument/deployment0001_GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument_20150218T211507.035000-20150611T000008.454000.nc


In [8]:
# OPeNDAP URL of the deployment 3 file, split across lines for readability
# (adjacent string literals are concatenated into a single URL).
data_url = (
    "https://opendap.oceanobservatories.org/thredds/dodsC/ooi/pdaniel@mbari.org/"
    "20180822T045324-GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument/"
    "deployment0003_GS01SUMO-RID16-03-CTDBPF000-telemetered-ctdbp_cdef_dcl_instrument_"
    "20161125T011706.633000-20180821T234514.654000.nc"
)
# Open just this one file as an xarray Dataset.
ds = xr.open_dataset(data_url)

In [10]:
# Open all deployment files as one combined Dataset. The stray positional
# `compat` argument was an undefined name (NameError) and has been removed.
ds = xr.open_mfdataset(datasets)
# Index the data by timestamp rather than by observation counter.
ds = ds.swap_dims({'obs': 'time'})
ds = ds.sortby('time') # data from different deployments can overlap so we want to sort all data by time stamp.

Let's print the data variables that have a standard name.

In [15]:
# Print only the variables that carry a standard_name attribute. Testing the
# attrs mapping directly replaces the bare try/except, which also hid any
# unrelated error raised inside the loop.
for var in ds.variables:
    if 'standard_name' in ds[var].attrs:
        print(var)

time
conductivity
pressure
temp
practical_salinity
density
lat
lon


In [3]:
%%time
# Variables to pull out of the Dataset; "time" is included so the timestamps
# travel with the measurements.
variables_of_interest = [
    "conductivity",
    "pressure",
    "temp",
    "practical_salinity",
    "density",
    "time",
]
# One array of raw values per variable, in the order listed above.
data = [ds[name].values for name in variables_of_interest]
# Stack the per-variable arrays and transpose, so the variables end up along
# the last axis (columns, assuming each variable is 1-D — TODO confirm).
data_array = np.transpose(np.array(data))

NameError: name 'ds' is not defined

__Convert dataset into a pandas dataframe__

In [61]:
# Build a DataFrame with one column per variable of interest.
df = pd.DataFrame(data=data_array, columns=variables_of_interest)
# Parse the raw time values and use them as the index.
df['dateTime'] = pd.to_datetime(df['time'])
df.index = df['dateTime']
# drop() returns a new frame; the original discarded the result, so the raw
# 'time' column was never actually removed.
df = df.drop(labels=['time'], axis=1)
# convert_objects() was deprecated and later removed from pandas. Convert the
# measurement columns explicitly; unparseable values become NaN.
numeric_columns = df.columns.drop('dateTime')
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

#### Subsample to hourly data

In [63]:
# Average the observations into one-hour bins.
hourly_means = df.resample('1H').mean()
# Expose the bin timestamps as a regular 'dateTime' column as well as the index.
hourly = hourly_means.assign(dateTime=hourly_means.index)

### Pickle data for ease of access

In [65]:
# to_pickle does not create missing folders, so make sure the output
# directory exists before writing (a no-op when it is already there).
os.makedirs('./data_dump', exist_ok=True)
hourly.to_pickle('./data_dump/surface_ctd')