In [465]:
# May require running `pip install netCDF4` first.

Documentation: http://www.odip.org/documents/odip/downloads/20/argo-dm-user-manual.pdf

NetCDF: https://unidata.github.io/netcdf4-python/netCDF4/index.html

In [466]:
import pandas as pd
import numpy as np
import netCDF4
#import urllib
#import xarray


In [467]:
# Kept (commented out) for a later ETL step that will download straight from the FTP server,
# open the file, and push it to the db:
# url='ftp://usgodae.org/pub/outgoing/argo/geo/atlantic_ocean/2020/08/20200801_prof.nc'
# f = urllib.request.urlopen(url)

# The profile file is a multidimensional dataset. It is opened with netCDF4 here
# because an earlier attempt with xarray did not work for this file.
ARGO_PROFILE_PATH = 'data/20200101_prof.nc'
argo = netCDF4.Dataset(ARGO_PROFILE_PATH)

# Display the variable catalogue (name -> netCDF4 Variable) as the cell output.
argo.variables


{'DATA_TYPE': <class 'netCDF4._netCDF4.Variable'>
 |S1 DATA_TYPE(STRING16)
     long_name: Data type
     conventions: Argo reference table 1
     _FillValue: b' '
 unlimited dimensions: 
 current shape = (16,)
 filling on,
 'FORMAT_VERSION': <class 'netCDF4._netCDF4.Variable'>
 |S1 FORMAT_VERSION(STRING4)
     long_name: File format version
     _FillValue: b' '
 unlimited dimensions: 
 current shape = (4,)
 filling on,
 'HANDBOOK_VERSION': <class 'netCDF4._netCDF4.Variable'>
 |S1 HANDBOOK_VERSION(STRING4)
     long_name: Data handbook version
     _FillValue: b' '
 unlimited dimensions: 
 current shape = (4,)
 filling on,
 'REFERENCE_DATE_TIME': <class 'netCDF4._netCDF4.Variable'>
 |S1 REFERENCE_DATE_TIME(DATE_TIME)
     long_name: Date of reference for Julian days
     conventions: YYYYMMDDHHMISS
     _FillValue: b' '
 unlimited dimensions: 
 current shape = (14,)
 filling on,
 'DATE_CREATION': <class 'netCDF4._netCDF4.Variable'>
 |S1 DATE_CREATION(DATE_TIME)
     long_name: Date of

In [468]:
# Print one line per variable: position, dtype, shape and name.
# `enumerate` supplies the position directly (the previous `list(...).index(key)`
# rebuilt and scanned the key list on every iteration), and each variable is read
# from the file once instead of once per printed field.
for position, (key, variable) in enumerate(argo.variables.items()):
    values = variable[:]
    print(position, values.dtype, values.shape, key)

0 |S1 (16,) DATA_TYPE
1 |S1 (4,) FORMAT_VERSION
2 |S1 (4,) HANDBOOK_VERSION
3 |S1 (14,) REFERENCE_DATE_TIME
4 |S1 (14,) DATE_CREATION
5 |S1 (14,) DATE_UPDATE
6 |S1 (142, 8) PLATFORM_NUMBER
7 |S1 (142, 64) PROJECT_NAME
8 |S1 (142, 64) PI_NAME
9 |S1 (142, 3, 16) STATION_PARAMETERS
10 int32 (142,) CYCLE_NUMBER
11 |S1 (142,) DIRECTION
12 |S1 (142, 2) DATA_CENTRE
13 |S1 (142, 32) DC_REFERENCE
14 |S1 (142, 4) DATA_STATE_INDICATOR
15 |S1 (142,) DATA_MODE
16 |S1 (142, 32) PLATFORM_TYPE
17 |S1 (142, 32) FLOAT_SERIAL_NO
18 |S1 (142, 32) FIRMWARE_VERSION
19 |S1 (142, 4) WMO_INST_TYPE
20 float64 (142,) JULD
21 |S1 (142,) JULD_QC
22 float64 (142,) JULD_LOCATION
23 float64 (142,) LATITUDE
24 float64 (142,) LONGITUDE
25 |S1 (142,) POSITION_QC
26 |S1 (142, 8) POSITIONING_SYSTEM
27 |S1 (142,) PROFILE_PRES_QC
28 |S1 (142,) PROFILE_PSAL_QC
29 |S1 (142,) PROFILE_TEMP_QC
30 |S1 (142, 256) VERTICAL_SAMPLING_SCHEME
31 int32 (142,) CONFIG_MISSION_NUMBER
32 float32 (142, 2701) PRES
33 |S1 (142, 2701) PRES_QC
3

In [469]:
def utf_decoding(array):
    """Decode every element of ``array.data`` from UTF-8 bytes to ``str``.

    Parameters
    ----------
    array : object exposing a ``.data`` attribute
        ``.data`` must be iterable and yield objects with a ``decode`` method
        (e.g. the underlying data of a masked array of single bytes).

    Returns
    -------
    list of str
        One decoded string per element, in iteration order.
    """
    decoded = []
    for raw in array.data:
        decoded.append(raw.decode('UTF-8'))
    return decoded

def masked_arrays_decoding(masked_array):
    """Decode a 2-D array of single bytes into one stripped string per row.

    The bytes of a row are joined *before* decoding, so characters encoded on
    several UTF-8 bytes (e.g. accented letters) decode correctly; decoding byte
    by byte raises ``UnicodeDecodeError`` on them.

    Parameters
    ----------
    masked_array : numpy masked array or ndarray, shape (n_rows, n_chars)
        Array of single-byte elements. For masked input the mask is ignored
        and the underlying data is used.

    Returns
    -------
    list of str
        One string per row, with surrounding whitespace removed.

    Raises
    ------
    UnicodeDecodeError
        If a row's bytes are not valid UTF-8.
    """
    # getdata() returns the underlying ndarray for masked input and also accepts
    # plain arrays, so both kinds of input take the same path.
    rows = np.ma.getdata(masked_array)
    # tolist() yields Python bytes objects, which are joined and decoded once per row.
    return [b''.join(row.tolist()).decode('UTF-8').strip() for row in rows]

def param_masked_arrays_decoding(masked_array):
    """Decode an N-dimensional array of single bytes along its last axis.

    The last axis is treated as the characters of one string; every leading
    axis becomes one level of list nesting. For the 4-D scientific calibration
    variables this returns the same ``[a][b][c] -> str`` nesting as before, but
    any rank >= 1 is accepted (a 1-D input yields a single string).

    The bytes of each string are joined before decoding, so multi-byte UTF-8
    characters decode correctly instead of raising ``UnicodeDecodeError``.

    Parameters
    ----------
    masked_array : numpy masked array or ndarray of single bytes
        For masked input the mask is ignored and the underlying data is used.

    Returns
    -------
    nested list of str (or a single str for 1-D input)
        Each string has surrounding whitespace removed.

    Raises
    ------
    UnicodeDecodeError
        If the bytes of a string are not valid UTF-8.
    """
    def _decode(chars):
        # Innermost axis: join the bytes and decode them as one string.
        if chars.ndim == 1:
            return b''.join(chars.tolist()).decode('UTF-8').strip()
        # Otherwise descend one axis and keep the nesting.
        return [_decode(sub) for sub in chars]

    return _decode(np.ma.getdata(masked_array))

# Name of the per-profile dimension in Argo profile files (see the Argo user manual).
# Variables that do not carry it hold a single file-level string, e.g.
# ['DATA_TYPE', 'FORMAT_VERSION', 'HANDBOOK_VERSION', 'REFERENCE_DATE_TIME',
#  'DATE_CREATION', 'DATE_UPDATE'].
PROFILE_DIM = 'N_PROF'

tt = {}
for var, variable in argo.variables.items():
    # ['HISTORY_INSTITUTION', 'HISTORY_STEP', 'HISTORY_SOFTWARE', 'HISTORY_SOFTWARE_RELEASE',
    #  'HISTORY_REFERENCE', 'HISTORY_DATE', 'HISTORY_ACTION', 'HISTORY_PARAMETER',
    #  'HISTORY_START_PRES', 'HISTORY_STOP_PRES', 'HISTORY_PREVIOUS_VALUE', 'HISTORY_QCTEST']
    # are empty so we should not bother with them. They are skipped by name rather than by
    # position, so the filter does not depend on how many variables a file contains.
    if var.startswith('HISTORY_'):
        continue

    # Read the variable once; every `[:]` is a separate read from the file.
    values = variable[:]
    is_text = values.dtype == '|S1'

    if is_text and values.ndim == 1 and PROFILE_DIM not in variable.dimensions:
        # File-level string: join its characters into one value, which pandas
        # broadcasts to every row. Testing the dimension (instead of `len <= 16`)
        # keeps this correct for files with 16 profiles or fewer.
        tt[var] = ''.join(utf_decoding(values)).strip()
    elif not is_text and values.ndim == 1:
        # One number per profile: keep the (masked) array as is.
        tt[var] = values
    elif not is_text and values.ndim > 1:
        # One numeric array per profile.
        tt[var] = list(values.data)
    elif values.ndim == 1:
        # One character per profile.
        tt[var] = utf_decoding(values)
    elif values.ndim == 2:
        # One string per profile.
        tt[var] = masked_arrays_decoding(values)
    elif values.ndim == 3:
        # One list of strings per profile.
        tt[var] = [masked_arrays_decoding(x) for x in values]
    else:
        # Higher-rank text (the scientific calibration variables).
        tt[var] = param_masked_arrays_decoding(values)

argo_df = pd.DataFrame(tt)
argo_df.head()

Unnamed: 0,DATA_TYPE,FORMAT_VERSION,HANDBOOK_VERSION,REFERENCE_DATE_TIME,DATE_CREATION,DATE_UPDATE,PLATFORM_NUMBER,PROJECT_NAME,PI_NAME,STATION_PARAMETERS,...,TEMP,TEMP_QC,TEMP_ADJUSTED,TEMP_ADJUSTED_QC,TEMP_ADJUSTED_ERROR,PARAMETER,SCIENTIFIC_CALIB_EQUATION,SCIENTIFIC_CALIB_COEFFICIENT,SCIENTIFIC_CALIB_COMMENT,SCIENTIFIC_CALIB_DATE
0,Argo profile,3.1,1.2,19500101000000,20200829195536,20200829195536,1901731,US ARGO PROJECT,"BRECK OWENS, STEVEN JAYNE, P.E. ROBBINS","[PRES, TEMP, PSAL]",...,"[26.675, 26.67, 26.663, 26.662, 26.671, 26.669...",1111111111111111111111111111111111111111111111...,"[26.675, 26.67, 26.663, 26.662, 26.671, 26.669...",1111111111111111111111111111111111111111111111...,"[0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.0...","[[PRES, TEMP, PSAL], [, , ], [, , ]]","[[PRES_ADJUSTED = PRES, TEMP_ADJUSTED = TEMP, ...","[[None, None, CTM: alpha=0.141C, tau=6.89s, ri...",[[SOLO-W floats auto-correct mild pressure dri...,"[[20200825000000, 20200825000000, 202008250000..."
1,Argo profile,3.1,1.2,19500101000000,20200829195536,20200829195536,1901818,US ARGO PROJECT,"BRECK OWENS, STEVEN JAYNE, P.E. ROBBINS","[PRES, TEMP, PSAL]",...,"[27.102, 27.103, 27.103, 27.104, 27.104, 27.10...",1111111111111111111111111111111111111111111111...,"[27.102, 27.103, 27.103, 27.104, 27.104, 27.10...",1111111111111111111111111111111111111111111111...,"[0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.0...","[[PRES, TEMP, PSAL], [, , ], [, , ]]","[[PRES_ADJUSTED = PRES, TEMP_ADJUSTED = TEMP, ...","[[None, None, CTM: alpha=0.141C, tau=6.89s, ri...",[[SOLO-W floats auto-correct mild pressure dri...,"[[20200813000000, 20200813000000, 202008130000..."
2,Argo profile,3.1,1.2,19500101000000,20200829195536,20200829195536,1902184,US ARGO PROJECT,"BRECK OWENS, STEVEN JAYNE, P.E. ROBBINS","[PRES, TEMP, PSAL]",...,"[23.352, 23.352, 23.349, 23.348, 23.355, 23.35...",1111111111111111111111111111111111111111111111...,"[23.352, 23.352, 23.349, 23.348, 23.355, 23.35...",1111111111111111111111111111111111111111111111...,"[99999.0, 99999.0, 99999.0, 99999.0, 99999.0, ...","[[PRES, TEMP, PSAL], [, , ], [, , ]]","[[none, none, PSAL_ADJUSTED = salinity + salin...","[[, , salinity_offset = 0.0094105], [, , ], [...","[[, , PSAL ADJUST [dd mm yyyy N S_off stddev] ...","[[, , 20200101230048], [, , ], [, , ]]"
3,Argo profile,3.1,1.2,19500101000000,20200829195536,20200829195536,1902184,US ARGO PROJECT,"BRECK OWENS, STEVEN JAYNE, P.E. ROBBINS","[PRES, TEMP, PSAL]",...,"[23.377, 23.377, 23.382, 23.392, 23.395, 23.39...",1111111111111111111111111111111111111111111111...,"[23.377, 23.377, 23.382, 23.392, 23.395, 23.39...",1111111111111111111111111111111111111111111111...,"[99999.0, 99999.0, 99999.0, 99999.0, 99999.0, ...","[[PRES, TEMP, PSAL], [, , ], [, , ]]","[[none, none, PSAL_ADJUSTED = salinity + salin...","[[, , salinity_offset = 0.0094105], [, , ], [...","[[, , PSAL ADJUST [dd mm yyyy N S_off stddev] ...","[[, , 20200102150028], [, , ], [, , ]]"
4,Argo profile,3.1,1.2,19500101000000,20200829195536,20200829195536,1902208,US ARGO PROJECT,"BRECK OWENS, STEVEN JAYNE, P.E. ROBBINS","[PRES, TEMP, PSAL]",...,"[19.215, 19.229, 19.234, 19.233, 19.23, 19.226...",1111111111111111111111111111111111111111111111...,"[99999.0, 99999.0, 99999.0, 99999.0, 99999.0, ...",,"[99999.0, 99999.0, 99999.0, 99999.0, 99999.0, ...","[[PRES, TEMP, PSAL], [, , ], [, , ]]","[[none, none, none], [, , ], [, , ]]","[[, , ], [, , ], [, , ]]","[[, , ], [, , ], [, , ]]","[[, , ], [, , ], [, , ]]"


In [470]:
# TODO: decide whether to expand all parameter data or to take an average.
# Is depth indicated somewhere, or are all measurements taken at a specific depth? To be confirmed from the documentation.

In [486]:
import folium  # NOTE(review): consider moving this to the imports cell at the top of the notebook

# Base map for the float positions.
# NOTE(review): recent folium releases may no longer bundle the 'Stamen Terrain' tiles - confirm
# against the installed version.
m = folium.Map(location=[50, -60], zoom_start=4.5, tiles='Stamen Terrain')

# Profiles without a position end up as NaN in the frame; they are dropped because a marker
# cannot be placed for them (folium is expected to reject NaN coordinates - confirm).
positions = argo_df[["LATITUDE", "LONGITUDE"]].dropna()

# Iterate over the coordinate values directly rather than passing index labels to `.iloc`,
# which is only correct while the frame has a default 0..n-1 index.
for latitude, longitude in zip(positions["LATITUDE"], positions["LONGITUDE"]):
    folium.Marker(location=[latitude, longitude]).add_to(m)

# Display the map as the cell output.
m