In [25]:
import pathlib
import io
import datetime
import json

import netCDF4
import numpy as np
import pandas as pd
import tqdm

import geojson

In [17]:
# Open the profile dataset that lives under the user's home directory.
# '~' is expanded here, so the notebook does not hardcode a user name.
path = pathlib.Path(
    '~/data/odv/data_from_SDN_2017-11_TS_profiles_non-restricted_med.nc'
).expanduser()
# read-only access ('r' is also netCDF4's default mode)
ds = netCDF4.Dataset(path, mode='r')

In [3]:
# Read the complete station coordinate arrays; `[:]` pulls the whole
# variable out of the file into memory.
lat, lon = (
    ds.variables[name][:]
    for name in ('latitude', 'longitude')
)

In [4]:
# Pair up the coordinates as rows of [lon, lat] (GeoJSON's x, y order),
# then convert to nested plain lists so they can be serialised.
lon_lat_pairs = np.c_[lon, lat]
points = lon_lat_pairs.tolist()
pts = geojson.MultiPoint(coordinates=points)

In [5]:
# Serialise the MultiPoint into an in-memory text buffer ...
stream = io.StringIO()
geojson.dump(pts, stream)

# ... and write the same document to disk.
with open('geojson.json', mode='w') as f:
    geojson.dump(pts, f)

In [6]:

def antimeridian_cut(lon):
    """Wrap longitudes into the half-open interval [-180, 180).

    Values are shifted by 180, reduced modulo 360 and shifted back, so
    e.g. 190 becomes -170, -190 becomes 170 and exactly 180 becomes -180.

    Parameters
    ----------
    lon : scalar or array-like
        Longitude(s) in degrees.

    Returns
    -------
    numpy scalar or ndarray
        Wrapped longitude(s), same shape as the input.
    """
    shifted = np.array(lon) + 180
    wrapped = shifted % 360
    return wrapped - 180

features = []

# --- loop-invariant lookups (read once instead of once per year) ----------
date_var = ds.variables['date_time']
date_units = date_var.units
# Wrap in a masked array so the masked-array API is available even when
# netCDF4 hands back a plain ndarray.
date_time = np.ma.masked_array(date_var[:])

# The coordinate variables may be stored under a short or a long name.
# If neither exists the lookup below fails loudly instead of silently
# reusing whatever `lat` / `lon` an earlier cell left in the kernel.
lat_name = 'lat' if 'lat' in ds.variables else 'latitude'
lon_name = 'lon' if 'lon' in ds.variables else 'longitude'

# slicing in time!
for year in range(2000, 2019):
    # [t0, t1) is the numeric time window covering this calendar year
    t0 = netCDF4.date2num(
        datetime.datetime(year=year, month=1, day=1),
        date_units
    )
    t1 = netCDF4.date2num(
        datetime.datetime(year=year + 1, month=1, day=1),
        date_units
    )

    # NOTE(review): `.data` ignores the mask, so a masked timestamp is
    # classified by its underlying (fill) value -- confirm this is intended.
    is_in_date = np.logical_and(
        date_time >= t0,
        date_time < t1
    ).data
    if not is_in_date.any():
        # no data, skipping
        continue

    dates_in_year = date_time[is_in_date]
    # object array that will hold one datetime per selected station
    t = np.empty(len(dates_in_year), dtype=object)

    # split masked and unmasked values, converting them separately is much
    # faster.  getmaskarray() always returns a full boolean array; `.mask`
    # may be the scalar `nomask`, for which np.where() would only ever
    # return index 0.
    is_masked = np.ma.getmaskarray(dates_in_year)
    dtf = np.where(~is_masked)
    dtt = np.where(is_masked)
    if dtf[0].size:
        t[dtf] = netCDF4.num2date(dates_in_year[dtf], date_units)
    # do we have any masked values?  (test the size: the truth value of an
    # index array with more than one element raises ValueError)
    if dtt[0].size:
        t[dtt] = netCDF4.num2date(dates_in_year[dtt], date_units)
    # NOTE: `t` is not (yet) attached to the features built below.

    # TODO: slicing through Depth... Hard with this sort of unstructured netcdf.
    depth = None

    # Use year-specific names so the notebook-level `lat` / `lon` arrays
    # from the earlier cell are not overwritten with a subset.
    lat_in_year = ds[lat_name][is_in_date]
    lon_in_year = ds[lon_name][is_in_date]

    cdi_id = netCDF4.chartostring(ds.variables['metavar4'][is_in_date])

    # rows of [lon, lat] with longitudes wrapped into [-180, 180)
    coordinates = np.c_[
        antimeridian_cut(lon_in_year),
        lat_in_year
    ].tolist()

    for coordinate, cdi_id_i in zip(coordinates, cdi_id):
        feature = geojson.Feature(
            # running index over the whole collection: a per-year counter
            # would hand out the same ids again for every year
            id=len(features),
            geometry=geojson.Point(coordinate),
            properties={
                "cdi_id": cdi_id_i,
                "year": year
            }
        )
        features.append(feature)

collection = geojson.FeatureCollection(features=features)
with open('features.json', 'w') as f:
    geojson.dump(collection, f)



In [31]:
# Unique CDI identifiers across every feature of the collection.
cdi_id_set = set(
    feature['properties']['cdi_id']
    for feature in collection['features']
)


(1, 9816)

In [None]:
# Station coordinates, stored under either a short or a long variable name.
# NOTE: these are not (yet) used in the responses built below.
if 'lat' in ds.variables:
    station_lat = ds['lat'][:]
elif 'latitude' in ds.variables:
    station_lat = ds['latitude'][:]
if 'lon' in ds.variables:
    station_lon = ds['lon'][:]
elif 'longitude' in ds.variables:
    station_lon = ds['longitude'][:]

# one CDI id string per station
cdi_ids = netCDF4.chartostring(ds.variables['metavar4'][:])

# --- loop-invariant lookups ------------------------------------------------
date_var = ds.variables['date_time']
date_units = date_var.units
# data variables are named 'var<N>'; names containing '_' are skipped
var_names = [
    name
    for name in ds.variables
    if name.startswith('var') and '_' not in name
]
if not var_names:
    raise ValueError("no 'var<N>' data variables found in the dataset")

jsons = []
# sorted() gives a stable order: iterating the raw set depends on string
# hash randomisation, so together with the early `break` below a different
# subset would be exported on every kernel restart.
for i, cdi_id in enumerate(tqdm.tqdm(sorted(cdi_id_set))):

    cdi_id = str(cdi_id)

    # boolean mask over all stations carrying this CDI id (this selects
    # every match, not only the first one)
    idx = cdi_ids == cdi_id

    # add the variables to the list; each one is read from the file once
    variables = {}
    sample_shape = None
    for var_name in var_names:
        var = ds.variables[var_name]
        values = var[idx]
        # shape of the last variable read; used to expand the dates below
        sample_shape = np.shape(values)
        variables[var.long_name] = values.ravel()
    # remember which columns hold measurements before 'Date' is added
    measurement_columns = list(variables)

    # get metadata
    date_nums = date_var[idx]
    date = netCDF4.num2date(date_nums.ravel().max(), date_units)

    # repeat every station's timestamp for each of its samples so the dates
    # line up with the flattened measurement arrays
    date_nums_expanded = np.zeros(sample_shape) + np.atleast_2d(date_nums).T
    dates = netCDF4.num2date(
        date_nums_expanded,
        date_units
    )

    variables['Date'] = [date_i.isoformat() for date_i in dates.ravel()]

    df = pd.DataFrame(data=variables)
    # get rid of missing data: drop samples where every measurement is
    # missing.  'Date' is always populated, so it must be left out of the
    # check or no row would ever be dropped.
    df = df.dropna(how='all', subset=measurement_columns)

    records = json.loads(df.to_json(orient='records'))

    response = {
        "data": records,
        "meta": {
            "date": date.isoformat(),
            "cdi_id": cdi_id
        }
    }
    jsons.append(response)
    # development limit: stop once the first 12 profiles (i = 0..11) are done
    if i > 10:
        break


  0%|          | 0/71810 [00:00<?, ?it/s][A
  0%|          | 2/71810 [00:00<1:41:37, 11.78it/s][A
  0%|          | 4/71810 [00:00<1:36:56, 12.34it/s][A
  0%|          | 6/71810 [00:00<1:35:22, 12.55it/s][A