In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

import geopandas as gpd
import geodatasets

import hvplot
import hvplot.pandas

In [2]:
fname = 'WaveTimeSeries_ERA5_CanadianBorder_to_Nuvuk_10mDepth.nc'

fpath = os.path.join(os.getcwd(), "raw_datasets/engelstad/", fname)

ds = xr.open_dataset(fpath, chunks={'t':1000})

print(ds)

<xarray.Dataset> Size: 11GB
Dimensions:     (time: 119800, numberOfCharacters: 11, transect: 2381,
                 longitude: 2381, latitude: 2381)
Coordinates:
  * time        (time) datetime64[ns] 958kB 1979-01-01 ... 2019-12-31T21:00:00
    transect    (numberOfCharacters, transect) |S1 26kB dask.array<chunksize=(11, 2381), meta=np.ndarray>
  * longitude   (longitude) float32 10kB -145.5 -145.5 -145.5 ... -155.0 -155.0
  * latitude    (latitude) float32 10kB 70.1 70.1 70.11 ... 71.18 71.18 71.18
Dimensions without coordinates: numberOfCharacters
Data variables:
    depth       (transect) float32 10kB dask.array<chunksize=(2381,), meta=np.ndarray>
    Hs          (transect, time) float32 1GB dask.array<chunksize=(2381, 119800), meta=np.ndarray>
    Tm01        (transect, time) float32 1GB dask.array<chunksize=(2381, 119800), meta=np.ndarray>
    Dm          (transect, time) float32 1GB dask.array<chunksize=(2381, 119800), meta=np.ndarray>
    Flag_D      (transect, time) float32 1GB

In [3]:
lon_min, lon_max = -143.75, -143.60
lat_min, lat_max = 70.13, 70.17

lat_bi, lon_bi = 70.133940, -143.678497

In [4]:
transect_ids = []

for row in ds.transect.values.T:
    id = ''.join(x.decode('UTF-8') for x in row)
    transect_ids.append(id)
    
transect_ids = np.array(transect_ids, dtype=str)

print(transect_ids)

['ECB_10_1298' 'ECB_10_1306' 'ECB_10_1314' ... 'SWB_9_608  ' 'SWB_9_616  '
 'SWB_9_624  ']


In [23]:
gdf = gpd.GeoDataFrame(
    pd.DataFrame(
        {
            "ID":transect_ids,
            "Color":np.array(['red' if 'EEB' in x else 'blue' for x in transect_ids], dtype='str'),
            'Longitude':ds.longitude.values,
            'Latitude':ds.latitude.values
        }
    ),
    geometry=gpd.points_from_xy(ds.longitude.values, ds.latitude.values),
    crs='4326'
)

  data.crs = crs
  level.crs = crs


In [25]:
geodatasets.data.flatten().keys()

dict_keys(['geoda.airbnb', 'geoda.atlanta', 'geoda.cars', 'geoda.charleston1', 'geoda.charleston2', 'geoda.chicago_health', 'geoda.chicago_commpop', 'geoda.chile_labor', 'geoda.cincinnati', 'geoda.cleveland', 'geoda.grid100', 'geoda.groceries', 'geoda.guerry', 'geoda.health', 'geoda.health_indicators', 'geoda.hickory1', 'geoda.hickory2', 'geoda.home_sales', 'geoda.houston', 'geoda.juvenile', 'geoda.lansing1', 'geoda.lansing2', 'geoda.lasrosas', 'geoda.liquor_stores', 'geoda.malaria', 'geoda.milwaukee1', 'geoda.milwaukee2', 'geoda.ncovr', 'geoda.natregimes', 'geoda.ndvi', 'geoda.nepal', 'geoda.nyc', 'geoda.nyc_earnings', 'geoda.nyc_education', 'geoda.nyc_neighborhoods', 'geoda.orlando1', 'geoda.orlando2', 'geoda.oz9799', 'geoda.phoenix_acs', 'geoda.police', 'geoda.sacramento1', 'geoda.sacramento2', 'geoda.savannah1', 'geoda.savannah2', 'geoda.seattle1', 'geoda.seattle2', 'geoda.sids', 'geoda.sids2', 'geoda.south', 'geoda.spirals', 'geoda.stlouis', 'geoda.tampa1', 'geoda.us_sdoh', 'ny.bb

In [75]:
# world = gpd.read_file(geodatasets.get_path("naturalearth.land"))

# fig, ax = plt.subplots(figsize=(15, 15))

# world.clip([-144, 70, -143, 73]).plot(ax=ax, color='white', edgecolor='black')
# world.clip([-150, 60, -140, 75]).plot(ax=ax, color='white', edgecolor='black')

masked_gdf = gdf.query("Longitude > -143.75").query("Longitude < -143.62")

points = masked_gdf.hvplot.points(geo=True,
            tiles="ESRI", color=masked_gdf.Color, tools=['tap'])

points

In [30]:
gdf.query("Longitude > -143.75").query("Longitude < -143.60")

Unnamed: 0,ID,Color,Longitude,Latitude,geometry
492,EEB_10_2417,red,-143.623734,70.157799,POINT (-143.62373 70.1578)
493,EEB_10_2425,red,-143.634323,70.156006,POINT (-143.63432 70.15601)
494,EEB_10_2433,red,-143.645035,70.155083,POINT (-143.64503 70.15508)
495,EEB_10_2441,red,-143.650314,70.154205,POINT (-143.65031 70.15421)
496,EEB_10_2449,red,-143.655716,70.15416,POINT (-143.65572 70.15416)
497,EEB_10_2457,red,-143.666412,70.153236,POINT (-143.66641 70.15324)
498,EEB_10_2465,red,-143.677094,70.152313,POINT (-143.67709 70.15231)
499,EEB_10_2473,red,-143.687607,70.150566,POINT (-143.68761 70.15057)
500,EEB_10_2481,red,-143.698273,70.149635,POINT (-143.69827 70.14964)
501,EEB_10_2489,red,-143.703506,70.148766,POINT (-143.70351 70.14877)


In [69]:
# print(ds.transect.values[:,0])
# print(ds.coords)
# print(ds.attrs)

Hs = ds.Hs.sel(time='2016-7-31T21:00:00').values

# print(ds.Hs.coords)

print(Hs)

[0.54 0.54 0.54 ... 0.43 0.43 0.43]


In [70]:
def string_to_numpy_bytes(string):
    # Convert each character in the string to a numpy.bytes_ object
    bytes_array = np.array([np.bytes_(char.encode('utf-8')) for char in string], dtype=np.bytes_)
    return bytes_array

str_key = 'EEB_10_2417'
byte_key = string_to_numpy_bytes(str_key)

print(byte_key)

ds.Hs.sel(transect=byte_key)

[b'E' b'E' b'B' b'_' b'1' b'0' b'_' b'2' b'4' b'1' b'7']


TypeError: invalid indexer array, does not have integer dtype: array([b'E', b'E', b'B', b'_', b'1', b'0', b'_', b'2', b'4', b'1', b'7'],
      dtype='|S1')

In [7]:
mask = (ds['longitude'].values > lon_min) * (ds['longitude'].values < lon_max) * (ds['latitude'].values > lat_min) * (ds['latitude'].values < lat_max)
# 
# print(mask)

ds['transect'].values[mask]

# ds_masked = ds.where(mask, drop=True)

ds.close()

IndexError: boolean index did not match indexed array along dimension 0; dimension is 11 but corresponding boolean dimension is 2381

In [6]:
# ds.transect.values