In [50]:
import pandas as pd
import numpy as np
import xarray as xr 
import rioxarray as rio 


### Xarray

from https://stackoverflow.com/questions/66169106/transform-part-of-a-netcdf-file-into-a-dataframe-with-xarray:

In [9]:
dp = xr.open_dataset('./data/ml_hfi_v1_2000.nc')

ml = dp.to_dataframe()
ml = ml.dropna().reset_index()
print(ml.head())

          lat        lon  __xarray_dataarray_variable__
0  -55.609663 -68.108226                   3.328762e-06
1  -55.609663 -68.098333                   1.328018e-08
2  -55.609663 -68.088440                   1.276196e-13
3  -55.609663 -68.078547                   7.129802e-14
4  -55.609663 -68.068655                   6.915253e-18
5  -55.609663 -68.058762                   8.362481e-10
6  -55.609663 -68.048869                   4.444112e-13
7  -55.609663 -68.038976                   6.182441e-06
8  -55.609663 -68.029084                   7.117076e-07
9  -55.599770 -68.226938                   3.421031e-08
10 -55.599770 -68.217046                   4.486133e-09
11 -55.599770 -68.207153                   6.264514e-09
12 -55.599770 -68.197260                   5.907112e-08
13 -55.599770 -68.187367                   1.511353e-05
14 -55.599770 -68.118118                   5.960242e-10


In [10]:
ml.tail()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
140472840,69.988495,171.830159,1.399456e-07
140472841,69.988495,171.840051,0.000279031
140472842,69.988495,171.849944,3.740434e-05
140472843,69.988495,171.859837,0.0412801
140472844,69.988495,171.86973,0.0008492768


In [11]:
# Trimming data for just CO latitudes
co_coords = ml[(ml.lat <= 41) & (ml.lat >= 37)].copy()

In [12]:
co_coords.head()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
73052098,37.006118,-122.17202,0.527928
73052099,37.006118,-122.162127,0.323778
73052100,37.006118,-122.152235,0.352339
73052101,37.006118,-122.142342,0.182623
73052102,37.006118,-122.132449,0.152508


In [13]:
# And further subsetting for just CO longitudes:
co_coords = co_coords[(co_coords.lon <= 109) & (co_coords.lon >= 102)]

In [14]:
# Confirming we have only the range for CO:
co_coords.describe()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
count,285623.0,285623.0,285623.0
mean,38.999493,105.499354,0.1522127
std,1.153732,2.019027,0.141121
min,37.006118,102.007238,1.3239550000000001e-17
25%,38.000338,103.748359,0.01960994
50%,38.994557,105.499373,0.133714
75%,39.993724,107.250387,0.2272051
max,40.99289,108.991509,0.9081439


In [15]:
co_coords.shape

(285623, 3)

### Created desired subset for AOI:
40 - 30N and -110 - -100W

In [51]:
dp = xr.open_dataset('./data/ml_hfi_v1_2019.nc')

In [52]:
dp

In [53]:
dp['__xarray_dataarray_variable__']

In [54]:
# From https://stackoverflow.com/questions/29135885/netcdf4-extract-for-subset-of-lat-lon

ds = xr.open_dataset('./data/ml_hfi_v1_2019.nc')
lat_bnds, lon_bnds = [30, 40], [-110, -100]
subset = ds.sel(lat=slice(*lat_bnds), lon=slice(*lon_bnds))
subset

In [55]:
type(subset)

xarray.core.dataset.Dataset

In [56]:
# Saving dataarray to netCDF
subset.to_netcdf('./data/ml_hfi_subset.nc')

### Convert .nc to geoTIFF
from: https://help.marine.copernicus.eu/en/articles/5029956-how-to-convert-netcdf-to-geotiff

In [57]:
nc_file = xr.open_dataset('./data/ml_hfi_subset.nc')
nc_file

In [47]:
ml_hfi = nc_file['__xarray_dataarray_variable__']

In [48]:
ml_hfi = ml_hfi.rio.set_spatial_dims(x_dim='lon', y_dim='lat')
ml_hfi.rio.crs

MissingSpatialDimensionError: x dimension (lon) not found. Data variable: __xarray_dataarray_variable__

In [49]:
# Define the CRS projection
ml_hfi.rio.write_crs("epsg:4326", inplace=True)

In [7]:
ml_hfi.rio.to_raster(r"ml_hfi2019.tiff")

In [2]:
import rasterio
image_file = "./data/ml_hfi2019.tiff"

mlhfi_image = rasterio.open(image_file)

In [3]:
mlhfi_image

<open DatasetReader name='./data/ml_hfi2019.tiff' mode='r'>