In [14]:
# conda install netCDF4

In [25]:
# conda install -c conda-forge xarray dask bottleneck

In [26]:
# conda update -n base -c conda-forge conda

In [28]:
import pandas as pd
import numpy as np

import netCDF4 as nc
import xarray as xr

### netCDF4

Following is inspired by https://towardsdatascience.com/read-netcdf-data-with-python-901f7ff61648

In [30]:
# Reading in with netCDF4:
fn = './data/ml_hfi_v1_2000.nc'
ds = nc.Dataset(fn)

In [31]:
print(ds)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): lat(13141), lon(36390)
    variables(dimensions): float64 lat(lat), float64 lon(lon), float64 __xarray_dataarray_variable__(lat, lon)
    groups: 


In [32]:
print(ds.data_model)

NETCDF4


In [33]:
print(ds.__dict__)

{}


In [34]:
for dim in ds.dimensions.values():
    print(dim)

<class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 13141
<class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 36390


In [35]:
for var in ds.variables.values():
    print(var)

<class 'netCDF4._netCDF4.Variable'>
float64 lat(lat)
    _FillValue: nan
unlimited dimensions: 
current shape = (13141,)
filling on
<class 'netCDF4._netCDF4.Variable'>
float64 lon(lon)
    _FillValue: nan
unlimited dimensions: 
current shape = (36390,)
filling on
<class 'netCDF4._netCDF4.Variable'>
float64 __xarray_dataarray_variable__(lat, lon)
    _FillValue: nan
unlimited dimensions: 
current shape = (13141, 36390)
filling on


In [36]:
# additional netCDF4 info at http://schubert.atmos.colostate.edu/~cslocum/netcdf_example.html

### Xarray

from https://stackoverflow.com/questions/66169106/transform-part-of-a-netcdf-file-into-a-dataframe-with-xarray:

In [38]:
dp = xr.open_dataset('./data/ml_hfi_v1_2000.nc')

m2 = dp.to_dataframe()
m2 = m2.dropna().reset_index()
print(m2.head(15))

          lat        lon  __xarray_dataarray_variable__
0  -55.609663 -68.108226                   3.328762e-06
1  -55.609663 -68.098333                   1.328018e-08
2  -55.609663 -68.088440                   1.276196e-13
3  -55.609663 -68.078547                   7.129802e-14
4  -55.609663 -68.068655                   6.915253e-18
5  -55.609663 -68.058762                   8.362481e-10
6  -55.609663 -68.048869                   4.444112e-13
7  -55.609663 -68.038976                   6.182441e-06
8  -55.609663 -68.029084                   7.117076e-07
9  -55.599770 -68.226938                   3.421031e-08
10 -55.599770 -68.217046                   4.486133e-09
11 -55.599770 -68.207153                   6.264514e-09
12 -55.599770 -68.197260                   5.907112e-08
13 -55.599770 -68.187367                   1.511353e-05
14 -55.599770 -68.118118                   5.960242e-10


In [39]:
m2.tail()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
140472840,69.988495,171.830159,1.399456e-07
140472841,69.988495,171.840051,0.000279031
140472842,69.988495,171.849944,3.740434e-05
140472843,69.988495,171.859837,0.0412801
140472844,69.988495,171.86973,0.0008492768


In [40]:
# Trimming data for just CO latitudes
co_coords = m2[(m2.lat <= 41) & (m2.lat >= 37)].copy()

In [41]:
co_coords.head()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
73052098,37.006118,-122.17202,0.527928
73052099,37.006118,-122.162127,0.323778
73052100,37.006118,-122.152235,0.352339
73052101,37.006118,-122.142342,0.182623
73052102,37.006118,-122.132449,0.152508


In [42]:
# And further subsetting for just CO longitudes:
co_coords = co_coords[(co_coords.lon <= 109) & (co_coords.lon >= 102)]

In [43]:
# Confirming we have only the range for CO:
co_coords.describe()

Unnamed: 0,lat,lon,__xarray_dataarray_variable__
count,285623.0,285623.0,285623.0
mean,38.999493,105.499354,0.1522127
std,1.153732,2.019027,0.141121
min,37.006118,102.007238,1.3239550000000001e-17
25%,38.000338,103.748359,0.01960994
50%,38.994557,105.499373,0.133714
75%,39.993724,107.250387,0.2272051
max,40.99289,108.991509,0.9081439


In [44]:
co_coords.shape

(285623, 3)

## Modeling

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras.datasets import mnist

ModuleNotFoundError: No module named 'tensorflow'