In [None]:
import os
import numpy as np
import pandas as pd
import cdsapi
import netCDF4
from netCDF4 import num2date
from scipy.io import netcdf

In [None]:
# Open the NetCDF dataset read-only. Replace the placeholder with the path to
# the .nc file; the cells below read from this handle.
file = netCDF4.Dataset('insert_nc_file_here', mode='r')

# Unpacking NetCDF files

In [None]:
# Display the dataset's `variables` mapping to inspect which variables the
# file contains and the metadata attached to each of them.
file.variables

In [None]:
# Look up the 'degree_heating_week' variable and read all of its values with a
# full slice, then display the raw, unprocessed array.
# NOTE(review): netCDF4 presumably returns a masked array here — confirm.
dhw = file.variables['degree_heating_week']  
data = dhw[:]
data

In [None]:
# Peek at a single value: index 0 on the first two axes, index 1 on the third.
data[0, 0, 1]

In [None]:
# Print every dimension object defined in the file, one per line.
for dimension in file.dimensions.values():
    print(dimension)

# Sample observations using geographic restrictions

In [None]:
# Coordinate-based bounding box isolating the Palmyra Islands.
# NOTE(review): the negative longitudes assume the file stores longitude in a
# -180..180 convention — confirm against the dataset.
lat_min, lat_max = 5.5, 7.0
lon_min, lon_max = -163.0, -161.5

# Coordinate values read from the dataset's 'lat' and 'lon' variables.
lat = file.variables['lat'][:]
lon = file.variables['lon'][:]

# Boolean masks for the coordinates that fall inside the box (bounds inclusive),
# converted to the integer positions of the matching entries.
lat_in_box = (lat >= lat_min) & (lat <= lat_max)
lon_in_box = (lon >= lon_min) & (lon <= lon_max)
lat_inds = np.where(lat_in_box)[0]
lon_inds = np.where(lon_in_box)[0]

# Keep everything along the first axis and only the selected latitude and
# longitude positions along the second and third.
degree_heating_week = file.variables['degree_heating_week'][:, lat_inds, lon_inds]

print(degree_heating_week)

# Converting data to tabular format

In [None]:
# Build a long-format table (one row per time / latitude / longitude cell) from
# the degree-heating-week variable, save it as CSV and preview the rows whose
# value is positive.

# Look up the variable and the coordinate variables named after its dimensions.
dhw = file.variables['degree_heating_week']
# NOTE(review): the unpacking assumes the dimensions are ordered
# (time, lat, lon) — confirm against the file.
time_dim, lat_dim, lon_dim = dhw.get_dims()
time_var = file.variables[time_dim.name]
# Decode the numeric time values with the calendar declared on the time
# variable; fall back to num2date's own default ('standard') when the
# attribute is absent. Omitting it mis-decodes files on other calendars.
times = num2date(
    time_var[:],
    time_var.units,
    calendar=getattr(time_var, 'calendar', 'standard'))
latitudes = file.variables[lat_dim.name][:]
longitudes = file.variables[lon_dim.name][:]

# Destination of the CSV export.
output_dir = './'
filename = os.path.join(output_dir, 'output_table.csv')

# Format each time step as an ISO-8601 string exactly once, instead of once
# per grid cell. The object array lets the grid share these strings.
time_labels = np.array([t.isoformat() for t in times], dtype=object)

# Expand the three coordinate axes into full grids ('ij' indexing keeps the
# time, latitude, longitude axis order) and flatten them so that row i of
# every column describes the same cell as element i of the flattened data.
# `times_grid` holds the ISO strings that feed the 'time' column.
times_grid, latitudes_grid, longitudes_grid = [
    x.flatten() for x in np.meshgrid(
        time_labels, latitudes, longitudes, indexing='ij')]

# Assemble the table: one coordinate triple plus its value per row.
df = pd.DataFrame({
    'time': times_grid,
    'latitude': latitudes_grid,
    'longitude': longitudes_grid,
    'dhw': dhw[:].flatten()})

# Write the full table to disk, then show only the rows with a positive value,
# renumbered from zero.
df.to_csv(filename, index=False)
example_output = df[df["dhw"] > 0].reset_index(drop=True)
example_output

In [None]:
# Twice the number of cells (rows x columns) in the exported table.
# NOTE(review): the purpose of the factor 2 is not evident from this notebook — confirm.
df.size * 2