# Using Xarray Package to Learn NetCDF Format

In [16]:
import os
import urllib
import urllib.request

import numpy as np
import pandas as pd
import xarray as xr


In [17]:
# Build a 3-dimensional integer array of shape (3, 5, 5):
# three stacked 5x5 layers, where layer k is filled with the value k (0, 1, 2)
temp_data = np.stack([np.full((5, 5), fill_value) for fill_value in range(3)]).astype(int)

temp_data

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

In [18]:
# Names of the three dimensions, in axis order
dims = ('time', 'lat', 'lon')

# Coordinate labels along each dimension, keyed by dimension name
coords = dict(
    time=pd.date_range(start='2022-09-01', end='2022-09-03'),  # three consecutive days
    lat=np.arange(70, 20, -10),  # 70, 60, 50, 40, 30
    lon=np.arange(60, 110, 10),  # 60, 70, 80, 90, 100
)


In [19]:
# Additional attributes (free-form metadata) to attach to the array
attrs = dict(title='temperature across data array')

In [20]:
# Combine the pieces built so far into a labelled xarray DataArray:
#   dims   -> names of the axes (time, lat, lon)
#   coords -> the label values along each of those axes
#   attrs  -> metadata describing the array
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)


In [21]:
# Attach a description to the DataArray as a whole
temp.attrs.update(description='temperature data as an example of xarray')
# Attach units to the 'lat' coordinate only
temp.coords['lat'].attrs['units'] = 'degrees north'
temp

In [22]:
# Pick a single value by integer position along each axis
# (works, but you need to know positions rather than labels)
temp.isel(time=0, lat=1, lon=2)

In [24]:
# Pick a single value by coordinate label instead of position -- easier to read
temp.sel(indexers={'time': '2022-09-01', 'lat': 40, 'lon': 80})

In [26]:
# Average over the 'time' dimension to get a new DataArray;
# keep_attrs=True carries the original attributes over to the result
avg_temp = temp.mean('time', keep_attrs=True)
avg_temp

# xarray Dataset


In [30]:
# Bundle both DataArrays into a single Dataset, keyed by variable name
data_vars = {'avg_temp': avg_temp,
             'temp': temp}

# Dataset-level metadata. Kept under its own name so it does not overwrite
# the `attrs` dict used to build the `temp` DataArray; otherwise re-running
# that earlier cell would silently pick up this title instead.
dataset_attrs = {'title': 'Temperature data with average and daily values by weather station'}

temp_dataset = xr.Dataset(data_vars=data_vars,
                          attrs=dataset_attrs)

temp_dataset

In [31]:
# Write the Dataset to disk as a NetCDF file
temp_dataset.to_netcdf(path="temp_example.nc")

In [32]:
# Read the NetCDF file back in to check the round trip.
# load_dataset reads the contents into memory and closes the file, so the
# export cell can overwrite 'temp_example.nc' on a re-run without tripping
# over a file that is still held open by this variable.
check = xr.load_dataset('temp_example.nc')
check

# Real World Example

In [33]:
# Download a real-world NetCDF dataset (hosted on arcticdata.io)
url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'
nc_filename = "FW_data_CESM_LW_2006_2100.nc"

# Only fetch the file when it is not already present, so re-running the
# notebook does not download it again. The download goes to a temporary
# '.part' name first and is renamed once complete, so an interrupted
# transfer never leaves a truncated file under the final name.
if not os.path.exists(nc_filename):
    partial_download = nc_filename + ".part"
    urllib.request.urlretrieve(url, partial_download)
    os.replace(partial_download, nc_filename)

In [34]:
# Resolve the downloaded file to an absolute path in the current working
# directory, then open it as an xarray Dataset
fp = os.path.abspath('FW_data_CESM_LW_2006_2100.nc')
fw_data = xr.open_dataset(fp)
fw_data

In [48]:
# Take the netPrec_annual variable and keep only the entry whose 'member'
# coordinate equals 2. The selection is by label, so this is the "second"
# member only if the member labels start at 1 -- TODO confirm.
fw_data['netPrec_annual'].sel(member=2)

In [46]:
# Maximum of netPrec_annual for member 2 over the time labels 2022 to 2100,
# returned as a plain Python scalar
(
    fw_data['netPrec_annual']
    .sel(member=2, time=slice(2022, 2100))
    .max()
    .item()
)

2431.100323507693