# Xarray and NetCDF

In [1]:
import os
import urllib
import urllib.request

import numpy as np
import pandas as pd
import xarray as xr


In [4]:
# Toy data cube: three stacked 5x5 layers filled with the constants 0, 1 and 2.
temp_data = np.stack([np.full((5, 5), value) for value in (0, 1, 2)]).astype(int)

temp_data

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

In [7]:
# Dimension names, in the same order as the axes of the data array they will label.
dims = ('time', 'lat', 'lon')

# One label per position along each dimension.
coords = dict(
    time=pd.date_range(start='2022-09-01', end='2022-09-03'),
    # Descending: row 0 (the upper-left corner of the array) gets the highest
    # latitude, then the values step down row by row.
    lat=np.arange(70, 20, -10),
    lon=np.arange(60, 110, 10),
)

# Free-form metadata attached to the array.
attrs = dict(title='temperature across data array')

In [9]:
# Wrap the raw array with named dimensions, coordinate labels and metadata.
temp = xr.DataArray(
    temp_data,
    coords=coords,
    dims=dims,
    attrs=attrs,
)

temp

In [10]:
# Add metadata after construction: one attribute on the array itself,
# and one on its latitude coordinate.
temp.attrs.update(description='simple example of xarray')
temp.lat.attrs.update(units='degree_N')

temp

In [13]:
# Positional indexing: integers are given in dimension order (time, lat, lon).
temp[1, 2, 2]

In [15]:
# Label-based selection: pick a single value by coordinate labels rather than positions.
temp.sel(
    time='2022-09-01',
    lat=40,
    lon=80,
)

In [22]:
# Label-based slice along lon for a single day.
# NOTE(review): the lon labels defined for `temp` run 60..100, so only 60-80
# overlap this range -- confirm the bounds and the step of 2 are intended.
temp.sel(
    time='2022-09-01',
    lon=slice(40, 80, 2),
)

In [18]:
# Reduce over the time dimension to get one mean value per (lat, lon) cell;
# keep_attrs=True asks xarray to carry the array's attributes over to the result.
avg_temp = temp.mean('time', keep_attrs=True)

avg_temp

In [24]:
# Bundle the daily values and their time-mean into one Dataset, keyed by variable name.
data_var = dict(avg_temp=avg_temp, temp=temp)

# Dataset-level metadata. Note: this rebinds `attrs`, the same name used
# earlier for the DataArray's attributes.
attrs = dict(title='temperature data at weather stations with daily and average temps')

temp_dataset = xr.Dataset(data_var, attrs=attrs)

temp_dataset

In [25]:
# Persist the Dataset as a NetCDF file in the current working directory.
temp_dataset.to_netcdf(path="temp_example.nc")

In [27]:
# Read the file back to verify the round trip.
# `xr.load_dataset` reads the whole file into memory and then closes it, so no
# open handle is left on "temp_example.nc" (with a lazy `open_dataset` handle
# still open, re-running the `to_netcdf` cell on the same path can fail).
check = xr.load_dataset("temp_example.nc")
check

In [28]:
# Remote NetCDF file (hosted on arcticdata.io) and the local name it is saved under.
url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'
fw_filename = "FW_data_CESM_LW_2006_2100.nc"

# Download once: skip the network round trip when the file is already in the
# working directory, so Restart & Run All stays cheap. Delete the local file
# to force a fresh download (e.g. after an interrupted transfer).
# `msg` keeps urlretrieve's (filename, headers) tuple shape in both branches.
if os.path.exists(fw_filename):
    msg = (fw_filename, None)
else:
    msg = urllib.request.urlretrieve(url, fw_filename)

In [29]:
# Absolute path to the downloaded file, resolved against the current working directory.
fp = os.path.abspath('FW_data_CESM_LW_2006_2100.nc')

# Open the NetCDF file as an xarray Dataset and display its summary.
fw_data = xr.open_dataset(fp)
fw_data

Select values for the second member of the `netPrec_annual` variable

In [35]:
# Three ways to pull out the second member; only the last expression is displayed.

# 1) Positional: index 1 along the second axis. This requires remembering which
#    axis holds `member` and where its labels start.
fw_data['netPrec_annual'][:, 1]

# 2) Label-based selection on the whole Dataset, then pick the variable.
fw_data.sel(member=2).netPrec_annual

# 3) Pick the variable first, then select the member by label.
fw_data['netPrec_annual'].sel(member=2)

What is the max value of the second member of the `netPrec_annual` variable between 2022 and 2100?

In [46]:
# Approach 1: select member 2, then slice the years by label and reduce.
# The result of this expression is a 0-d DataArray (not displayed, since it is
# not the last expression in the cell).
fw_precip2 = fw_data.sel(member=2).netPrec_annual
fw_precip2.sel(time=slice(2022, 2100)).max()

# Approach 2: the same label slice via .loc; .item() unwraps the 0-d result
# into a plain Python scalar instead of an array.
x_max = fw_precip2.loc[2022:2100].max().item()
x_max

# Approach 3: everything in one chained expression (this is the value the cell displays).
fw_data.sel(member=2, time=slice(2022, 2100))['netPrec_annual'].max().item()


2431.100323507693