# Xarray and NetCDF
Import the libraries used in this notebook.

In [1]:
import os
import urllib
import urllib.request

import numpy  as np
import pandas as pd
import xarray as xr

## Data

In [19]:
# Build three 5x5 grids (one per day) whose cells all hold the day index
# 0, 1 and 2, stacked along a leading axis and stored as integers.
temp_data = np.stack(
    [np.full((5, 5), float(day)) for day in range(3)]
).astype(int)
temp_data

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

## Dimensions

In [23]:
# Dimension names, in the same order as the axes of the data array.
dims = ('time', 'lat', 'lon')

# Coordinate labels for each dimension: three consecutive days,
# latitudes descending from 70 to 30 and longitudes ascending from 60 to 100.
coords = dict(
    time=pd.date_range(start='2022-09-01', end='2022-09-03'),
    lat=np.arange(70, 20, -10),
    lon=np.arange(60, 110, 10),
)

## Attributes

In [24]:
# Metadata attached to the DataArray when it is created.
attrs = dict(title='temperature across data array')

## Generate Xarray

In [25]:
# Wrap the raw array with its dimension names, coordinate labels and metadata.
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)
temp

## Add attributes to existing Xarray Data

In [26]:
# Attributes can be added after creation, both on the array itself and on
# its individual coordinates.
temp.attrs['Description'] = 'simple example of xarray'

# Use the CF-convention unit spellings so downstream tools recognise the
# coordinates ('degree_n' is not one of the accepted latitude units).
temp.lat.attrs['units'] = 'degrees_north'
temp.lon.attrs['units'] = 'degrees_east'
temp

## Working with data
  - Indexing
  - Aggregating

In [27]:
# Positional (integer) indexing: second time step, second latitude,
# third longitude.
temp.isel(time=1, lat=1, lon=2)

In [28]:
# Label-based indexing: pick a single cell by its coordinate values.
temp.sel({'time': '2022-09-01', 'lat': 40, 'lon': 80})

In [29]:
# Average over the time dimension, carrying the array's attributes over
# to the result.
avg_temp = temp.mean('time', keep_attrs=True)
avg_temp

In [31]:
# Select one day and a label-based range of longitudes.
# The lower bound (40) lies below the smallest lon label defined above (60).
# NOTE(review): the step of 2 in a label slice appears to act as a positional
# stride over the matched labels rather than a spacing in degrees — confirm
# this is the intended selection.
temp.sel({'time': '2022-09-01', 'lon': slice(40, 80, 2)})

In [33]:
# Variables that make up the dataset: the time-averaged field and the
# original daily field.
data_var = {'avg_temp': avg_temp,
            'temp':    temp}

# Dataset-level metadata. A dedicated name is used so the `attrs` dict that
# the DataArray cell relies on is not overwritten; otherwise re-running that
# cell later would silently attach this title to the DataArray.
dataset_attrs = {'title' : 'temperature data at weather stations with daily and average temps'}

temp_dataset = xr.Dataset(data_vars = data_var,
                          attrs = dataset_attrs)

In [35]:
# Write the dataset to a NetCDF file in the current working directory.
temp_dataset.to_netcdf(path="temp_example.nc")

In [36]:
# Read the file back to verify the round trip. load_dataset reads everything
# into memory and closes the file, so the cell that writes temp_example.nc
# can be re-run without hitting a still-open file handle.
check = xr.load_dataset('temp_example.nc')
check

In [37]:
url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'
fw_filename = "FW_data_CESM_LW_2006_2100.nc"

# Download only when the file is not already present, so re-running the
# notebook does not fetch the data again.
if not os.path.exists(fw_filename):
    # Fetch to a temporary name first and rename afterwards, so an interrupted
    # download is never mistaken for the complete file on the next run.
    partial_download = fw_filename + ".part"
    urllib.request.urlretrieve(url, partial_download)
    os.replace(partial_download, fw_filename)

In [38]:
# Build the path to the downloaded file inside the current working
# directory, then open it as an xarray Dataset.
working_dir = os.getcwd()
fp = os.path.join(working_dir, 'FW_data_CESM_LW_2006_2100.nc')
fw_data = xr.open_dataset(fp)
fw_data

## Select values for the second ensemble member of the netPrec_annual variable
Note that we call out the variable in fw_data using the dot syntax (dataset.variable)

In [41]:
# Pull the netPrec_annual variable out of the dataset and keep only the
# entries whose `member` coordinate equals 2.
member2 = fw_data.netPrec_annual.sel({'member': 2})
member2

What is the maximum value of the `netPrec_annual` variable for the second ensemble member between 2022 and 2100?

In [53]:

# Restrict netPrec_annual to member 2 and to the time labels 2022 through 2100.
member2_2022_2100 = fw_data.netPrec_annual.sel(member = 2, time = slice(2022, 2100))

# Reduce with xarray's own .max(): a single vectorised reduction over the
# whole selection. The previous second computation used Python's builtin
# max(), which loops over the array element by element and only works when
# the selection is one-dimensional.
sub_fw = member2_2022_2100.max().item()

# Same value under the second name this cell previously defined, so any
# later reference to sub_fw2 keeps working.
sub_fw2 = sub_fw
sub_fw2

2431.100323507693