# NetCDF and xarray

In [1]:
import os
import urllib
import urllib.request

import numpy as np
import pandas as pd
import xarray as xr # package to explore

In [2]:
# one 5x5 grid per step along the first axis; every cell of grid k holds the constant k
temp_data = np.stack([np.full((5, 5), step, dtype=int) for step in range(3)])

temp_data

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

### Need to add dimensions and coordinates

In [3]:
# axes of temp_data, in order: time, latitude, longitude
# dimensions are passed as a tuple of names, coordinates as a dictionary keyed by those names

# dimension names, listed in the same order as the axes of the data
dims = ('time', 'lat', 'lon')

# coordinate labels used for indexing along each dimension
coords = dict(
    time=pd.date_range(start='2022-09-01', periods=3, freq='D'),
    lat=np.arange(70, 20, -10),   # 70, 60, 50, 40, 30
    lon=np.arange(60, 110, 10),   # 60, 70, 80, 90, 100
)

In [4]:
# metadata describing the variable, kept as a plain dictionary
attrs = dict(
    title='temperature across weather stations',
    standard_name='air_temperature',
    units='degree_c',
)


### Put everything into xarray

In [5]:
# build the xarray.DataArray from the values, coordinates, dimension names and metadata
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)

temp

### Update variable's attributes

In [6]:
# add one more entry to the variable's own attributes
temp.attrs['description'] = 'simple example of an xarray.DataArray'

# attach metadata to each coordinate
temp.time.attrs = {'description': 'date of measurement'}
temp.lat.attrs = {'description': 'grid_latitude', 'units': 'degree_N'}
temp.lon.attrs = {'description': 'grid_longitude', 'units': 'degree_E'}

temp

### Play with Indexing

In [7]:
# positional (integer) indexing: one index per dimension, in the order time, lat, lon
temp[0,1,2]

In [8]:
# better way to do this: select by coordinate label instead of by position.
# This is the same grid point as temp[0,1,2]: position 1 along lat is 60
# and position 2 along lon is 80.

temp.sel(time='2022-09-01', lat=60, lon=80)

### Play with Reduction

In [12]:
# reduce along the time dimension; passing keep_attrs=True to mean()
# would carry temp's attributes over to the result
avg_temp = temp.mean('time')

# here the result is given its own title instead
avg_temp.attrs = {'title': 'average temperature over three day period'}
avg_temp

### xarray.Dataset

In [13]:
# make an xarray.Dataset

# variables that make up the dataset, keyed by variable name
data_vars = {'avg_temp' : avg_temp,
             'temp' : temp}

# dataset-level metadata; kept under its own name so the `attrs` dictionary
# used to build `temp` is not overwritten when this cell runs
dataset_attrs = {'title' : 'temperature data at weather stations: daily and average',
                 'description' : 'simple example of an xarray.Dataset'}

# create xarray.Dataset
temp_dataset = xr.Dataset(data_vars = data_vars, attrs = dataset_attrs)

temp_dataset

### Save and Reopen

In [14]:
# specify file path: don't forget the .nc extension!
fp = os.path.join(os.getcwd(),'temp_dataset.nc')

# save file
temp_dataset.to_netcdf(fp)

# Re-read to check. load_dataset reads the whole file into memory and then
# closes it, so re-running this cell can overwrite temp_dataset.nc without
# the file still being held open by `check`.
check = xr.load_dataset(fp)
check

### Practice Exercise

In [15]:
# NetCDF file hosted on arcticdata.io
url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'
nc_file = "FW_data_CESM_LW_2006_2100.nc"

# download only when the file is not already in the working directory,
# so re-running the notebook does not fetch it again
if not os.path.exists(nc_file):
    urllib.request.urlretrieve(url, nc_file)

In [16]:
# open the downloaded file from the current working directory
fp = os.path.join(
    os.getcwd(),
    'FW_data_CESM_LW_2006_2100.nc',
)
fw_data = xr.open_dataset(fp)
fw_data

In [21]:
# netPrec_annual values where the member coordinate equals 2
# (.sel is label-based; presumably label 2 is the second ensemble member -- TODO confirm labels start at 1)
member2 = fw_data['netPrec_annual'].sel(member=2)

member2

In [34]:
# Find maximum value of second member of netPrec_annual variable in time period of 2022 to 2100

# .max() returns the largest value itself; .argmax() would return its position instead.
# Label-based slices in .sel() include both end points, so 2100 is part of the range.
# NOTE(review): assumes the time coordinate holds integer years in increasing order -- confirm.
x_max = member2.sel(time=slice(2022, 2100)).max()

# unwrap the zero-dimensional result into a plain Python number
x_max.item()