### Analyzing arrays

In [1]:
import os
import urllib
import urllib.request

import numpy as np
import pandas as pd

import xarray as xr   # This is the package we'll explore

### Creating an xarray DataArray (single variable)

In [10]:

# data: three stacked 5x5 layers filled with 0, 1 and 2 (one layer per time step)
temp_data = np.stack(
    [np.full((5, 5), layer_value) for layer_value in range(3)]
).astype(int)

# dimension names, in the same order as the axes of temp_data
dims = ('time', 'lat', 'lon')

# coordinate labels for each dimension
coords = dict(
    time=pd.date_range(start="2022-09-01", end="2022-09-03"),
    lat=np.arange(70, 20, -10),
    lon=np.arange(60, 110, 10),
)

# metadata attached to the data array itself
attrs = dict(
    title='temperature across weather stations',
    standard_name='air_temperature',
    units='degree_c',
)

# assemble values, dimension names, coordinates and metadata into one DataArray
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)

In [11]:
# display the DataArray built in the previous cell
temp

### Modifying and adding attributes on the DataArray

In [22]:
# add a free-text description to the array-level metadata
temp.attrs.update(description='simple example of xarray')

# replace the metadata of the latitude coordinate
temp.lat.attrs = dict(
    description='latitude of measurement',
    standard_name='grid_latitude',
    units='degrees_N',
)



### Selecting data from the DataArray

In [26]:
# Three ways of indexing the same DataArray. In a notebook only the value of
# the last expression in a cell is rendered, so methods 1 and 2 are evaluated
# here but not displayed.

# method 1: as an array
# positional indices in dimension order (time, lat, lon)
temp[0,1,2]

# method 2: specifying values along dimensions
# coordinate labels, again in dimension order: time, lat, then every lon
temp.loc["2022-09-01",70,:]

# method 3: using sel, which even allows grabbing nearest
# dimensions are named explicitly; "2022-09-05" is outside the time coordinate,
# so method='nearest' is what lets this lookup resolve to an existing time step
temp.sel(lon=70, time = "2022-09-05", method = 'nearest')

### Creating an xarray Dataset (multiple variables)

In [33]:
# average over the time dimension, leaving a (lat, lon) array
avg_temp = temp.mean(dim='time')

In [34]:
# variables to bundle into the dataset, keyed by the name each one gets
data_vars = dict(temp=temp, avg_temp=avg_temp)

# dataset-level metadata
attrs = dict(title='temperature data at weather station')

# combine both variables and the metadata into a single Dataset
temp_dataset = xr.Dataset(data_vars, attrs=attrs)

In [36]:
# specify file path: don't forget the .nc extension!
fp = os.path.join(os.getcwd(),'temp_dataset.nc')
# save file
temp_dataset.to_netcdf(fp)

# open to check:
# load_dataset reads the file into memory and closes it again, so no handle on
# temp_dataset.nc stays open and re-running this cell can overwrite the file
# (open_dataset would keep the file open for as long as `check` is alive)
check = xr.load_dataset(fp)
check

### Downloading a netcdf file with urllib

In [37]:
url = 'https://arcticdata.io/metacat/d1/mn/v2/object/urn%3Auuid%3A792bfc37-416e-409e-80b1-fdef8ab60033'

# local name the download is saved under (relative to the working directory)
fw_filename = "FW_data_CESM_LW_2006_2100.nc"

if os.path.exists(fw_filename):
    # A previous run already fetched the file: skip the network request.
    # Keep msg in the (filename, headers) shape that urlretrieve returns;
    # there are no response headers for a cached file.
    msg = (fw_filename, None)
else:
    # Download under a temporary name and rename only once the transfer has
    # finished, so an interrupted download is never mistaken for a cached file.
    partial_path = fw_filename + ".part"
    _, response_headers = urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, fw_filename)
    msg = (fw_filename, response_headers)

In [38]:
# absolute path of the netCDF file saved by the download cell
fp = os.path.join(os.getcwd(), 'FW_data_CESM_LW_2006_2100.nc')

# open it as an xarray Dataset and show its summary
fw_data = xr.open_dataset(filename_or_obj=fp)
fw_data

In [1]:
# pick the runoff_annual variable and select the entry labelled member == 1
fw_data['runoff_annual'].sel(member=1)

NameError: name 'fw_data' is not defined

### Reading a CSV file into pandas

In [10]:
# positions of the columns that represent dimensions; they are used as the index
dimension_columns = [0, 1, 2]

# read the CSV (path is relative to the working directory) and display it
temp = pd.read_csv(
    'data/netcdf_temp_data.csv',
    index_col=dimension_columns,
)
temp

FileNotFoundError: [Errno 2] No such file or directory: 'data/netcdf_temp_data.csv'

FileNotFoundError: [Errno 2] No such file or directory: '/home/mjewell/scalable-computing-examples/04-pleasing/netcdf_temp_data.csv'