In [2]:
import os
import urllib
import pandas as pd
import xarray as xr
import numpy as np

## Create an `xarray.DataArray`

In [5]:
# Three 5x5 integer grids stacked along a leading axis: grid k is filled with the value k
temp_data = np.stack([np.full((5, 5), day_value, dtype=int) for day_value in range(3)])
temp_data

array([[[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2]]])

The above data is incomplete because it has no labels for its dimensions (which one is lat, lon, or time) and no units. We want this metadata to be self-contained.

In [7]:
# names of the dimensions, in the same order as the axes of the data array
dims = ('time', 'lat', 'lon')

# coordinates (tick labels) to use for indexing along each dimension
coords = {
    'time': pd.date_range('2022-09-01', '2022-09-03'),  # daily timestamps, both end dates included
    'lat': np.arange(30, 80, 10),  # 30, 40, ..., 70 in steps of 10 (the stop value 80 is excluded)
    'lon': np.arange(60, 110, 10),  # 60, 70, ..., 100 in steps of 10 (the stop value 110 is excluded)
}

# attributes (metadata) of the array
attrs = {
    'title': 'temperature across weather stations',
    'standard_name': 'air_temperature',  # standard names are written with underscores, not spaces
    'units': 'degree_C',  # capitalised like the degree_N / degree_E coordinate units
}


In [9]:
# bundle the raw values with their dimension names, coordinate labels and metadata
temp = xr.DataArray(temp_data, coords=coords, dims=dims, attrs=attrs)

temp

In [None]:
# add descriptive attributes to the latitude and longitude coordinates
# (each .attrs is a dict, so update() inserts the new keys in place)
temp.lat.attrs.update(standard_name='grid_latitude', units='degree_N')
temp.lon.attrs.update(standard_name='grid_longitude', units='degree_E')

In [10]:
# positional (integer) indexing
temp[0, 1, 2] # first day, lat = 40, lon = 80; bare positions are hard to read, so the next cell uses the .sel() method with labels instead

In [12]:
# label-based indexing: select by coordinate value instead of by position,
# which is much easier to read than the integer form
temp.sel({'time': '2022-09-01', 'lat': 40, 'lon': 80})

In [13]:
# average over the time dimension
avg_temp = temp.mean('time')
avg_temp  # the output only shows lat and lon, because the time dimension was reduced away

In [14]:
# give the averaged array its own title (this text is saved with the data,
# so the spelling matters)
avg_temp.attrs = {
    'title': 'average temperature across 3 days'
}

avg_temp

In [17]:
# combine the daily temperature and the average temperature into one dataset
# (a DataArray is a single variable; a Dataset can hold multiple variables)
data_vars = {
    'avg_temp': avg_temp,
    'temp': temp,
}

# dataset-level metadata, kept under its own name so that the `attrs` dict used
# to build `temp` is not overwritten (re-running that earlier cell stays correct)
dataset_attrs = {
    'title': 'temperature data at weather stations: daily and average',
    'description': 'simple xarray.Dataset',
}

temp_dataset = xr.Dataset(data_vars=data_vars, attrs=dataset_attrs)
temp_dataset  # the coordinates of the two variables were paired automatically by xarray

In [19]:
# write the dataset to temp_dataset.nc inside the current working directory
fp = os.path.join(os.getcwd(), 'temp_dataset.nc')
temp_dataset.to_netcdf(path=fp)

In [21]:
# re-open the file to check what was written; load the contents into memory and
# close the file right away, so no open handle is left on temp_dataset.nc
# (a lingering handle can make re-running the to_netcdf cell fail)
with xr.open_dataset(fp) as saved:
    check = saved.load()
check