# Multi-dimensional Timeseries Datasets with XArray

## 1. Download temperature anomaly data and read it into an xarray dataset

In [2]:
import os
import matplotlib.pyplot as plt
import xarray as xr

import utils

# Source of the NetCDF file: a base URL plus the file name.
filename = 'gistemp1200_GHCNv4_ERSSTv5.nc'
data_url = 'https://github.com/spatialthoughts/python-dataviz-web/raw/main/data/gistemp/'

# Fetch the file via the project helper.
# NOTE(review): presumably this saves into utils.data_folder, since the
# file is read from there below -- confirm against utils.download.
utils.download(f'{data_url}{filename}')

# Open the downloaded file as an xarray Dataset
file_path = os.path.join(utils.data_folder, filename)
ds = xr.open_dataset(file_path)

## 2. Selecting, masking, subsetting, and aggregating data

NetCDF Contents:
* Variables: This is similar to a band in a raster dataset. Each variable contains an array of values.
* Dimensions: Each dimension is similar to an axis of an array.
* Coordinates: These are the labels for values in each dimension.
* Attributes: This is the metadata associated with the dataset.

In [3]:
# Array of start/end times for each averaging period
ds.time_bnds

# Temperature anomaly variable as a DataArray
da = ds.tempanomaly

# Temp anomaly values at specific timesteps.
# These bare expressions only demonstrate the selection syntax: a notebook
# cell displays just its final expression, so none of them render output.
da.isel(time=-1)                              # positional: last timestep
da.sel(time='2021-12-15')                     # label-based lookup on time
da.sel(lat=49, lon=-123, time='2021-06-15')   # label-based lookup on all three dims
da.sel(lat=28.6, lon=77.2, time='2021-05-01', method='nearest') # Inexact lookup
da.sel(time=slice('2021-01-01', '2021-12-31'))  # label range along time

# Choose and view one of the values above.
# Bound to its own name so that `selected` below refers to one thing only.
nearest_value = da.sel(lat=28.6, lon=77.2, time='2021-05-01', method='nearest')
print(nearest_value.values)

# Mask then subset with where(): keep only the timesteps at the nearest
# grid cell where the anomaly is positive (drop=True removes the rest).
selected = da.sel(lat=28.6, lon=77.2, method='nearest')
positive = selected.where(selected > 0, drop=True)

# Average temp anomaly for 2021 (kept in a variable instead of being discarded)
subset2021 = da.sel(time=slice('2021-01-01', '2021-12-31'))
mean2021 = subset2021.mean(dim='time')

# Aggregate to yearly means
yearly = da.groupby('time.year').mean(dim='time')

-0.76
