# IRF - Uppsala Python Workshop: Snakes in Space 🐍
author: Louis Richard\
e-mail: louisr@irfu.se\
date: 29/02/2024

## Time series, spectrogram, VDFs, & co.
Introduction to data structures for space plasma analysis:
    - The xarray package\
    - DataArrays for time series and spectrograms.\
    - Datasets for velocity distribution functions.

In [1]:
pip install xarray

Note: you may need to restart the kernel to use updated packages.


## Time series

### import packages

In [2]:
import numpy as np
import pycdfpp
import xarray as xr

### Read a CDF file (here MMS's FGM)

In [3]:
cdf = pycdfpp.load(
    "../../data/mms1_fgm_brst_l2_20201119233633_v5.270.0.cdf"
)


### Get variable

In [4]:
def _get_variable(cdf, key):
    r"""Read `key` CDF zVariable data and attributes in `cdf` file.

    Parameters
    ----------
    cdf : pycdfpp._pycdfpp.CDF
        CDF file object.
    key : str
        Name of the zVariable to get.

    Returns
    -------
    zvar_dict : dict
        Hashtable with zVariable data and metadata.

    Raises
    ------
    KeyError: if key is not in cdf zVariables.

    """

    try:
        zvar_data = np.squeeze(cdf[key].values)
        zvar_attrs = {k: v[0] for k, v in cdf[key].attributes.items()}
        zvar_dict = {"data": zvar_data, "attrs": zvar_attrs}
    except IndexError:
        raise KeyError("zVariable not found in CDF file!!")

    return zvar_dict

In [5]:
zvar = _get_variable(cdf, "mms1_fgm_b_gse_brst_l2")
zvar

{'data': array([[  7.4057703,  -7.5381603, -17.104761 ,  20.105772 ],
        [  7.436583 ,  -7.55284  , -17.238848 ,  20.236748 ],
        [  7.460041 ,  -7.537707 , -17.317068 ,  20.306404 ],
        ...,
        [ 12.072721 ,  -9.327868 , -17.168833 ,  22.967989 ],
        [ 12.040135 ,  -9.351912 , -17.224491 ,  23.002308 ],
        [ 12.019228 ,  -9.317156 , -17.268042 ,  23.009922 ]],
       dtype=float32),
 'attrs': {'CATDESC': 'Magnetic field vector in Geocentric Solar Ecliptic (GSE) cartesian coordinates plus Btotal (128 S/s)',
  'DEPEND_0': 'Epoch',
  'DISPLAY_TYPE': 'time_series',
  'FIELDNAM': 'Magnetic field vector in GSE plus Btotal (128 S/s)',
  'FILLVAL': [-9.999999848243207e+30],
  'FORMAT': 'E13.5',
  'LABL_PTR_1': 'label_b_gse',
  'UNITS': 'nT',
  'VALIDMIN': [-20000.0, -20000.0, -20000.0, 0.0],
  'VALIDMAX': [20000.0, 20000.0, 20000.0, 20000.0],
  'VAR_TYPE': 'data',
  'SCALETYP': 'linear',
  'COORDINATE_SYSTEM': 'GSE',
  'SI_CONVERSION': '1.0e-9>T',
  'TENSOR_ORDER

In [6]:
depend_0_key = zvar["attrs"]["DEPEND_0"]
depend_1_key = zvar["attrs"]["REPRESENTATION_1"]
print(depend_0_key, depend_1_key)

Epoch represent_vec_tot


In [7]:
depend_0 = _get_variable(cdf, depend_0_key)

# Since DEPEND_0 is time we convert to datetime64
depend_0["data"] = pycdfpp.to_datetime64(depend_0["data"])


depend_1 = _get_variable(cdf, depend_1_key)

In [8]:
depend_0

{'data': array(['2020-11-19T23:36:33.022456034', '2020-11-19T23:36:33.030268642',
        '2020-11-19T23:36:33.038081250', ...,
        '2020-11-19T23:38:42.000807101', '2020-11-19T23:38:42.008619709',
        '2020-11-19T23:38:42.016432318'], dtype='datetime64[ns]'),
 'attrs': {'CATDESC': 'Interval centered time tag (TBC)',
  'FIELDNAM': 'Time since Jan 1, 1958',
  'FILLVAL': [9999-12-31T23:59:59.999999999],
  'LABLAXIS': 'mms1_fgm_brst_Epoch',
  'UNITS': 'ns',
  'VALIDMIN': [2010-01-01T00:00:00.000000000],
  'VALIDMAX': [2029-12-31T23:59:59.999000000],
  'VAR_TYPE': 'support_data',
  'SCALETYP': 'linear',
  'MONOTON': 'INCREASE',
  'TIME_BASE': 'J2000',
  'TIME_SCALE': 'Terrestrial Time',
  'REFERENCE_POSITION': 'Rotating Earth Geoid',
  'SI_CONVERSION': '1.0e-9>s',
  'DELTA_PLUS_VAR': 'mms1_fgm_bdeltahalf_brst_l2',
  'DELTA_MINUS_VAR': 'mms1_fgm_bdeltahalf_brst_l2'}}

In [9]:
depend_1

{'data': array([b'x', b'y', b'z', b'r'], dtype='|S1'),
 'attrs': {'CATDESC': 'representation for vector plus total',
  'FIELDNAM': 'representation for vector plus total',
  'FORMAT': 'A2',
  'LABLAXIS': 'represent_vec_tot',
  'VAR_TYPE': 'metadata'}}

### Construct time series as xarray.DataArray

In [10]:
zvar_ts = xr.DataArray(
    zvar["data"],
    coords=[depend_0["data"], depend_1["data"]],
    dims=[depend_0_key, depend_1_key],
    attrs=zvar["attrs"],
)

zvar_ts

In [11]:
zvar_ts[depend_0_key].attrs = depend_0["attrs"]
zvar_ts[depend_1_key].attrs = depend_1["attrs"]

In [12]:
zvar_ts

In [13]:
import matplotlib.pyplot as plt

In [14]:
np.linalg.norm(zvar["data"], axis=1)

array([28.433857, 28.619085, 28.717592, ..., 32.481644, 32.530178,
       32.540943], dtype=float32)

In [15]:
np.linalg.norm(zvar_ts, axis=1)

array([28.433857, 28.619085, 28.717592, ..., 32.481644, 32.530178,
       32.540943], dtype=float32)

In [16]:
zvar_ts.Epoch.data > np.datetime64("2020-11-19T23:36:33.038081250")

array([False, False, False, ...,  True,  True,  True])

In [17]:
cond = np.logical_and(zvar_ts.Epoch.data > np.datetime64("2020-11-19T23:36:33.038081250"), 
               zvar_ts.Epoch.data < np.datetime64("2020-11-19T23:36:33.048081250"))

In [18]:
zvar_ts[cond, :]

## Energy spectra and VDFs (or anything with time varying coordinates)

### Energy spectra: time x energy(time)

In [19]:
cdf = pycdfpp.load(
    "../../data/mms1_fpi_brst_l2_dis-moms_20201119233633_v3.3.0.cdf"
)
cdf

CDF:
  version: 3.7.1
  majority: Adaptative column
  compression: None

Attributes:
  Project: "STP>Solar-Terrestrial Physics"
  Source_name: "MMS1>MMS Satellite Number 1"
  Discipline: "Space Physics>Magnetospheric Science"
  Data_type: "brst_l2_dis-moms"
  Descriptor: "DIS>Dual Ion Spectrometers"
  File_naming_convention: "source_descriptor_datatype_yyyyMMddHHmmss"
  Data_version: "3.3.0"
  PI_name: "J. Burch, B. Giles"
  PI_affiliation: "SwRI, GSFC"
  TEXT: "FPI usually operates in Fast Survey (FS) Mode in the MMS Region Of Interest (ROI) for the current Mission Phase.  Data are taken at burst (30/150 ms for DES/DIS) resolution in this mode.  Data are also made available at survey (4.5 s, etc) resolution.  Per mission design, not all burst-resolution data are downlinked, but all survey data are downlinked.  Planning around calibration activities, avoidance of Earth radiation belts, etc, when possible, FPI usually operates in Slow Survey (SS) Mode outside of ROI, and then only the 6

### Get differential energy flux energy spectrogram

In [20]:
zvar = _get_variable(cdf, "mms1_dis_energyspectr_omni_brst")
depend_keys = [zvar["attrs"][f"DEPEND_{i:d}"] for i in range(2)]
depends = [_get_variable(cdf, depend_key) for depend_key in depend_keys]
depends[0]["data"] = pycdfpp.to_datetime64(depends[0]["data"])

In [21]:
depend_keys

['Epoch', 'mms1_dis_energy_brst']

In [22]:
depends

[{'data': array(['2020-11-19T23:36:33.101532000', '2020-11-19T23:36:33.251532000',
         '2020-11-19T23:36:33.401532000', '2020-11-19T23:36:33.551532000',
         '2020-11-19T23:36:33.701532000', '2020-11-19T23:36:33.851532000',
         '2020-11-19T23:36:34.001541000', '2020-11-19T23:36:34.151541000',
         '2020-11-19T23:36:34.301541000', '2020-11-19T23:36:34.451541000',
         '2020-11-19T23:36:34.601541000', '2020-11-19T23:36:34.751541000',
         '2020-11-19T23:36:34.901541000', '2020-11-19T23:36:35.051549000',
         '2020-11-19T23:36:35.201549000', '2020-11-19T23:36:35.351549000',
         '2020-11-19T23:36:35.501549000', '2020-11-19T23:36:35.651549000',
         '2020-11-19T23:36:35.801549000', '2020-11-19T23:36:35.951549000',
         '2020-11-19T23:36:36.101558000', '2020-11-19T23:36:36.251558000',
         '2020-11-19T23:36:36.401558000', '2020-11-19T23:36:36.551558000',
         '2020-11-19T23:36:36.701558000', '2020-11-19T23:36:36.851558000',
         '2020-11

### Create energy spectrogram as xarray.Datasets

In [23]:
# Get dimension names
dimensions = [depend["attrs"]["LABLAXIS"] for depend in depends]

# Construct VDFs as xarray.Dataset
def_i_omni = xr.Dataset(
    {
        "data": ([dimensions[0], "idx1"], zvar["data"]),
        dimensions[0]: depends[0]["data"],
        dimensions[1]: (
            [dimensions[0], "idx1"],
            depends[1]["data"],
        ),  #  dimension 1 (azimuth) is time dependent, therefore we use indices
        "idx1": np.arange(depends[1]["data"].shape[1]),
    }
)

In [24]:
def_i_omni

### Velocity Distributions (VDFs): time x azimuth(time) x elevation x energy(time)

In [25]:
cdf = pycdfpp.load(
    "/Users/louisr/Dropbox/Documents/python-workshop/codes/data/mms1_fpi_brst_l2_dis-dist_20201119233433_v3.3.0.cdf"
)

In [26]:
cdf

CDF:
  version: 3.7.1
  majority: Adaptative column
  compression: None

Attributes:
  Project: "STP>Solar-Terrestrial Physics"
  Source_name: "MMS1>MMS Satellite Number 1"
  Discipline: "Space Physics>Magnetospheric Science"
  Data_type: "brst_l2_dis-dist"
  Descriptor: "DIS>Dual Ion Spectrometers"
  File_naming_convention: "source_descriptor_datatype_yyyyMMddHHmmss"
  Data_version: "3.3.0"
  PI_name: "J. Burch, B. Giles"
  PI_affiliation: "SwRI, GSFC"
  TEXT: "FPI usually operates in Fast Survey Mode in the MMS Region Of Interest (ROI) for the current Mission Phase.  Data are taken at burst (30/150 ms for DES/DIS) resolution in this mode.  Data are also made available at survey (4.5 s, etc) resolution; these form a separate product from this.  Per mission design, not all burst-resolution data are downlinked.  This product contains phase-space distribution maps of those burst-resolution data selected for downlink.  In particular, the (highest possible quality at the time of release) c

### Get velocity distribution and its dependencies

In [27]:
zvar = _get_variable(cdf, "mms1_dis_dist_brst")
depend_keys = [zvar["attrs"][f"DEPEND_{i:d}"] for i in range(4)]
depends = [_get_variable(cdf, depend_key) for depend_key in depend_keys]
depends[0]["data"] = pycdfpp.to_datetime64(depends[0]["data"])

In [28]:
zvar

{'data': array([[[[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          ...,
          [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00]],
 
         [[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 6.3660890e-27],
          [2.5740274e-19, 0.0000000e+00, 0.0000000e+00, ...,
           0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
          [0.0000000e+00, 0.00

### Create VDFs as xarray.Datasets

In [29]:
# Get dimension names
dimensions = [depend["attrs"]["LABLAXIS"] for depend in depends]

# Construct VDFs as xarray.Dataset
vdf_i = xr.Dataset(
    {
        "data": ([dimensions[0], "idx1", dimensions[2], "idx3"], zvar["data"]),
        dimensions[0]: depends[0]["data"],
        dimensions[1]: (
            [dimensions[0], "idx1"],
            depends[1]["data"],
        ),  #  dimension 1 (azimuth) is time dependent, therefore we use indices
        "idx1": np.arange(depends[1]["data"].shape[1]),
        dimensions[2]: depends[2]["data"],
        dimensions[3]: (
            [dimensions[0], "idx3"],
            depends[3]["data"],
        ),  #  dimension 3 (energy) is time dependent, therefore we use indices
        "idx3": np.arange(depends[3]["data"].shape[1]),
    }
)

In [30]:
vdf_i.energy

In [31]:
vdf_i.phi