# Calculate statistics

In [1]:
from nes import *
from datetime import datetime, timedelta
import numpy as np
import xarray as xr

## 1. Read dataset

In [2]:
# Original path: /esarchive/exp/monarch/a4dd/original_files/000/2022111512/MONARCH_d01_2022111512.nc
# Rotated grid from MONARCH
cams_file = '/gpfs/projects/bsc32/models/NES_tutorial_data/MONARCH_d01_2022111512.nc'
%time nessy = open_netcdf(path=cams_file, info=True)

CPU times: user 587 ms, sys: 65.8 ms, total: 653 ms
Wall time: 5.16 s


In [3]:
# Print the first time step, the number of time steps and the last time step
print(nessy.time[0], len(nessy.time), nessy.time[-1])

2022-11-15 12:00:00 37 2022-11-17 00:00:00


### 1.1. Variables

In [4]:
# Keep only the O3 variable and discard the rest
nessy.keep_vars('O3')
# Display the metadata of the remaining variable(s)
nessy.variables

{'O3': {'data': None,
  'dimensions': ('time', 'lm', 'rlat', 'rlon'),
  'dtype': dtype('float32'),
  'long_name': 'TRACERS_044',
  'units': 'unknown',
  'standard_name': 'TRACERS_044',
  'coordinates': 'lon lat',
  'grid_mapping': 'rotated_pole'}}

#### After load

In [5]:
# Load the variable data from the NetCDF file (timed with %time)
%time nessy.load()

Rank 000: Loading O3 var (1/1)
Rank 000: Loaded O3 var ((37, 24, 271, 351))
CPU times: user 294 ms, sys: 1.49 s, total: 1.78 s
Wall time: 8.31 s


In [6]:
# Shape of the loaded O3 data array
nessy.variables['O3']['data'].shape

(37, 24, 271, 351)

In [7]:
# Dimension names of the O3 variable
nessy.variables['O3']['dimensions']

('time', 'lm', 'rlat', 'rlon')

## 2. Calculate 8-hour rolling mean

In [8]:
# Compute the 8-hour rolling mean of O3 and keep the result in rolling_mean
%time rolling_mean = nessy.rolling_mean(var_list='O3', hours=8)
# Display the returned object
rolling_mean

Mean between 2022-11-15 05:00:00 and 2022-11-15 12:00:00 cannot be calculated because data for 2022-11-15 05:00:00 is not available.
Mean between 2022-11-15 06:00:00 and 2022-11-15 13:00:00 cannot be calculated because data for 2022-11-15 06:00:00 is not available.
Mean between 2022-11-15 07:00:00 and 2022-11-15 14:00:00 cannot be calculated because data for 2022-11-15 07:00:00 is not available.
Mean between 2022-11-15 08:00:00 and 2022-11-15 15:00:00 cannot be calculated because data for 2022-11-15 08:00:00 is not available.
Mean between 2022-11-15 09:00:00 and 2022-11-15 16:00:00 cannot be calculated because data for 2022-11-15 09:00:00 is not available.
Mean between 2022-11-15 10:00:00 and 2022-11-15 17:00:00 cannot be calculated because data for 2022-11-15 10:00:00 is not available.
Mean between 2022-11-15 11:00:00 and 2022-11-15 18:00:00 cannot be calculated because data for 2022-11-15 11:00:00 is not available.
Calculating mean between 2022-11-15 12:00:00 and 2022-11-15 19:00:00.

<nes.nc_projections.rotated_nes.RotatedNes at 0x7f2d2e4df390>

In [9]:
# Observe the NaNs that appear where the rolling mean could not be calculated
# because part of the 8-hour window was not available
rolling_mean.variables['O3']['data']

array([[[[       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         ...,
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan]],

        [[       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         [       nan,        nan,        nan, ...,        nan,
                 nan,        nan],
         ...,
         [       nan,        nan,        nan, ...,        nan,
                 nan,    

In [10]:
# Shape of the rolling-mean O3 data array
rolling_mean.variables['O3']['data'].shape

(37, 24, 271, 351)

In [11]:
# Dimension names of the rolling-mean O3 variable
rolling_mean.variables['O3']['dimensions']

('time', 'lm', 'rlat', 'rlon')

## 3. Calculate daily statistics

In [12]:
# Compute the daily mean of the loaded variable(s).
# NOTE(review): the return value is not assigned and the following cells read the
# result from nessy itself, so this call appears to modify nessy in place -
# re-running this cell alone would then not be idempotent (confirm in the NES docs).
%time nessy.daily_statistic(op="mean")

CPU times: user 58.6 ms, sys: 5.01 ms, total: 63.6 ms
Wall time: 63 ms


In [13]:
# Shape of the O3 data array after the daily statistic
nessy.variables['O3']['data'].shape

(2, 24, 271, 351)

In [14]:
# Dimension names of the O3 variable after the daily statistic
nessy.variables['O3']['dimensions']

('time', 'lm', 'rlat', 'rlon')

In [15]:
# Inspect the 'cell_methods' metadata entry of O3 after the daily statistic
nessy.variables['O3']['cell_methods']

'time: mean (interval: 1hr)'

In [16]:
# Inspect the time steps after the daily statistic
nessy.time

[datetime.datetime(2022, 11, 15, 0, 0), datetime.datetime(2022, 11, 16, 0, 0)]

In [17]:
# Inspect the time bounds (one [start, end] pair per daily value)
nessy.time_bnds

[[datetime.datetime(2022, 11, 15, 12, 0),
  datetime.datetime(2022, 11, 15, 23, 0)],
 [datetime.datetime(2022, 11, 16, 0, 0),
  datetime.datetime(2022, 11, 16, 23, 0)]]