# Chapter 7 - Atmospheric Data Example
### Analyze high ozone events in the atmosphere over a region

In this chapter we exemplify the use of an atmospheric data set, in this case ozone (atually N02 tropospheric column density, that poxies tropospheric ozone), characterize its variability over a given region, and identify high concentration events and # of days of high ozone levels year.

This data estimates ozone in the whole air column, which is not necessarly reflection of ozone at ground level, so it is not a recommended for air quality at the surface. We selected this one because of the lenght of its data

In [5]:
# libraries
# libraries
import numpy as np
import pandas as pd
import xarray as xr
import datetime as dt
import matplotlib.pyplot as plt 
import hvplot.pandas
import hvplot.xarray
import fsspec
import s3fs
import dask
from dask.distributed import performance_report, Client, progress
xr.set_options(display_style="html")  #display dataset nicely

# this library helps to make your code execution less messy
import warnings
warnings.simplefilter('ignore') # filter some warning messages

In [15]:
def fix_accum_var_dims(ds, var):
    # Some varibles like precip have extra time bounds varibles, we drop them here to allow merging with other variables
    
    # Select variable of interest (drops dims that are not linked to current variable)
    ds = ds[[var]]  

    if var in ['air_temperature_at_2_metres',
               'dew_point_temperature_at_2_metres',
               'air_pressure_at_mean_sea_level',
               'northward_wind_at_10_metres',
               'eastward_wind_at_10_metres']:
        
        ds = ds.rename({'time0':'valid_time_end_utc'})
        
    elif var in ['precipitation_amount_1hour_Accumulation',
                 'integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation']:
        
        ds = ds.rename({'time1':'valid_time_end_utc'})
        
    else:
        print("Warning, Haven't seen {var} varible yet! Time renaming might not work.".format(var=var))
        
    return ds

@dask.delayed
def s3open(path):
    fs = s3fs.S3FileSystem(anon=True, default_fill_cache=False, 
                           config_kwargs = {'max_pool_connections': 20})
    return s3fs.S3Map(path, s3=fs)


def open_era5_range(start_year, end_year, variables):
    ''' Opens ERA5 monthly Zarr files in S3, given a start and end year (all months loaded) and a list of variables'''
    
    
    file_pattern = 'era5-pds/zarr/{year}/{month}/data/{var}.zarr/'
    
    years = list(np.arange(start_year, end_year+1, 1))
    months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
    
    l = []
    for var in variables:
        print(var)
        
        # Get files
        files_mapper = [s3open(file_pattern.format(year=year, month=month, var=var)) for year in years for month in months]
        
        # Look up correct time dimension by variable name
        if var in ['precipitation_amount_1hour_Accumulation']:
            concat_dim='time1'
        else:
            concat_dim='time0'
            
        # Lazy load
        ds = xr.open_mfdataset(files_mapper, engine='h5netcdf', 
                               concat_dim=concat_dim, combine='nested', 
                               coords='minimal', compat='override', parallel=True)
        
        # Fix dimension names
        ds = fix_accum_var_dims(ds, var)
        l.append(ds)
        
    ds_out = xr.merge(l)
    
    return ds_out

In [16]:
%%time
ds = open_era5_range(2018, 2020, ['air_pressure_at_mean_sea_level'])

ds

air_pressure_at_mean_sea_level


AttributeError: 'FSMap' object has no attribute 'tell'

In [8]:
ds

Delayed('get_geo_data-5c726605-41cc-48d7-8218-a5e5dfe49a39')

In [7]:
%%time
subset = ds.sel(x=slice(-99,-98),y=slice(18, 19))  #reduce to GS region

masked = subset.LST.where(subset.DQF==0).compute()

masked.isel(time=14).plot(vmin=14+273.15,vmax=30+273.15,cmap='inferno')

IndexError: index -1 is out of bounds for axis 0 with size 0

In [None]:
file_location =  's3://nasanex/MODIS'

ds_ndvi = xr.open_zarr(fsspec.get_mapper(file_location, anon=True),consolidated=True)

ds_ndvi

In [None]:
# input parameters:
# area to analyze: lat, lon ranges
# time frame

In [None]:
# read ozone data
# https://registry.opendata.aws/omi-no2-nasa/
# https://aura.gsfc.nasa.gov/omi.htmld
# s3://omi-no2-nasa/   
# look at the data, description and attributes

# plot overall climatology for the region, and max/min values (orr 10-90th percentiles)''

In [None]:
# plot time series
# identify moments above 90#
# count # days per year above it per year

In [None]:
# resources