In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from datetime import datetime

In [3]:
# Open the input file as an xarray Dataset and show its summary as the cell output.
input_path = "tco300_omimls_oct2004_to_dec2024_1by1.nc"
dataset = xr.open_dataset(input_path)
dataset

OMI/MLS ground-to-300 hPa column ozone for TOARII. Dimensions: y_elements = latitude, x_elements = longitude. Period: October 2004 - December 2024.

Next, clean the data: ozone column values should never be negative, so any negative values are masked out (replaced with NaN).

In [3]:
# Keep only non-negative ozone values; everything else becomes NaN via `where`.
ozone = dataset["TropoO3"]
is_non_negative = ozone >= 0
cleaned_ozone_dataset = ozone.where(is_non_negative)

# Store the masked variable back in the dataset under the same name.
dataset["TropoO3"] = cleaned_ozone_dataset

In [4]:
ozone = cleaned_ozone_dataset

# Build a real monthly time axis from the 'Month' attribute, which is a
# comma-separated string of month labels. Only the first and last labels are used.
month = ozone.attrs['Month'].split(',')

# Remove spaces from BOTH endpoints: after splitting on ',' either label may carry
# padding, and strptime("%b%Y") rejects any stray whitespace.
first_month = month[0].replace(' ', '')             # October 2004
last_month = month[-1].replace(' ', '')             # December 2024

start_date = datetime.strptime(first_month, "%b%Y")
end_date = datetime.strptime(last_month, "%b%Y")

# One timestamp per month, at month start ('MS'), from the first to the last month inclusive.
time_coords = pd.date_range(start=start_date, end=end_date, freq='MS')

In [5]:
# Attach the monthly timestamps along the existing 'months' dimension, give the
# coordinate and the spatial dimensions descriptive names, then make the timestamp
# coordinate ('yearmonth') the indexing dimension in place of 'months'.
ozone = (
    ozone
    .assign_coords(time=('months', time_coords))
    .rename({"time": "yearmonth", 'y_elements': 'latitude', 'x_elements': 'longitude'})
    .swap_dims({'months': 'yearmonth'})
)

Subset the data to the months needed for the analysis: January 2005 through December 2024 only.

In [6]:
# Label-based selection on 'yearmonth'; both slice endpoints are inclusive.
analysis_period = slice("2005-01-01", "2024-12-01")
ozone_subsetted = ozone.sel(yearmonth=analysis_period)

In [7]:
# Group by calendar month (1-12) and reduce over the 'yearmonth' dimension.
# Both statistics must come from the same subsetted series (January 2005 -
# December 2024); computing the standard deviation from the unsubsetted `ozone`
# would also include the 2004 months and make it inconsistent with the mean.
monthly_mean = ozone_subsetted.groupby('yearmonth.month').mean('yearmonth')
monthly_stdev = ozone_subsetted.groupby('yearmonth.month').std('yearmonth')

In [8]:
# Describe each output variable. The fill value is stored as a number, not the
# string "-999": the netCDF convention is that `_FillValue` has the same type as
# the variable's data, which is floating point here.
monthly_mean = monthly_mean.assign_attrs(_FillValue=-999.0, name="Tropospheric Ozone Mean, Ground to 300 hPa")
monthly_stdev = monthly_stdev.assign_attrs(_FillValue=-999.0, name="Tropospheric Ozone STDEV, Ground to 300 hPa")

# The 'Month' attribute carried over from the input describes the original monthly
# series, not the 12-entry calendar-month statistics, so remove it.
del monthly_mean.attrs['Month']
del monthly_stdev.attrs['Month']

In [9]:
# Rebuild numeric latitude/longitude axes from the comma-separated attribute strings.
lat_tokens = ozone_subsetted.attrs['latitude'].split(',')
first_lat = float(lat_tokens[0])
last_lat = float(lat_tokens[-1])

lon_tokens = ozone_subsetted.attrs['longitude'].split(',')
first_lon = float(lon_tokens[0])
last_lon = float(lon_tokens[-1])

# 1-degree spacing starting at the first value from the attributes.
# NOTE(review): the upper bounds (180.5 / 90.5) are hard-coded; `last_lon` and
# `last_lat` are parsed above but not used to build the axes - confirm this is intended.
lon_coords = np.arange(first_lon, 180.5, 1)
lat_coords = np.arange(first_lat, 90.5, 1)

# Attach the same coordinate arrays to both statistics.
grid_axes = {
    'longitude': ('longitude', lon_coords),
    'latitude': ('latitude', lat_coords),
}
monthly_mean = monthly_mean.assign_coords(**grid_axes)
monthly_stdev = monthly_stdev.assign_coords(**grid_axes)

Now replace missing values with the fill value and save the results as NetCDF4 files.

In [14]:
# `fillna` returns a new DataArray rather than modifying in place, so the result
# must be re-bound; otherwise the NaNs are still present when the array is saved.
monthly_mean = monthly_mean.fillna(-999)
monthly_mean

In [15]:
# Write the calendar-month mean to disk in NETCDF4 format.
mean_output_path = "/glade/u/home/mvoncyga/SOARS_2025/OMIMLS_300hpa_monthly_mean_2005_2024.nc"
monthly_mean.to_netcdf(path=mean_output_path, format="NETCDF4")

In [16]:
# `fillna` returns a new DataArray rather than modifying in place, so the result
# must be re-bound; otherwise the NaNs are still present when the array is saved.
monthly_stdev = monthly_stdev.fillna(-999)
monthly_stdev

In [17]:
# Write the calendar-month standard deviation to disk in NETCDF4 format.
stdev_output_path = "/glade/u/home/mvoncyga/SOARS_2025/OMIMLS_300hpa_monthly_stdev_2005_2024.nc"
monthly_stdev.to_netcdf(path=stdev_output_path, format="NETCDF4")