<a href="https://colab.research.google.com/github/sanAkel/ufs_diurnal_diagnostics/blob/main/ARAFS/prep_data/download_buoy_iQuam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Gather case(s) to investigate
- [Start with slides from Vijay](https://docs.google.com/presentation/d/1FXhrcXLyC2L1fdVLXxjJq-1IbEAfROW230I02AnYx6M/edit?usp=sharing)
- [Look closely at the 12/2022–01/2023 event.](https://www.ncei.noaa.gov/monitoring-content/sotc/national/2023/jan/ar-total-precip-1-17-23.jpg)

# Inputs


- URLs of data to download:
  - [in situ SST from NESDIS iQuam](https://www.star.nesdis.noaa.gov/socd/sst/iquam/index.html)

- Start and end dates of `case 1`.  

In [None]:
# `date` is imported here (and not only in the later imports cell) so that this
# cell runs when the notebook is executed from top to bottom.
from datetime import date

# iQuam files to download; the names start with YYYYMM, which is what the
# later processing cell globs for.
urls = [
    'https://star.nesdis.noaa.gov/pub/socd/sst/iquam/v2.10/202212-STAR-L2i_GHRSST-SST-iQuam-V2.10-v01.0-fv02.0.nc',
    'https://star.nesdis.noaa.gov/pub/socd/sst/iquam/v2.10/202301-STAR-L2i_GHRSST-SST-iQuam-V2.10-v01.0-fv02.0.nc',
]

print(urls)

# start and end dates of `case 1` (both days are processed):
start_date = date(2022, 12, 20)
end_date = date(2023, 1, 20)

In [None]:
# Mount Google Drive at /content/drive so that downloaded and processed data
# can be saved there.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime -- confirm before running this notebook elsewhere.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import glob as glob

import numpy as np
import xarray as xr
import pandas as pd

from datetime import date, timedelta

In [None]:
def load_iquam_data(iquam_file_path, obs_ids, obsType):
  """
  Load an iQuam NetCDF file and keep only observations of one platform type.

  The file is closed again before returning; the returned arrays are
  in-memory copies.

  Args:
    iquam_file_path: Path to the iQuam NetCDF file.
    obs_ids: Dictionary mapping observation type names to their IDs.
    obsType: String representing the desired observation type (e.g., 'buoy').

  Returns:
    A tuple of arrays, all restricted to the rows whose `platform_type`
    equals `obs_ids[obsType]`:
    (oType_subset, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag)
    where pId comes from `platform_id` and qcFlag from `quality_level`.

  Raises:
    KeyError: If `obsType` is not a key of `obs_ids`.
  """
  type_id = obs_ids[obsType]

  # Variables to return, in output order (after the platform type itself).
  var_names = ('year', 'month', 'day', 'hour', 'minute',
               'platform_id', 'lat', 'lon', 'sst', 'quality_level')

  # Context manager releases the file handle; this function is called
  # repeatedly, so leaving datasets open would accumulate handles.
  with xr.open_dataset(iquam_file_path, decode_timedelta=False) as ds:
    oType = ds['platform_type'].values
    keep = oType == type_id  # computed once and reused for every variable

    oType_subset = oType[keep]
    # Boolean indexing copies, so the results stay valid after the file closes.
    subset = tuple(ds[name].values[keep] for name in var_names)

  return (oType_subset, *subset)

In [None]:
def load_and_subset_iquam_data(obsType, oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag, best_quality_value):
  """
  Keep only the observations whose quality flag equals a given value.

  Args:
    obsType: Observation type name; currently not used by this function.
    oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag:
      Data arrays, all indexed by the same observation axis.
    best_quality_value: Value that `qcFlag` must equal for a row to be kept.

  Returns:
    A tuple with the same eleven arrays, in the same order, restricted to
    the rows where `qcFlag == best_quality_value`.
  """
  keep = qcFlag == best_quality_value

  # One shared mask applied to every array keeps the rows aligned.
  columns = (oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag)
  return tuple(column[keep] for column in columns)

In [None]:
def filter_iquam_by_input_date(oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag, in_year, in_mon, in_day):
    """
    Select the observations that fall on one calendar day.

    Args:
        oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag:
            Data arrays, all indexed by the same observation axis.
        in_year, in_mon, in_day: Year, month, and day of interest.

    Returns:
        A tuple with the same eleven arrays, in the same order, restricted to
        the rows whose (year, month, day) equal (in_year, in_mon, in_day).
    """
    same_year = year == in_year
    same_month = month == in_mon
    same_day = day == in_day
    on_date = same_year & same_month & same_day

    columns = (oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag)
    return tuple(column[on_date] for column in columns)

In [None]:
# Directory on the mounted drive that receives the downloaded files;
# it is created if it does not exist yet.
drive_path = '/content/drive/MyDrive/UFS-no-RTOFS/AR/work/data/'
os.makedirs(drive_path, exist_ok=True)

# Download every URL into drive_path. The leading `!` is IPython shell-escape
# syntax (not plain Python); {drive_path} and {url} are filled in from the
# Python variables of the same names.
for url in urls:
  !wget -P "{drive_path}" "{url}"

In [None]:
# Check to make sure files have been successfully downloaded

# drive_path already ends with '/', so join instead of concatenating '/*.nc'
# (which produced '.../data//name.nc'). glob returns matches in arbitrary
# order, hence the sort for a stable listing.
iquam_fNames = sorted(glob.glob(os.path.join(drive_path, '*.nc')))

print("The following files are now available:\n")
for fName in iquam_fNames:
  print(fName)

# Analyze data from drifting buoys (in above downloaded files):
- Drifting buoys follow currents.
- They also measure Sea Surface Temperature (SST; at about 20 cm depth)

## Note:
- Downloaded files have data from a bunch of (other) observing platforms besides drifting buoys.

In [None]:
# Observation type name -> numeric ID; the ID is compared against the
# `platform_type` variable of the iQuam files when loading.
obs_ids = {
    'ship': 1,
    'buoy': 2,
    'tMoor': 3,
    'cMoor': 4,
    'Argo': 5,
}

obsType = 'buoy'  # 'buoy' 'cMoor'
use_best_quality = True  # Keep only observations with the quality level below?
best_quality_value = 5  # Quality level to keep (matched exactly, not as a lower bound)

# Fail here with a clear message instead of a bare KeyError later on.
if obsType not in obs_ids:
  raise ValueError(f"Unknown obsType {obsType!r}; expected one of {sorted(obs_ids)}")

# Argo only triggers a warning; processing is not blocked.
if obsType == 'Argo':
  print(f"{obsType} is not allowed for now.\n If you aren't looking carefully, bear the consequences!\n")

In [None]:
# Subset observations of selected type, quality and date.
# One NetCDF file is written to drive_path for every day in
# [start_date, end_date] that has at least one matching observation.

# The input files are looked up by year and month, so each one is loaded
# (and quality-filtered) once and reused for all days of that month instead
# of being re-read for every single day.
loaded_month = None  # (year, month) of the arrays currently held in month_data
month_data = None

current_date = start_date
while current_date <= end_date:
  iYear, iMon, iDay = current_date.year, current_date.month, current_date.day

  if loaded_month != (iYear, iMon):
    pattern = drive_path + f'{iYear}{iMon:02d}*.nc'
    matches = sorted(glob.glob(pattern))
    if not matches:
      raise FileNotFoundError(f"No iQuam file matching {pattern}; was it downloaded?")
    iquam_fName = matches[0]
    print(f'Reading {obsType} from {iquam_fName}')

    month_data = load_iquam_data(iquam_fName, obs_ids, obsType)
    #print(f"Number of obs in ENTIRE month: {len(month_data[0]), len(month_data[9])}")

    if use_best_quality:
      month_data = load_and_subset_iquam_data(obsType, *month_data, best_quality_value)
      #print(f"Best quality: {len(month_data[0]), len(month_data[9])}")

    loaded_month = (iYear, iMon)

  # Pick the current day out of the cached month.
  oType, year, month, day, hour, minute, pId, lat, lon, sst, qcFlag = \
      filter_iquam_by_input_date(*month_data, iYear, iMon, iDay)

  # Days without any matching observation produce no output file.
  if len(sst) > 0:
    print(f"{iYear}/{iMon}/{iDay} \t Gathered {len(sst)} {obsType} SSTs.")

    # Create an xarray Dataset with all variables on a shared 'obs' dimension
    ds_out = xr.Dataset({
        'oType': ('obs', oType),
        'year': ('obs', year),
        'month': ('obs', month),
        'day': ('obs', day),
        'hour': ('obs', hour),
        'minute': ('obs', minute),
        'pId': ('obs', pId),
        'lat': ('obs', lat),
        'lon': ('obs', lon),
        'sst': ('obs', sst),
        'qcFlag': ('obs', qcFlag)
    },
    coords={
        'obs': np.arange(len(oType))
    })
    output_filename = f'iquam_{obsType}_{iYear}{iMon:02d}{iDay:02d}.nc'
    output_path = drive_path + output_filename
    ds_out.to_netcdf(output_path)
    print(f"Saved subset data to {output_path}\n")

  current_date += timedelta(days=1)