# Download, crop and save OSTIA

### Summary 
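This notebook downloads OSTIA sea surface temperature (SST) with the Copernicus Marine Python package (`copernicusmarine`), masks it to the Chesapeake Bay region (excluding Delaware Bay), and saves the result as a NetCDF file.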

### Outputs
* A NetCDF file with the raw SST data, masked to the Chesapeake Bay region, saved under `data/01_raw`

### Notes
* Runtime ~5 minutes
* Data product [webpage](https://data.marine.copernicus.eu/product/SST_GLO_SST_L4_NRT_OBSERVATIONS_010_001/services)

In [None]:
import os
import glob
import tempfile
from pathlib import Path

import copernicusmarine
import xarray as xr
import rioxarray
import geopandas as gpd

In [None]:
# Absolute path to the local clone of the project repository; the output
# data folder is created beneath it.
# NOTE: this path is machine-specific -- edit it to match your own checkout.
REPO_ROOT = Path('/Users/rwegener/repos/journalarticle_chesapeake_mhw/')

## Login and download

In [None]:
# Authenticate with the Copernicus Marine service before requesting data.
# NOTE(review): presumably prompts for and stores account credentials --
# confirm against the copernicusmarine documentation.
copernicusmarine.login()

In [None]:
# Create a temporary directory for storing the un-cropped raw download.
# It is removed explicitly with scratch_dir.cleanup() at the end of the
# notebook, once the masked data has been saved.
scratch_dir = tempfile.TemporaryDirectory()

In [None]:
# Copernicus Marine identifier of the OSTIA SST dataset to download
dataset_id = 'METOFFICE-GLO-SST-L4-NRT-OBS-SST-V2'

# Used OSTIA version 3.5
# Request parameters: one variable, a time span, and a lon/lat bounding box.
# The file is written into the temporary scratch directory.
subset_request = dict(
    dataset_id=dataset_id,
    variables=["analysed_sst"],
    start_datetime="2003-01-01T00:00:00",
    end_datetime="2023-12-31T00:00:00",
    minimum_longitude=-77.5,
    maximum_longitude=-75.5,
    minimum_latitude=36.75,
    maximum_latitude=40,
    output_directory=scratch_dir.name,
)
copernicusmarine.subset(**subset_request)

In [None]:
# Locate the NetCDF file written by the download step. Exactly one file is
# expected: zero matches means the download produced nothing, and more than
# one means the scratch directory holds something unexpected. Checking for
# "exactly one" (rather than "more than one") stops an empty result from
# being reported as a success and then failing on the index below.
downloaded_files = glob.glob(scratch_dir.name + '/*.nc')
if len(downloaded_files) != 1:
    raise RuntimeError(
        'Exactly 1 file should be downloaded from Copernicus, '
        f'found {len(downloaded_files)}'
    )

print('Download successful')
# Path (str) of the single downloaded file, opened later with xarray
filepath = downloaded_files[0]

## Crop out Delaware Bay

In [None]:
# Outline of the Chesapeake Bay region as WKT, one vertex per line
# (longitude latitude); the ring closes on its first vertex.
cbay_wkt = (
    'POLYGON (('
    '-75.07331635657022 36.69945277755481,'
    '-75.07331761665449 38.10656782772858, '
    '-75.37020665599995 38.29321651673962,'
    '-75.7561692781297 39.85271304991599, '
    '-77.9036114835175 39.860284284356595,'
    '-77.9432159124284 36.7312001366339, '
    '-75.07331635657022 36.69945277755481'
    '))'
)
# Parse the WKT into a one-element GeoSeries, then wrap it in a
# GeoDataFrame tagged as EPSG:4326 so it can be used for masking.
cbay_shape = gpd.GeoSeries.from_wkt([cbay_wkt])
cbay_gdf = gpd.GeoDataFrame(geometry=cbay_shape, crs='EPSG:4326')

In [None]:
# Show the polygon as the cell output, as a visual check of the mask outline
cbay_gdf.geometry.iloc[0]

In [None]:
# Open the downloaded scratch file (filepath was set by the download check)
ds = xr.open_dataset(filepath)
# Set the CRS to EPSG:4326 in place (prerequisite for masking with rioxarray)
ds.rio.write_crs("epsg:4326", inplace=True)

In [None]:
# Mask the opened dataset with the Chesapeake Bay polygon, which leaves the
# Delaware Bay out. The clip is applied to `ds`, the dataset opened above;
# `ds_chesapeake` does not exist until this assignment completes.
# drop=False keeps the original grid extent and masks the cells outside the
# polygon rather than trimming the coordinates.
ds_chesapeake = ds.rio.clip(
    cbay_gdf.geometry.values, cbay_gdf.crs, drop=False
)

## Save Dataset

In [None]:
# Destination folder for the raw data inside the repository. Create it,
# together with any missing parent folders, if it is not already there.
output_dir = REPO_ROOT.joinpath('data/01_raw')
output_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Write the masked dataset to the raw-data folder as NetCDF.
# NOTE(review): the file name says 20070101, while the download request
# starts at 2003-01-01 -- confirm which start date the data actually has.
raw_sst_path = (
    output_dir
    / 'METOFFICE-GLO-SST-L4-NRT-OBS-SST-V2_analysed_sst_CB_20070101-20231231.nc'
)
ds_chesapeake.to_netcdf(raw_sst_path)

In [None]:
# Delete the temporary directory and the un-cropped download inside it;
# the masked dataset has already been saved to the output folder.
scratch_dir.cleanup()