#  ATLAS/ICESat-2 Monthly Gridded Sea Ice Freeboard
*Introduce dataset here*

In [1]:
# Install the notebook's dependencies when it is run in Google Colab.
# This cell may take a while to run.
import sys

# Only run the installs when the google.colab module has been loaded, i.e. the code is running in Google Colab
if 'google.colab' in sys.modules: 
    # System packages, installed quietly (-qq)
    !apt-get install -qq libgdal-dev libproj-dev
    # --no-binary makes pip build shapely from source instead of using a prebuilt wheel
    # NOTE(review): presumably so that shapely links against the system libraries used by cartopy — confirm
    !pip install --no-binary shapely shapely --force
    !pip install -q pyproj
    !pip install cartopy
    !pip install netcdf4
    # xarray is pinned to version 0.16.0; the other packages are installed unpinned
    !pip install xarray==0.16.0

# Import notebook dependencies

In [2]:
import os
import subprocess
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
import numpy.ma as ma
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pyproj
from glob import glob
from textwrap import wrap
from scipy.interpolate import griddata

#axes needed for plotting
#from matplotlib.axes import Axes
#from cartopy.mpl.geoaxes import GeoAxes
#GeoAxes._pcolormesh_patched = Axes.pcolormesh

# Suppress all Python warnings so they do not clutter the notebook output
import warnings
warnings.filterwarnings('ignore')

# Increase resolution for notebook outputs: inline figures use the 'retina' format
%config InlineBackend.figure_format = 'retina'
import matplotlib as mpl
# Default resolution (dots per inch) for every figure created in this notebook
mpl.rcParams['figure.dpi'] = 100

```{tip}
Try running this notebook in Google Colab! Hover over the rocketship icon at the top of the page and click Colab to open a new window and run the notebook. <br><br>To run a single cell, type **Shift+Enter**. To run the whole notebook, under **Runtime** click **Run all**. Note that you will have to run the notebook from the very beginning and load all the Google Colab dependencies for the code to work.
```

# Set desired date range 
Our analysis looks at winter data, so we'll only load data for the Northern Hemisphere winter season (Nov-Apr).

In [3]:
def getWinterDateRange(start_year, end_year, start_month="November", end_month="April"):
    """ Build the month-start dates covering every winter season in a range of years.
    One season is generated for each year in range(start_year, end_year): it runs from start_month of that
    year through end_month of the following year. For example, start_year=2018 and end_year=2020 with the
    default months gives Nov 2018-Apr 2019 followed by Nov 2019-Apr 2020.

    Args:
        start_year (str or int): year in which the first winter season begins
        end_year (str or int): year in which the last winter season ends
        start_month (str, optional): name of the month that opens each season (default to November)
        end_month (str, optional): name of the month that closes each season (default to April)

    Returns:
        winters (pd.DatetimeIndex): first-of-month dates for all winter seasons in the input range
         (i.e: 1980-11-01, 1980-12-01, 1981-01-01, 1981-02-01, 1981-03-01, 1981-04-01); empty if
         end_year is not greater than start_year
    """
    first_season = int(start_year)
    final_year = int(end_year)

    # One monthly ('MS' = month start) range per season, flattened into a single list of timestamps
    monthly_stamps = [
        stamp
        for season in range(first_season, final_year)
        for stamp in pd.date_range(start="-".join([str(season), start_month]),
                                   end="-".join([str(season + 1), end_month]),
                                   freq="MS")
    ]
    return pd.to_datetime(monthly_stamps)

In [4]:
# The years bound the winter seasons: 2018 to 2020 covers Nov 2018-Apr 2019 and Nov 2019-Apr 2020
start_year = "2018"
end_year = "2020"
winter_months = getWinterDateRange(start_year, end_year) # Month-start dates for every winter month in the range
print(winter_months)

DatetimeIndex(['2018-11-01', '2018-12-01', '2019-01-01', '2019-02-01',
               '2019-03-01', '2019-04-01', '2019-11-01', '2019-12-01',
               '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
              dtype='datetime64[ns]', freq=None)


# Read data

In [5]:
def read_is2_data(data_dir="IS2SITMOGR4"): 
    """ Read in ATLAS/ICESat-2 Monthly Gridded Sea Ice Freeboard dataset. 
    If the data directory does not already exist on the user's local drive, it is downloaded from the book's google storage bucket (https://console.cloud.google.com/storage/browser/is2-pso-seaice)
    The netcdf files for each month are then read in, in chronological order, as a single xr.Dataset object
    
    Args: 
        data_dir (str, optional): name of data directory containing ICESat-2 data (default to "IS2SITMOGR4", the name of the directory in the bucket)
    Returns: 
        is2_ds (xr.Dataset): monthly data concatenated along a "time" dimension, with one datetime coordinate per monthly file
    Raises: 
        FileNotFoundError: if the data directory is still missing after the download attempt, or if it contains no monthly data files
    
    """
    # Monthly files are named <prefix><YYYYMM><suffix>; the month is read from the part in between
    file_prefix = "IS2SITMOGR4_01_"
    file_suffix = "_004_001.nc"

    # Download data from bucket if it doesn't exist on the user's local drive
    if not os.path.isdir(data_dir): 
        print("Downloading ICESat-2 data from the google storage bucket...")
        exit_status = os.system("gsutil -m cp -r gs://is2-pso-seaice/" + data_dir + " ./") # Make sure theres a space before the final ./ (i.e. " ./")
        # Do not report success when nothing was downloaded (gsutil missing, no network, wrong directory name...)
        if not os.path.isdir(data_dir): 
            raise FileNotFoundError("Download failed (gsutil exit status " + str(exit_status) + "): directory '" + data_dir + "' was not created")
        if exit_status != 0: 
            print("Download finished with errors (gsutil exit status " + str(exit_status) + "); some files may be missing")
        else: 
            print("Download complete")

    # Read in files for each month as a single xr.Dataset
    # Need to create a preprocessing function to call before merging because dimensions and coordinates are not set
    # This allows each DataArray for each month to be merged into one xr.Dataset
    def xr_set_coords_and_dims(da_monthly):
        da_monthly = da_monthly.set_coords(["latitude","longitude","xgrid","ygrid"]) # Set data variables as coordinates
        da_monthly = da_monthly.expand_dims("time") # Set month as a dimension 
        return da_monthly
    
    # os.listdir returns entries in arbitrary order and may include unrelated entries (e.g. .ipynb_checkpoints),
    # so keep only the monthly data files and sort them; sorting by name sorts by YYYYMM, i.e. chronologically
    filenames = sorted(filename for filename in os.listdir(data_dir)
                       if filename.startswith(file_prefix) and filename.endswith(file_suffix))
    if not filenames: 
        raise FileNotFoundError("No files named " + file_prefix + "<YYYYMM>" + file_suffix + " found in directory '" + data_dir + "'")

    is2_ds = xr.open_mfdataset([os.path.join(data_dir, filename) for filename in filenames], # Filepath, including data directory in path
                               concat_dim=["time"], 
                               combine='nested', 
                               preprocess=xr_set_coords_and_dims)
    # The files are concatenated in the order of filenames, so the parsed months line up with the time dimension
    time = [filename[len(file_prefix):-len(file_suffix)] for filename in filenames] # Get time (YYYYMM) from filenames
    is2_ds = is2_ds.assign_coords({"time":pd.to_datetime(time, format = "%Y%m")}) # Add time as coordinate
    return is2_ds

In [6]:
# Load every monthly file (downloading the data first if it is not on the local drive),
# then keep only the time steps that fall in the winter months selected earlier
is2_ds = read_is2_data() # Read in data
is2_ds = is2_ds.sel(time = winter_months) # Get winter months
print(is2_ds)

Downloading ICESat-2 data from the google storage bucket...
Download complete
<xarray.Dataset>
Dimensions:            (time: 12, x: 304, y: 448)
Coordinates:
    longitude          (y, x) float32 dask.array<chunksize=(448, 304), meta=np.ndarray>
    latitude           (y, x) float32 dask.array<chunksize=(448, 304), meta=np.ndarray>
    xgrid              (y, x) float32 dask.array<chunksize=(448, 304), meta=np.ndarray>
    ygrid              (y, x) float32 dask.array<chunksize=(448, 304), meta=np.ndarray>
  * time               (time) datetime64[ns] 2018-11-01 ... 2020-04-01
Dimensions without coordinates: x, y
Data variables:
    projection         (time) int32 -2147483647 -2147483647 ... -2147483647
    ice_thickness      (time, y, x) float32 dask.array<chunksize=(1, 448, 304), meta=np.ndarray>
    ice_thickness_unc  (time, y, x) float32 dask.array<chunksize=(1, 448, 304), meta=np.ndarray>
    num_segments       (time, y, x) float32 dask.array<chunksize=(1, 448, 304), meta=np.ndarray>

# Set interpolation preferences  
The dataset includes uninterpolated data; alternatively, we can interpolate the data using a simple nearest neighbor interpolation function. Because ICESat-2 doesn't provide full monthly coverage, interpolating fills missing grid cells with a best guess based on surrounding data. This helps avoid sampling biases when performing time series analyses, with the caveat that this interpolation method is subjective. <br><br>
In order to define the interpolation bounds (so that we don't try to interpolate over land, or other areas where there wouldn't be sea ice!), we use the [NOAA/NSIDC Climate Data Record of Passive Microwave Sea Ice Concentration](https://nsidc.org/data/g02202) dataset to infer the location of the sea ice. A version of this dataset is included in the netcdf file associated with the jupyter book. If the user wants to interpolate the ICESat-2 data, the NOAA/NSIDC sea ice concentration data will be downloaded from the jupyter book's google storage bucket. 

In [7]:
# Switch for the optional interpolation step: the cells below only load the sea ice
# concentration data and interpolate the ICESat-2 data when this is True
interpolate = True 

In [11]:
def read_book_data(filepath = "icesat2-book-data.nc"): 
    """ Read in data for ICESat2 jupyter book. 
    If the file does not already exist on the user's local drive, it is downloaded from the book's google storage bucket (https://console.cloud.google.com/storage/browser/is2-pso-seaice)
    The netcdf file is then read in as an xr.Dataset object 
    
    Args: 
        filepath (str, optional): name of file to read in (default to "icesat2-book-data.nc", the name of the file in the bucket)
    Returns: 
        book_ds (xr.Dataset): data 
    Raises: 
        FileNotFoundError: if the file is still missing after the download attempt
    
    """
    if not os.path.isfile(filepath): # File is not on the local drive, so download it
        print("Downloading jupyter book data from the google storage bucket...")
        exit_status = os.system("gsutil -m cp gs://is2-pso-seaice/" + filepath + " ./") # Make sure theres a space before the final ./ (i.e. " ./")
        # Do not report success when nothing was downloaded (gsutil missing, no network, wrong file name...)
        if not os.path.isfile(filepath): 
            raise FileNotFoundError("Download failed (gsutil exit status " + str(exit_status) + "): file '" + filepath + "' was not created")
        print("Download complete")

    book_ds = xr.open_dataset(filepath)
    return book_ds

In [12]:
# The sea ice concentration data is only loaded when interpolation is requested
if interpolate == True: 
    cdr_ds = read_book_data()["seaice_conc_monthly_cdr"] # Get CDR data (a single variable selected from the book dataset)
    cdr_ds = cdr_ds.sel(time = winter_months) # Get winter months
    # TODO: the interpolation itself is not performed in this cell
    

Downloading jupyter book data from the google storage bucket...
Download complete


In [None]:
# Replace is2_ds with its interpolated version, passing the sea ice concentration data as seaice_cdr
# NOTE(review): interpolate_is2 is not defined in the visible part of this notebook — confirm it is
# defined or imported before this cell runs, otherwise this raises a NameError
if interpolate == True: 
    is2_ds = interpolate_is2(is2_data = is2_ds, seaice_cdr = cdr_ds)
    

In [None]:
# Select a few variables of interest, keeping only those the loaded dataset actually provides
# (indexing the dataset with a variable name it does not contain raises a KeyError)
# NOTE(review): the dataset summary printed when the data was read lists no ice_type or freeboard
# variable — confirm which variables the data files are expected to contain
requested_vars = ["ice_thickness","ice_thickness_unc","ice_type","freeboard"]
data_vars = [name for name in requested_vars if name in is2_ds.data_vars]
missing_vars = [name for name in requested_vars if name not in data_vars]
if missing_vars: 
    print("Variables not found in the dataset and skipped: " + ", ".join(missing_vars))
is2_ds = is2_ds[data_vars]

In [None]:
# Fill gaps in one variable, month by month, by interpolating from the grid cells that hold data
# onto the full grid
# NOTE(review): the interpolation is done in longitude/latitude space, which is discontinuous at the
# dateline and distorted near the pole — consider the projected xgrid/ygrid coordinates instead. TODO confirm
lons = is2_ds.longitude.values
lats = is2_ds.latitude.values


var = "ice_thickness"
var_interp_list = []
method = 'linear'
for month in is2_ds.time.values: 
    da = is2_ds[var].sel(time = month) # Select just one month of data
    np_da = da.values
    valid = ~np.isnan(np_da) # Grid cells with data this month; computed once and reused below
    if valid.any(): 
        # Boolean indexing already returns 1D arrays, so no flattening is needed
        np_interp = griddata((lons[valid], lats[valid]), # Interpolate
                              np_da[valid],
                              (lons, lats), 
                              fill_value=np.nan,
                              method=method)
    else: 
        # No valid cells to interpolate from: keep the month empty instead of letting griddata fail
        np_interp = np.full(np_da.shape, np.nan)
    da_interp = xr.DataArray(data=np_interp, # convert numpy array --> xr.DataArray
                             dims=da.dims, 
                             coords=da.coords,
                             attrs={**da.attrs,'interpolation_method':method},
                             name=da.name)
    da_interp = da_interp.expand_dims("time") # Add time as a dimension. Allows for merging DataArrays 
    var_interp_list.append(da_interp)
var_interp = xr.merge(var_interp_list) # One xr.Dataset holding the interpolated variable for all months