<a href="https://colab.research.google.com/github/rebeccah2202/diss/blob/main/nc_file_bylakeid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Force a fresh reinstall of netCDF4 built from source (--no-binary netCDF4)
# instead of the prebuilt wheel; --no-deps leaves its dependencies untouched and
# --no-cache-dir prevents pip from reusing a previously cached build.
# NOTE(review): presumably needed so the remote THREDDS/OPeNDAP URLs opened
# further down can be read -- confirm this is still required.
!pip install --upgrade --force-reinstall --no-deps --no-cache-dir netCDF4 --no-binary netCDF4

Collecting netCDF4
  Downloading netCDF4-1.6.5.tar.gz (764 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m765.0/765.0 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: netCDF4
  Building wheel for netCDF4 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for netCDF4: filename=netCDF4-1.6.5-cp310-cp310-linux_x86_64.whl size=2590607 sha256=b045327a1b1d89a9fc10a2c0fac6164ff73218b6fbbb92ced19a1379ab997d3a
  Stored in directory: /tmp/pip-ephem-wheel-cache-pr0qcpsy/wheels/f0/9e/3d/ac311399c51eb3fc265c78aafe1447d7b4f0f577704bb84dc9
Successfully built netCDF4
Installing collected packages: netCDF4
Successfully installed netCDF4-1.6.5


In [None]:
# NOTE(review): cftime is installed explicitly, presumably because the netCDF4
# install above was run with --no-deps -- confirm.
!pip install cftime
import os  # existence checks and output directory creation
import numpy as np  # locating the lake's grid cells in the mask
import xarray as xr  # opening, subsetting and concatenating the daily products
import datetime  # parsing and iterating over the requested dates
import netCDF4 as nc  # reading the static lake mask file



In [None]:
###########################################################################################
# input parameters
#
# Every name assigned in this cell is read by the cells below; edit the values
# here to select another lake, period or output location.
###########################################################################################

# lakes mask file (local file name; downloaded just below when it is missing)
# NOTE(review): the mask is v2.0.1 while `version` below is 2.0.2 -- confirm the
# two grids are meant to be used together.
maskfile = 'ESA_CCI_static_lake_mask_v2.0.1.nc'

# Fetch the mask only when it is not already present locally, so re-running
# the cell does not download it again.
import os
if not os.path.exists(maskfile):
  !wget https://dap.ceda.ac.uk/neodc/esacci/lakes/data/lake_products/L3S/v2.0.1/ESA_CCI_static_lake_mask_v2.0.1.nc

# lake ID: the value searched for in the mask's 'CCI_lakeid' variable
# NOTE(review): presumably the identifier of the lake named in outdir/outprefix
# below -- confirm when changing it.
lake_id = 12262

# defining the period of time in string format: YYYY-MM-DD
# dates values must be between 1992-09-26 and 2020-12-31
# (a later cell clamps values outside this range to its limits)
mindate = '2018-04-01'
maxdate = '2018-09-30'

# version dataset (2.0.2 is the version published in July 2022)
# Used both in the remote directory path and in the product file names.
version = '2.0.2'

# output: directory (created if missing) and file-name prefix of the merged file
outdir = 'output/Leven'
outprefix = 'Leven_'

In [None]:
# Convert the requested period to datetime objects and clamp it to the
# temporal coverage handled by this notebook (1992-09-26 to 2020-12-31).
#
# The isinstance guards keep the cell idempotent: after a first run mindate and
# maxdate are already datetime objects, and calling strptime on them again
# would raise a TypeError.
if isinstance(mindate, str):
    mindate = datetime.datetime.strptime(mindate, '%Y-%m-%d')
if isinstance(maxdate, str):
    maxdate = datetime.datetime.strptime(maxdate, '%Y-%m-%d')

# Dates outside the coverage are silently moved to the nearest covered day.
mindate = max(mindate, datetime.datetime(1992, 9, 26))
maxdate = min(maxdate, datetime.datetime(2020, 12, 31))

# Fail here, with a clear message, rather than after the download loop has
# run zero iterations and left nothing to save.
if mindate > maxdate:
    raise ValueError(
        f'Empty period after clamping to the dataset coverage: '
        f'{mindate:%Y-%m-%d} is later than {maxdate:%Y-%m-%d}'
    )

In [None]:
# Create the output directory (and any missing parents) if it does not exist.
# exist_ok=True makes this a single call that is safe to re-run and avoids the
# check-then-create race of testing os.path.exists first.
os.makedirs(outdir, exist_ok=True)

In [None]:
###################################################################
# create mask based on lake_id
#
# Produces:
#   miny, maxy, minx, maxx : inclusive row/column bounds of the lake's
#                            bounding box in the mask grid, widened by one
#                            cell on each side and kept inside the grid
#   mask_lake              : 2-D array over that box, 1 where the cell
#                            belongs to lake_id and 0 elsewhere
###################################################################

# The context manager closes the file even if an error is raised below.
with nc.Dataset(maskfile) as mask_nc:
    lakeid_var = mask_nc.variables['CCI_lakeid']
    n_rows, n_cols = lakeid_var.shape

    # Row indices are in mask_ind[0], column indices in mask_ind[1].
    mask_ind = np.where(lakeid_var[:] == lake_id)
    if mask_ind[0].size == 0:
        raise ValueError(f'lake_id {lake_id} was not found in {maskfile}')

    # One-cell margin around the lake. The lower bounds are clamped at 0
    # because a negative slice start would wrap around to the end of the axis
    # and select the wrong (or an empty) region; the upper bounds are clamped
    # to the last valid index for symmetry.
    minx = max(int(np.min(mask_ind[1])) - 1, 0)
    maxx = min(int(np.max(mask_ind[1])) + 1, n_cols - 1)

    miny = max(int(np.min(mask_ind[0])) - 1, 0)
    maxy = min(int(np.max(mask_ind[0])) + 1, n_rows - 1)

    # Read only the bounding box; .data drops the masked-array wrapper.
    lake_ids_box = lakeid_var[miny:maxy+1, minx:maxx+1].data

# Binary mask built in one step (rather than two in-place relabelling passes,
# which would mark every cell as lake when lake_id is 0). The dtype of the
# source variable is kept.
mask_lake = np.where(lake_ids_box == lake_id, 1, 0).astype(lake_ids_box.dtype)

In [None]:
# The download process
#
# For every day between mindate and maxdate (inclusive) the daily product is
# opened remotely, cut to the lake's bounding box, masked outside the lake and
# kept in memory; all days are then concatenated along 'time' and written to a
# single netCDF file.
import time

# Output file path
output_file = f'{outdir}/{outprefix}ESACCI-LAKES-L3S-LK_PRODUCTS-MERGED-{mindate.strftime("%Y%m%d")}_to_{maxdate.strftime("%Y%m%d")}-fv{version}.nc'

# An inverted period would make the loop below run zero times and leave
# nothing to save, so report it up front.
if mindate > maxdate:
    raise ValueError(
        f'Nothing to download: {mindate:%Y-%m-%d} is later than {maxdate:%Y-%m-%d}'
    )

# Record the start time
start_time = time.time()

# Daily subsets are collected here and concatenated once after the loop;
# concatenating inside the loop would re-copy all accumulated data each day.
daily_datasets = []

# Loop over the dates
for data_date in range(mindate.toordinal(), maxdate.toordinal() + 1):
    current_date = datetime.datetime.fromordinal(data_date)
    date_str = current_date.strftime("%Y%m%d")

    path  = f'https://data.cci.ceda.ac.uk/thredds/dodsC/esacci/lakes/data/lake_products/L3S/v{version}/'
    path += f'{current_date.year}/{current_date.month:02}/'
    path += f'ESACCI-LAKES-L3S-LK_PRODUCTS-MERGED-{date_str}-fv{version}.nc'

    # Pull only the lake's bounding box into memory, then let the context
    # manager close the remote connection instead of leaving one open per day.
    with xr.open_dataset(path) as remote:
        dataset = remote.isel(lat=slice(miny, maxy+1), lon=slice(minx, maxx+1)).load()

    # apply mask only for variables with three dimensions: time, lat, lon
    # NOTE(review): open_dataset is called with default decoding, so the data
    # handled here are decoded values; writing the raw _FillValue into them may
    # not round-trip for packed (scale_factor/add_offset) variables -- confirm.
    for var in dataset.data_vars:
        if len(dataset[var].dims) == 3:
            filval = dataset[var].encoding['_FillValue']
            data = dataset[var][0, :, :].values
            data[mask_lake == 0] = filval
            dataset[var][0, :, :] = data

    daily_datasets.append(dataset)

# Merge datasets. A single day is kept as it is (no concatenation), matching
# what a one-day request has always produced.
if len(daily_datasets) == 1:
    merged_dataset = daily_datasets[0]
else:
    merged_dataset = xr.concat(daily_datasets, dim='time')

# Save the merged dataset to a single file
merged_dataset.to_netcdf(output_file)

# Record the end time
end_time = time.time()

# Calculate and print the elapsed time (download, merge and save)
elapsed_time = end_time - start_time
print(f"Download process took {elapsed_time:.2f} seconds.")

Download process took 3810.57 seconds.
