In [1]:
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import xarray as xr
from osgeo import gdal

from convert_geotiff_to_dataset import merge_Gtiffs_to_Dataset

#### Get subdatasets from hdf files

In [None]:
# Sample granule, used only to discover which subdatasets an HDF file contains.
fl=r"D:\VUB\_data\modis_ET\hdf_files\MOD16A2GF.A2000001.h21v08.061.2020256104621.hdf"

# Open the container once. Without GDAL exceptions enabled gdal.Open signals
# failure by returning None, so stop here with a readable message instead of
# an AttributeError further down.
sample_hdf = gdal.Open(fl, gdal.GA_ReadOnly)
if sample_hdf is None:
    raise RuntimeError(f"GDAL could not open {fl}")

# HDF files contain multiple subdatasets; element 0 of each entry is the
# string that gdal.Open accepts for that subdataset.
subdatasets = sample_hdf.GetSubDatasets()
sample_hdf = None  # drop the reference so the file handle is released

# Short label of each subdataset = last ':'-separated token of its name.
# The subdatasets themselves do not need to be opened for this.
subdataset_list = [entry[0].split(':')[-1] for entry in subdatasets]

In [None]:
# Display the short subdataset labels collected in the previous cell.
subdataset_list

In [None]:
# Full GDAL name of the subdataset at index 1 — the same index the
# GeoTIFF extraction cell below uses.
subdatasets[1][0]

#### Extract clipped GeoTiff for a specified hdf subdataset

In [None]:
# Index of the subdataset to extract. It drives both the output folder name
# and the layer picked out of every granule, so it is defined once.
SUBDATASET_INDEX = 1
# NOTE(review): processing starts at this position of the sorted file list,
# which looks like a resume point from an earlier partial run — set it to 0
# to process every file.
START_INDEX = 2011
# Only paths containing this tile id are processed.
TILE_ID = '21v08'

# gdalwarp options: write GeoTIFF, reproject to EPSG:4326, clip to the given
# output bounds, LZW compression. Built once because they do not change per file.
warp_kwargs = {'format': 'GTiff', 'dstSRS': 'EPSG:4326', 'outputBounds': [33.8, 0.00, 39.5, 5.0],'creationOptions': ['COMPRESS=LZW']}

input_files = sorted(glob.glob(r"D:/VUB/_data/modis_ET/hdf_files/*.hdf"))

# The output folder is named after the short label of the selected subdataset
# (relies on `subdatasets` from the subdataset-discovery cell).
out_folder_name = subdatasets[SUBDATASET_INDEX][0].split(':')[-1]
out_folder = f"D:/VUB/_data/modis_ET/tiff_files/{out_folder_name}"
os.makedirs(out_folder, exist_ok=True)

# Inputs GDAL could not open or warp; reported after the loop instead of
# being lost silently.
failed_files = []

for hdf_path in input_files[START_INDEX:]:
    if TILE_ID not in hdf_path:
        continue

    out_file = f"{out_folder}/{os.path.basename(hdf_path).replace('.hdf','.tif')}"

    # open the HDF container
    hdf_ds = gdal.Open(hdf_path, gdal.GA_ReadOnly)
    if hdf_ds is None:
        failed_files.append(hdf_path)
        continue

    # open the selected subdataset of this granule
    layer = gdal.Open(hdf_ds.GetSubDatasets()[SUBDATASET_INDEX][0], gdal.GA_ReadOnly)
    if layer is None:
        failed_files.append(hdf_path)
        hdf_ds = None
        continue

    # gdalwarp
    print(f"Processing {out_file}", end='\r')
    warped = gdal.Warp(destNameOrDestDS=out_file, srcDSOrSrcDSTab=layer, **warp_kwargs)
    if warped is None:
        failed_files.append(hdf_path)

    # Dropping the references closes the output (flushing it to disk) and the inputs.
    warped = layer = hdf_ds = None

if failed_files:
    print(f"\n{len(failed_files)} file(s) could not be processed:")
    for failed_path in failed_files:
        print(f"  {failed_path}")

#### Convert GeoTiff to netCDF

In [4]:
# All GeoTIFFs of the LE_500m folder, in sorted order.
files = sorted(glob.glob(r"D:/VUB/_data/modis_ET/tiff_files/LE_500m/*.tif"))

# Group the files per variable. The substring test runs against the whole
# path (folder included), not only the file name. Further variables can be
# added as extra groups in the same way.
first_var = []
for tif_path in files:
    if 'LE' in tif_path:
        first_var.append(tif_path)

# One list of files per variable.
var_list = [first_var]

##### Extracting date

In [None]:
# Date for file names that carry a julian day: the second '.'-separated token
# of the first file's name is sliced as [1:5] (year) and [5:] (day of year).
date_token = os.path.basename(var_list[0][0]).split('.')[1]
year = date_token[1:5]
julian_day = date_token[5:]
date_string = year + julian_day
date = pd.to_datetime(date_string, format='%Y%j')
date

In [None]:
# Date for file names shaped like MODIS_2023_09_30_LST_Day:
# '_'-separated tokens 1, 2 and 3 are read as year, month and day.
name_tokens = os.path.basename(var_list[0][0]).split('_')
year = name_tokens[1]
month = name_tokens[2]
day = name_tokens[3]
date_string = year + month + day
date = pd.to_datetime(date_string, format='%Y%m%d')

In [None]:
# Variable name = the first file's name up to its first '.', with the
# leading 17 characters removed.
first_stem = os.path.basename(var_list[0][0]).split('.')[0]
var = first_stem[17:]
var

##### Convert to xr.Dataset

In [5]:
# Build one dataset per file group via merge_Gtiffs_to_Dataset, then merge them.
var_name = "LE"        # variable name passed to the converter for every group
units = 'J/m^2/day'    # units string passed to the converter

var_dataset_list = []
for group_files in var_list:
    # the converter receives a copy of the group's file list
    var_dataset = merge_Gtiffs_to_Dataset(files=list(group_files), variable=var_name, units=units)
    var_dataset_list.append(var_dataset)

# Combine the per-group datasets; `var_dataset` stays bound to the last group.
var_dataset2 = xr.merge(var_dataset_list)



Saving the dataset ata/modis_ET/tiff_files/LE_500m\MOD16A2GF.A2023361.h21v08.061.2024038103258.tif

Saved the LE dataset


In [None]:
# Number of GeoTIFF paths collected in `files`.
# NOTE(review): the trailing note presumably records that the conversion was
# run in two batches (files[0:500] and files[500:]) with the second batch kept
# in var_dataset2 — confirm, since the conversion cell as written does not slice.
len(files) #0:500, 500: #change var_dataset2

In [7]:
# Merge the two datasets into one.
# NOTE(review): as the conversion cell is written, var_dataset is the last
# entry of var_dataset_list and var_dataset2 is the merge of that same list,
# so this only combines distinct data when the two names were produced by
# separate runs of that cell — confirm.
dataset = xr.merge([var_dataset,var_dataset2])

In [13]:
# Dataset-level metadata (the units string no longer carries a trailing tab).
dataset.attrs['units']='J/m^2/day'
dataset.attrs['long_name']='MOD16A2GF.061 Average latent heat flux'
dataset.attrs['scale_factor']= 10000

# Replace -9999 with NaN. The mask is computed from `dataset` itself: a mask
# taken from another object is aligned against `dataset` first, which can drop
# every coordinate the two do not share.
# NOTE(review): the scale factor is only recorded as an attribute; no scaling
# is applied to the values here — confirm whether that is intended.
dataset = dataset.where(dataset != -9999).astype('float32')

In [8]:
# Write the LE data to NetCDF.
# NOTE(review): this writes var_dataset2, not the merged `dataset` built in
# the cells above — confirm that this is the object meant to be saved.
var_dataset2.to_netcdf(r"D:/VUB/_data/nc_files/modis_LE_2000_2023.nc")

#### Data exploration

In [2]:
# Open the LAI NetCDF file; its 'LAI' variable feeds the bulk surface
# resistance and LAI time-series cells below.
modisLAI = xr.open_dataset(r"D:\VUB\_data\nc_files\modis_terra_LAI_2000_2024.nc")

In [None]:
# Time series of 'LST_Day' averaged over the box lat 1.0–4.0, lon 36.4–37.4
# (the lat slice is written high-to-low).
# NOTE(review): the conversion cell in this notebook builds var_dataset with
# the variable "LE"; 'LST_Day' presumably comes from a different run of that
# cell — confirm before running the notebook top to bottom.
var_dataset['LST_Day'].sel(lat =slice(4.0, 1.0), lon = slice(36.4, 37.4)).mean(dim=['lat','lon']).plot(figsize=(12,5))

In [7]:
# Bulk surface resistance derived from LAI (the author's note gives the
# relation as "100/0.5 * LAI").
# NOTE(review): this cell divides 2 by LAI while the later LAI cells use
# 2000/LAI — confirm which constant matches the scaling of the stored LAI.
bulk_surface_resistance = (2/(modisLAI)).rename({'LAI':'rs'})

# metadata for the derived dataset
bulk_surface_resistance.attrs.update({
    'units': 's/m',
    'long_name': 'bulk surface resistance',
})

In [None]:
# Time series of 'rs' averaged over the box lat 2.0–4.0, lon 37.4–39.4.
bulk_surface_resistance['rs'].sel(lat =slice(2.0, 4.0), lon = slice(37.4, 39.4)).mean(dim=['lat','lon']).plot(figsize=(12,5))

In [23]:
# LAI averaged over the box lat 1.0–3.0, lon 36.4–38.4, as a time series
# and as a DataFrame.
lai_box = modisLAI['LAI'].sel(lat=slice(1.0, 3.0), lon=slice(36.4, 38.4))
lai_ts = lai_box.mean(dim=['lat', 'lon'])
lai_df = lai_ts.to_dataframe()

In [None]:
# Plot 2000 divided by the box-mean LAI series.
# NOTE(review): the bulk surface resistance cell uses 2/LAI rather than
# 2000/LAI — confirm which constant is intended.
(2000/lai_df).plot(figsize=(12,5))

In [None]:
# Box plot of 2000 divided by the box-mean LAI series.
# seaborn (`sns`) is imported in the notebook's import cell.
sns.boxplot(data=(2000/lai_df))

In [8]:
# Resample to monthly ('ME') bins. Despite the variable's name the
# aggregation is a sum, not a mean.
monthly_bins = var_dataset.resample(time='ME')
var_dataset_mean_monthly = monthly_bins.sum()

# export to netcdf
var_dataset_mean_monthly.to_netcdf(r"D:/VUB/_data/nc_files\GLDAS_GWS_monthly.nc")

#### For Land surface Temperature

In [31]:
# Mean land surface temperature: average of the day and night layers.
lst_mean_kelvin = (var_dataset['LST_Day'] + var_dataset['LST_Night']) / 2

# Convert from K to deg C and name the resulting DataArray.
var_dataset_mean = (lst_mean_kelvin - 273.15).rename('LST')

# The result stays a DataArray: later cells index it by position and rename
# it with a plain string. The former bare to_dataset() call discarded its
# return value and had no effect, so it is gone.
var_dataset_mean.attrs['units']='deg C'
var_dataset_mean.attrs['name']='LST_daily_mean'
var_dataset_mean.attrs['description']='MOD21C2.061 Terra Land Surface Temperature mean of 8-day LST_Day and LST_Night'

In [32]:
# Write the LST DataArray (var_dataset_mean) to NetCDF.
var_dataset_mean.to_netcdf(r"D:/VUB/_data/nc_files/modis_LST_8day_2000_2004.nc")

In [62]:
# Monthly mean of the LST series ('ME' bins), renamed for export.
var_dataset_mean_monthly = (
    var_dataset_mean
    .resample(time='ME')
    .mean()
    .rename('LST_monthly_mean')
)

# var_dataset_mean was converted from K to deg C when it was computed, so the
# monthly mean is in deg C as well (it was previously labelled 'K').
var_dataset_mean_monthly.attrs['units']='deg C'
var_dataset_mean_monthly.attrs['name']='LST_monthly_mean'

# Wrap the DataArray in a Dataset for export and for access by variable name.
var_dataset_mean_monthly=var_dataset_mean_monthly.to_dataset()

In [None]:
# Plot the element at position 2 along the first dimension of
# var_dataset_mean, using a diverging colour map.
var_dataset_mean[2].plot(figsize=(10, 6), cmap='RdBu_r')

In [65]:
# Write the monthly LST dataset to NetCDF.
# (The commented-out line below is an export of var_dataset to grace_tws.nc
# and is not executed.)
# var_dataset.to_netcdf(r"D:/VUB/_data/nc_files/grace_tws.nc")
var_dataset_mean_monthly.to_netcdf(r"D:/VUB/_data/nc_files/modis_terra_LST_mon.nc")

In [None]:
fig, ax = plt.subplots(figsize=(10,3.5))

# Monthly LST averaged over the box lon 36.4–36.6, lat 0.9–1.1
# (the lat slice is written high-to-low).
lst_box_mean = (
    var_dataset_mean_monthly['LST_monthly_mean']
    .sel(lat=slice(1.1,0.9), lon=slice(36.4,36.6))
    .mean(dim=['lat','lon'])
)
# Label the line with what it shows so the legend is meaningful.
lst_box_mean.plot(ax=ax, label='LST monthly mean')

# Request the legend from the axes that holds the line. No secondary axis is
# created because nothing is drawn on one; with an empty twin axis in place,
# plt.legend() would address that empty axis and show no entries.
ax.legend()