# ECOSTRESS data wrangling

From [AppEEARS](https://lpdaacsvc.cr.usgs.gov/appeears), I've downloaded a collection of GeoTIFFs of ECOSTRESS land surface temperature (LST) for a bounding box around Clear Lake.

Open all those images and convert to NetCDF files with some data formatting/cleanup.

In [1]:
import os
import xarray as xr
import rioxarray as rioxr
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import box, mapping

import matplotlib.pyplot as plt
%matplotlib inline

Set the path to the top level directory where all our ECOSTRESS images are, their file extension, and product name.

In [2]:
# Top-level directory that is walked recursively when searching for images
eco_tiff_path = '/home/jovyan/data/ECOSTRESS/'
# Only files whose names end with this extension are kept
ext = '.tif'
# Only files whose names contain this product string are kept
product = 'SDS_LST_doy'

Get a list of all our ECOSTRESS LST files

In [3]:
# Recursively collect the full path of every file under eco_tiff_path whose
# name ends with `ext` and contains `product`.
file_list = []
for root, _dirs, files in os.walk(eco_tiff_path):
    for file in files:
        if file.endswith(ext) and product in file:
            file_list.append(os.path.join(root, file))

# os.walk yields directory entries in arbitrary (filesystem-dependent) order;
# sort so the files are processed in the same order on every run.
file_list.sort()

In [4]:
# Report how many matching files were discovered
print(f'Found {len(file_list)} files')

Found 358 files


Open each file, reformat it, and export it to a NetCDF file.

In [5]:
# Open every GeoTIFF, requesting 256 x 256 chunks along x and y.
# NOTE(review): xr.open_rasterio is deprecated in newer xarray releases in
# favour of rioxarray.open_rasterio -- confirm the pinned xarray version
# before upgrading (the replacement stores metadata differently).
rasterlist = [
    xr.open_rasterio(tif_path, chunks=dict(x=256, y=256))
    for tif_path in file_list
]

In [6]:
# Convert every opened raster into a NetCDF file written next to its source
# GeoTIFF (same path with '.nc' appended).
# file_list and rasterlist are parallel lists, so walk them in lockstep.
for tif_path, this_eco_lst in zip(file_list, rasterlist):

    # The acquisition time is encoded in the second-to-last '_'-separated
    # field of the file name; the first 3 characters of that field are
    # dropped (presumably the 'doy' prefix -- verify against the file names).
    # os.path.basename is used instead of splitting on '/' so the parsing
    # does not depend on the platform's path separator.
    file_datestring = os.path.basename(tif_path).split('_')[-2][3:]

    # Parse as year, day-of-year, hour, minute, second
    file_timestamp = pd.to_datetime(file_datestring, format='%Y%j%H%M%S')

    # Pull the raw values, coordinates and metadata out of the raster
    lst = this_eco_lst.values
    lon = this_eco_lst.x.values
    lat = this_eco_lst.y.values
    time = np.array([file_timestamp])
    metadata = this_eco_lst.attrs

    # Repackage into a DataArray with a length-1 time dimension.
    # NOTE(review): assumes a single-band raster, so the leading axis of
    # `lst` has length 1 and lines up with `time` -- TODO confirm.
    eco_lst_da = xr.DataArray(
        lst,
        coords=[time, lat, lon],
        dims=['time', 'y', 'x'],
        name='lst',
        attrs=metadata,
    )

    eco_lst_da.to_netcdf('{}.nc'.format(tif_path))
