<a href="https://colab.research.google.com/github/sinajahangir/Cload-Data-Retrieval/blob/main/DaymetRetrieval_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

First version: July 2025
Sina Jahangir

Downloads and processes Daymet v4 (Thornton et al., 2022) data using the Google Earth Engine (GEE) API.

Efficiently extracts time-series data for points (x, y) and shapefiles (point extraction is currently disabled in the code below)

Exports the results as CSV files to Google Drive, ready to be loaded into a Pandas DataFrame for analysis or plotting

Reference:
Thornton, M.M., R. Shrestha, Y. Wei, P.E. Thornton, S-C. Kao, and B.E. Wilson. 2022. Daymet: Daily Surface Weather Data on a 1-km Grid for North America, Version 4 R1. ORNL DAAC, Oak Ridge, Tennessee, USA. https://doi.org/10.3334/ORNLDAAC/2129

Key Features:

✅ Custom data pre-processing (feature selection)

✅ Modular framework for efficiency and reproducibility

✅ GEE utilization through Google Cloud

MIT License
Copyright (c) [2025] [Sina Jahangir]

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Install dependencies

In [2]:
# Install libraries
!pip install earthengine-api # library used to access ee
# Library import
import ee
import pandas as pd
import os
import geopandas as gpd
import json



# Access GEE

In [3]:
# Initialization: authenticate, start an Earth Engine session, and mount Google Drive.
## Google authentication
ee.Authenticate()  # Authenticate with your Google account
## Earth Engine API
# Initialize the library.
# Replace '...' with the ID of the project you defined on Google Cloud.
ee.Initialize(project='...')

# Fetch a string from the Earth Engine servers to confirm the session works.
print(ee.String('Hello from the Earth Engine servers!').getInfo())
## Access Google Drive
# The shapefiles and the results folder used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Hello from the Earth Engine servers!
Mounted at /content/drive


# Set settings

In [5]:
# Change the working directory to the folder where the shapefiles are saved,
# so the shapefiles can be referenced by file name alone.
# This is only needed for data retrieval based on a shapefile.
os.chdir('/content/drive/MyDrive/PHIMP-Flood') # change this to your own folder

In [6]:
# Shapefiles to process, one entry per file
shapefiles = [
    'CanHydroFabric_Watersheds.shp',
]

In [7]:
# Create the Drive folder that will hold the results, unless it already exists.
#
# NOTE: Google Earth Engine (GEE) Export.table.toDrive (used by the export
# function in this notebook) does not support subfolder paths (like
# 'parent/child') in its `folder` parameter: the value is treated as a plain
# folder name and nested folders are not created. Use a top-level folder in
# your Google Drive (My Drive).
savefolder_name = 'DaymetExtraction'
# Build the path once so the existence check and the creation cannot disagree.
savefolder_path = os.path.join('/content/drive/MyDrive', savefolder_name)
if os.path.exists(savefolder_path):
    print("Directory already exists.")
else:
    # os.mkdir (not makedirs) on purpose: if Drive is not mounted the parent is
    # missing and this fails loudly instead of creating a local-only folder.
    os.mkdir(savefolder_path)

# Data retrieval functions

In [8]:
def get_dataset_for_shapefile(shapefile, start_date='1980-01-01',
                              end_date='2023-12-31', export_name='daymet_export',
                              variables=None, folder=None, scale=1000):
    """Export a daily, spatially averaged Daymet V4 time series to Google Drive.

    The Daymet collection is filtered to the requested period, each daily image
    is averaged over the merged geometry of all features in the shapefile, and
    the resulting table (one row per day) is exported as a CSV file through an
    Earth Engine batch task.

    Parameters
    ----------
    shapefile : str
        Path to the shapefile; it is reprojected to EPSG:4326 before upload.
    start_date : str
        First day of the period ('YYYY-MM-DD'), inclusive.
    end_date : str
        Last day of the period ('YYYY-MM-DD'), inclusive.
    export_name : str
        Used both as the task description and as the CSV file name prefix.
    variables : sequence of str, optional
        Daymet bands to export. Defaults to
        ['prcp', 'srad', 'tmax', 'tmin', 'vp'].
    folder : str, optional
        Name of the Google Drive folder receiving the CSV. Defaults to the
        notebook-level `savefolder_name`.
    scale : float
        Scale, in metres, passed to `reduceRegion`.

    Returns
    -------
    ee.batch.Task
        The started export task, so the caller can poll its status.

    Raises
    ------
    ValueError
        If the shapefile contains no features.
    """
    # Copy the sequence so the selectors list built below never aliases the
    # caller's object.
    if variables is None:
        variables = ['prcp', 'srad', 'tmax', 'tmin', 'vp']
    else:
        variables = list(variables)
    if folder is None:
        # Fall back to the notebook-level setting.
        folder = savefolder_name

    # ee.Filter.date treats its end as exclusive; advance it by one day so that
    # `end_date` itself is part of the exported series.
    end_exclusive = ee.Date(end_date).advance(1, 'day')
    dataset = (
        ee.ImageCollection('NASA/ORNL/DAYMET_V4')
        .filter(ee.Filter.date(start_date, end_exclusive))
        .select(variables)
    )

    # Read the shapefile and convert it to GeoJSON (ensure it's in lat/lon).
    gdf = gpd.read_file(shapefile).to_crs(epsg=4326)
    if gdf.empty:
        raise ValueError(f'Shapefile contains no features: {shapefile}')
    geojson_dict = json.loads(gdf.to_json())

    # Build the region geometry once instead of inside the mapped function.
    region_geometry = ee.FeatureCollection(geojson_dict).geometry()

    def reduce_image(image):
        """Return a geometry-less feature holding the regional means and the date."""
        mean_dict = image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=region_geometry,
            scale=scale,
            maxPixels=1e13
        )
        return ee.Feature(None, mean_dict).set('date', image.date().format('YYYY-MM-dd'))

    reduced = dataset.map(reduce_image)

    task = ee.batch.Export.table.toDrive(
        collection=reduced,
        description=export_name,
        fileFormat='CSV',
        folder=folder,
        fileNamePrefix=export_name,
        selectors=['date'] + variables
    )
    task.start()
    print(f'Export started: {export_name}. Check Google Drive.')
    return task
# Point extraction (excluded for now).
# The two helpers below are wrapped in a triple-quoted string, so they are NOT
# defined when this cell runs. Because that string is the last expression of
# the cell, the notebook echoes it as the cell output.
'''
def get_dataset_for_point(lat, lon, start_date='1980-01-01', end_date='2023-12-31'):
    """Retrieve and filter the DayMet dataset for a specific time range and location."""

    # Load the DayMet dataset and filter by date
    dataset = ee.ImageCollection('NASA/ORNL/DAYMET_V4')
    dataset = dataset.filter(ee.Filter.date(start_date, end_date))

    # Select all relevant variables
    variables = ['tmax', 'tmin', 'prcp', 'srad', 'vp']
    dataset = dataset.select(variables)

    # Define the point of interest
    point = ee.Geometry.Point(lon, lat)

    # Extract data for the given location
    data = dataset.getRegion(point, scale=1000).getInfo()

    return data

def convert_to_dataframe(data):
    """Convert the extracted data to a Pandas DataFrame."""
    if not data or len(data) < 2:
        return pd.DataFrame()

    columns = data[0]  # First row contains column names
    values = data[1:]  # Remaining rows contain data
    df = pd.DataFrame(values, columns=columns)
    return df
'''

'\ndef get_dataset_for_point(lat, lon, start_date=\'1980-01-01\', end_date=\'2023-12-31\'):\n    """Retrieve and filter the DayMet dataset for a specific time range and location."""\n\n    # Load the DayMet dataset and filter by date\n    dataset = ee.ImageCollection(\'NASA/ORNL/DAYMET_V4\')\n    dataset = dataset.filter(ee.Filter.date(start_date, end_date))\n\n    # Select all relevant variables\n    variables = [\'tmax\', \'tmin\', \'prcp\', \'srad\', \'vp\']\n    dataset = dataset.select(variables)\n\n    # Define the point of interest\n    point = ee.Geometry.Point(lon, lat)\n\n    # Extract data for the given location\n    data = dataset.getRegion(point, scale=1000).getInfo()\n\n    return data\n  \ndef convert_to_dataframe(data):\n    """Convert the extracted data to a Pandas DataFrame."""\n    if not data or len(data) < 2:\n        return pd.DataFrame()\n\n    columns = data[0]  # First row contains column names\n    values = data[1:]  # Remaining rows contain data\n    df = pd.

# Extract and save the data as CSV file(s)

In [9]:
export_name = '02GA014'  # change this
# Start one export per shapefile. With a single shapefile the export keeps
# `export_name` unchanged; with several, a 1-based index is appended so the
# exported files do not all share the same name.
for position, shapefile in enumerate(shapefiles, start=1):
    if len(shapefiles) == 1:
        task_name = export_name
    else:
        task_name = f'{export_name}_{position}'
    get_dataset_for_shapefile(shapefile, export_name=task_name)
# This might take some time to complete.

Export started: 02GA014. Check Google Drive.
