# Settings for Download: MODIS LST Terra

[MODIS/061/MOD11A1](https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD11A1)

In [1]:
# ADJUST ONLY THESE PARAMETERS!

# If loop broke, resume by setting this i to where you want to continue
skip_to_i = 0  # 0 = no skip

# Timescale of Interest
my_plus_years = 5  # Years to add after first visit
my_minus_years = 2  # Years to add before first visit
my_first_date = "-01-01"  # First date of first year "-MM-DD"
my_last_date = "-12-31"  # Last date of last year "-MM-DD"

# Dataset of Interest
product = "MODIS/061/MOD11A1"
product_start_date = "2000-02-24"
product_end_date = "2023-10-28"
product_scale = 1000

output_scale = product_scale
output_folder = "gee-raw-data/modis-lst-terra"

my_bands = ["LST_Day_1km", "QC_Day"]

In [2]:
# ADJUST IF NEEDED
download_only_remaining_files = True

# Import Libraries
import pandas as pd
import numpy as np
import os
import sys

sys.path.insert(0, "../../src")

# Import Functions
from run_mp import *
from gee_data_wrangling import *
from gee_data_download import *

# Load data from csv
data = get_location_site_df()

# Clean first and last date for download routine
data_clean = adjust_first_last_date(
    df=data,
    plus_years=my_plus_years,
    minus_years=my_minus_years,
    first_date=my_first_date,
    last_date=my_last_date,
)

# -----------------------
# Shortcut to only download remaining files
if download_only_remaining_files:
    # list all files in output_folder with .feather ending
    files = [f for f in os.listdir(output_folder) if f.endswith(".feather")]

    # extract the number from the filenames just before the '.feather' ending
    ids = [int(item.split("_")[1].split(".")[0]) for item in files]

    # filter data_clean to hold only id that are not in the folder already
    data_clean = data_clean[~data_clean["id"].isin(ids)]
    # data_clean
# -----------------------

# Split sites into groups of ten, to use all cores
sequence = np.arange(1, 11)  # This creates an array [1, 2, ..., 10]
repeated_sequence = np.tile(sequence, len(data_clean) // len(sequence) + 1)[
    : len(data_clean)
]
data_clean["group_id"] = repeated_sequence

# Create a list of all the groups
grouped = data_clean.groupby("group_id")
df_list = [group for name, group in grouped]

# Parallelize the download
run_mp(
    download_gee_data_PARALLEL,
    df_list,
    combine_func=pd.concat,
    progress_bar=True,
    num_cores=10,
    product=product,
    my_bands=my_bands,
    product_start_date=product_start_date,
    product_end_date=product_end_date,
    product_scale=product_scale,
    output_scale=output_scale,
    output_folder=output_folder,
    verbose=False,
)

100%|██████████| 10/10 [52:28<00:00, 314.83s/it]  


ValueError: All objects passed were None